#!/usr/bin/perl -w
#
# Take one of the raw data files of the form:   yyyymmdd line_of_data
# which may be either a plain file or a flat file with multiple tagged
# entries, and extract the required data into a simple table (minus the date).
#
# Input conventions, as implemented below:
#   - lines starting with '%' are skipped
#   - lines starting with whitespace (including blank lines) are skipped
#   - a line of the form '>NAME' starts a tagged section; only the section
#     whose NAME equals the -tag value is extracted (lines that appear before
#     the first '>' line are always extracted)
#   - every other line must be:  yyyymmdd <whitespace> data...
#
# Version 1.0
#
# fc - 31st May 2010

use strict;
use warnings;

use Date::Calc qw(Delta_Days);

# Quick manual check of the date helper:
#   my $diff = compare_yyyymmdd_dates( '20090101', '20090102' );
#   print "20090101 20090102 -> $diff\n";

# When true the leading date column is dropped from the output.  The date can
# only be kept when $DOWNSAMP == 1: the down-sampling branch always works on
# the data columns alone.
my $FLAG_DROP_DATE = 1;

my $FILE_IN    = '';
my $FILE_OUT   = '';
my $TAG        = '';    # For the flat files
my $DATE_ST    = '';
my $DATE_END   = '';
my $DATE_YEAR  = '';    # Alternative to giving a full start and end date
my $DATE_MONTH = '';    # Reserved: -month is not implemented yet
my $DOWNSAMP   = 1;

if ( not @ARGV ) {
    print STDERR "\n";
    print STDERR "Usage: table_dat.pl -in in_file -start yyyymmdd -end yyyymmdd [options]\n";
    print STDERR "       -out out_file default is to stdout\n";
    print STDERR "       -tag TAG      flat file tag\n";
    print STDERR "       -year yyyy    whole calendar year; alternative to -start/-end\n";
    print STDERR "       -downsamp #   down sample the data; # -> 1\n";
    print STDERR "\n";
    exit(1);
}

# Every supported option takes exactly one value, stored through these refs.
my %option_target = (
    '-in'       => \$FILE_IN,
    '-out'      => \$FILE_OUT,
    '-tag'      => \$TAG,
    '-start'    => \$DATE_ST,
    '-end'      => \$DATE_END,
    '-year'     => \$DATE_YEAR,
    # '-month'  => \$DATE_MONTH,    # Not implemented just yet
    '-downsamp' => \$DOWNSAMP,
);

for ( my $i = 0 ; $i < @ARGV ; $i += 2 ) {
    my $opt    = $ARGV[$i];
    my $target = $option_target{$opt}
        or die "ERROR unknown option: $opt\n";

    # A trailing option without a value used to store undef silently.
    $i + 1 < @ARGV
        or die "ERROR: option $opt needs a value\n";
    ${$target} = $ARGV[ $i + 1 ];
}

# ---- Validate everything BEFORE touching the output file, so that a bad
# ---- command line can never truncate an existing -out file.

( $FILE_IN ne '' and -r $FILE_IN )
    or die "ERROR: input data is not there or not readable\n";

# A step of 0 would make the down-sampling loop spin forever.
( $DOWNSAMP =~ /^\d+$/ and $DOWNSAMP > 0 )
    or die "ERROR: -downsamp must be a positive integer\n";

# Sort out the requested time slice
if ($DATE_ST) {
    $DATE_END       or die "ERROR: must provide an end date\n";
    not $DATE_YEAR  or die "ERROR: -start and -year incompatiable\n";
    not $DATE_MONTH or die "ERROR: -start and -month incompatiable\n";
}
else {
    $DATE_YEAR or die "ERROR: -start OR -year needed\n";
    $DATE_ST  = $DATE_YEAR . "0101";
    $DATE_END = $DATE_YEAR . "1231";

    # NEED TO implement the month level here
}

# Parsing both limits once up front reports a malformed -start/-end/-year
# immediately instead of at the first data line.
if ( compare_yyyymmdd_dates( $DATE_ST, $DATE_END ) < 0 ) {
    warn "WARNING: start date $DATE_ST is after end date $DATE_END; no data will be selected\n";
}

if ($FILE_OUT) {
    open STDOUT, '>', $FILE_OUT
        or die "ERROR opening $FILE_OUT for STDOUT: $!\n";
}

open my $in, '<', $FILE_IN
    or die "ERROR trying to open input file: $FILE_IN: $!\n";

# True while the current line belongs to a section that should be extracted.
my $flag_record = 1;

LINE:
while ( my $line = <$in> ) {
    next LINE if $line =~ /^%/;
    next LINE if $line =~ /^\s+/;

    # Section header of a flat file: switch recording on or off.
    if ( $line =~ /^>(\S+)/ ) {
        $flag_record = ( $1 eq $TAG ) ? 1 : 0;
        next LINE;
    }
    next LINE unless $flag_record;

    chomp $line;
    my ( $date, $data ) = $line =~ /^(\S+)\s+(.*)/
        or die "ERROR parsing line $. of $FILE_IN\n";

    # Keep only dates inside the inclusive range [$DATE_ST, $DATE_END].
    next LINE if compare_yyyymmdd_dates( $DATE_ST,  $date ) < 0;    # before start
    next LINE if compare_yyyymmdd_dates( $DATE_END, $date ) > 0;    # after end

    if ( $DOWNSAMP == 1 ) {
        my $out = $FLAG_DROP_DATE ? $data : $line;
        print "$out\n";
        next LINE;
    }

    # Down sample the data: average each run of $DOWNSAMP consecutive values.
    # $data never contains the date, so every field here is a data value.
    my @nums = split /\s+/, $data;
    my @samp;
    for ( my $first = 0 ; $first < @nums ; $first += $DOWNSAMP ) {

        # The final window may be short when the number of values is not a
        # multiple of $DOWNSAMP; average over the values actually present
        # rather than reading past the end of the row.
        my $last = $first + $DOWNSAMP - 1;
        $last = $#nums if $last > $#nums;

        my $sum   = 0;
        my $count = $last - $first + 1;
        $sum += $_ for @nums[ $first .. $last ];

        # Truncate (not round) the mean to one decimal place.
        push @samp, int( 10 * $sum / $count ) / 10;
    }
    print join( "\t", @samp ), "\n";
}

close $in;

# Buffered write errors (e.g. a full disk) only surface when closing.
close STDOUT
    or die "ERROR closing output: $!\n";

#=====
#
# compare_yyyymmdd_dates( $date1, $date2 )
#
# Return the number of days from $date1 to $date2, i.e. the result of
# Delta_Days( date1, date2 ).  If date2 is the next day from date1 this is 1;
# it is 0 for equal dates and negative when date2 is earlier than date1.
#
# Both arguments must be 8-digit yyyymmdd strings; dies otherwise.
#
sub compare_yyyymmdd_dates {
    my ( $date1, $date2 ) = @_;

    my ( $y1, $m1, $d1 ) = $date1 =~ /^(\d\d\d\d)(\d\d)(\d\d)$/
        or die "Error [compare_yyyymmdd_dates] parsing date: $date1\n";

    my ( $y2, $m2, $d2 ) = $date2 =~ /^(\d\d\d\d)(\d\d)(\d\d)$/
        or die "Error [compare_yyyymmdd_dates] parsing date: $date2\n";

    return Delta_Days( $y1, $m1, $d1, $y2, $m2, $d2 );
}