#! /bin/bash
#
# Script to retrieve raw data from the PBO Strainmeter Archives.
# Version 1.0 06jun2006 --jrw
# Jim Wright
#
# bash is not the best tool for parsing and validating user input,
# but in this script I try to assume the absolute minimum for externally
# available tools. Other than standard awk, sed, tr and friends, I only
# require bash and wget. If you do not have wget already, retrieve it
# from http://www.gnu.org/software/wget/. Version 1.10.2 is current as
# of this writing.
#
# Usage examples:
#
#   pbo-strain-fetch -s b005 -b 2006:030
#       Retrieve all raw files from station B005 for January 30, 2006.
#
#   pbo-strain-fetch -s b005 -r 600 -b 2005:360 -e 2006:5
#       Retrieve the raw 10-minute files from station B005 for all
#       days from December 26, 2005 through January 5, 2006.
#
#   pbo-strain-fetch -s b005 -r 1,20 -b 2006:1
#       Retrieve the raw 1-second and 20-Hz files from station B005
#       for January 1, 2006.
#
# Exit status: 0 when the requested range has been processed, 1 on any
# command-line error or when wget is not installed.  Individual download
# failures are reported by wget itself and do not stop the run.

#DEBUG=echo # uncomment for testing: prints the wget commands instead of running them

# Print the usage summary on stderr and exit with a failure status.
# Every caller gets here after reporting a command-line error, so the exit
# status is non-zero to let calling scripts detect the failure.
usage() {
  cat >&2 << EOT
Usage: ${0##*/} -s station -b begin [-e end] [-a archive] [-r rates]
    station is 4-character or 16-character station name
    begin is first date to retrieve
    end is optional end date to retrieve, for a range of files
        specify dates as YYYY:DOY
    archive is optional archive center from which to retrieve, ncedc or dmc
    rates is optional list of data rates to retrieve: 600,1,20
EOT
  exit 1
}

# Report a command-line error on stderr, then show usage and exit.
# Arguments: the message text (without the "Error: " prefix).
fail() {
  printf 'Error: %s\n' "$*" >&2
  usage
}

# Print $1 converted to lower case.
to_lower() {
  printf '%s' "$1" | tr '[:upper:]' '[:lower:]'
}

# Print the decimal value of $1 when it consists solely of digits, else 0.
# The text is validated before it reaches arithmetic evaluation, and the
# 10# prefix stops values such as "030" or "008" being read as octal.
to_number() {
  case "$1" in
    '' | *[!0-9]*) echo 0 ;;
    *) echo $((10#$1)) ;;
  esac
}

# Print the number of days (365 or 366) in Gregorian year $1.
days_in_year() {
  local year=$1
  if (( (year % 4 == 0 && year % 100 != 0) || year % 400 == 0 )); then
    echo 366
  else
    echo 365
  fi
}

# Parse and validate the command line, then download one file per requested
# data rate for every day in the requested range.
# Arguments: the script's command-line arguments.
# Globals:   DEBUG (read) - when non-empty it is prefixed to each wget
#            command (e.g. DEBUG=echo prints the commands) and a summary of
#            the validated settings is printed first.
main() {
  local station="" begin="" end="" archive="ncedc" rates="600,1,20"
  local opt

  # grok command line.  The leading ':' selects getopts' silent mode, in
  # which the offending option letter is delivered in OPTARG, not in opt.
  OPTIND=1
  while getopts ":a:b:e:s:r:" opt; do
    case "$opt" in
      s) station=$OPTARG ;;
      b) begin=$OPTARG ;;
      e) end=$OPTARG ;;
      a) archive=$OPTARG ;;
      r) rates=$OPTARG ;;
      :) fail "Option '-$OPTARG' requires an argument" ;;
      *) fail "Unrecognized option '-$OPTARG'" ;;
    esac
  done
  shift $((OPTIND - 1))

  # validate station selection, and set variables
  local bnum="" stn=""
  station=$(to_lower "$station")
  case "$station" in
    b001 | golbeck01bwa2005) bnum=B001 stn=golbeck01bwa2005 ;;
    b004 | hokofallsbwa2005) bnum=B004 stn=hokofallsbwa2005 ;;
    p403 | floequarybwa2005) bnum=P403 stn=floequarybwa2005 ;;
    b005 | shoresnw1bwa2005) bnum=B005 stn=shoresnw1bwa2005 ;;
    b006 | shoresne2bwa2005) bnum=B006 stn=shoresne2bwa2005 ;;
    b007 | shoresso3bwa2005) bnum=B007 stn=shoresso3bwa2005 ;;
    b009 | pacgeosi1bbc2005) bnum=B009 stn=pacgeosi1bbc2005 ;;
    b010 | pacgeosi2bbc2005) bnum=B010 stn=pacgeosi2bbc2005 ;;
    b011 | pacgeosi3bbc2005) bnum=B011 stn=pacgeosi3bbc2005 ;;
    b012 | ucluelet1bbc2005) bnum=B012 stn=ucluelet1bbc2005 ;;
    b018 | delphi018bor2006) bnum=B018 stn=delphi018bor2006 ;;
    b022 | seaside22bor2006) bnum=B022 stn=seaside22bor2006 ;;
    b035 | grants035bor2006) bnum=B035 stn=grants035bor2006 ;;
    *) fail "Unrecognized station: '$station'" ;;
  esac

  # validate archive, and set base url for file retrieval
  local baseurl=""
  archive=$(to_lower "$archive")
  case "$archive" in
    ucb | ncedc) baseurl="http://www.ncedc.org/pbo/strain/raw/bsm" ;;
    iris | dmc) baseurl="http://www.iris.edu/pbo/raw/bsm" ;;
    *) fail "Unrecognized archive: '$archive'" ;;
  esac

  # validate dates: each must contain exactly one ':' separator
  local colons
  colons=${begin//[!:]/}
  if [[ ${#colons} -ne 1 ]]; then
    fail "Invalid beginning date format: '$begin'"
  fi
  colons=${end//[!:]/}
  if [[ -n $end && ${#colons} -ne 1 ]]; then
    fail "Invalid ending date format: '$end'"
  fi

  # date component variables will be numeric if the user gave us a
  # well-formed value, or 0 if they did not (or gave no end date at all).
  local begin_year begin_doy end_year end_doy
  begin_year=$(to_number "${begin%%:*}")
  begin_doy=$(to_number "${begin##*:}")
  end_year=$(to_number "${end%%:*}")
  end_doy=$(to_number "${end##*:}")

  # catch the case of bad input becoming 0, but don't try to enforce further
  # sanity checks on the actual year or day-of-year entered.  however we do
  # insist that the end date not precede the begin date, if one is provided.
  if (( begin_year <= 0 || begin_doy <= 0 )); then
    fail "Beginning year or day-of-year can not be zero: '$begin'"
  elif (( end_year > 0 && end_doy > 0 )); then
    if (( begin_year > end_year ||
      (begin_year == end_year && begin_doy > end_doy) )); then
      fail "Begin date cannot be larger than end date: '$begin'/'$end'"
    fi
  elif [[ -n $end ]]; then
    # an end date was supplied but at least one component is zero or not
    # numeric; reject it rather than silently ignoring the option.
    fail "Ending year or day-of-year must be greater than zero: '$end'"
  fi

  # finally, if they did not specify an end date, set it equal to begin date
  if [[ -z $end ]]; then
    end_year=$begin_year
    end_doy=$begin_doy
  fi

  # validate rates and build the list of file-name suffixes to retrieve.
  # rates may be separated by commas and/or blanks; empty items are skipped.
  local -a rate_list=() extensions=()
  local rate
  IFS=', ' read -r -a rate_list <<< "$rates"
  for rate in "${rate_list[@]}"; do
    case "$rate" in
      '') ;;
      600) extensions+=("Day.tgz") ;;
      1) extensions+=("_01.tar") ;;
      20) extensions+=("_20.tar") ;;
      *) fail "Unrecognized data rate: '$rates'" ;;
    esac
  done
  if (( ${#extensions[@]} == 0 )); then
    # nothing usable was requested; say so instead of downloading nothing.
    fail "Unrecognized data rate: '$rates'"
  fi

  # done validating user input, optionally print status so far
  if [[ -n ${DEBUG:-} ]]; then
    echo "=============================="
    echo "== STN $stn"
    echo "== BNUM $bnum"
    echo "== EXTENSIONS ${extensions[*]}"
    echo "== BEGIN_YYYY $begin_year"
    echo "== BEGIN_DOY $begin_doy"
    echo "== END_YYYY $end_year"
    echo "== END_DOY $end_doy"
    echo "== BASEURL $baseurl"
    echo "=============================="
  fi

  # fail once, up front, rather than once per file when wget is missing
  # (not needed when DEBUG replaces the actual download command).
  if [[ -z ${DEBUG:-} ]] && ! command -v wget > /dev/null 2>&1; then
    printf 'Error: wget not found; see http://www.gnu.org/software/wget/\n' >&2
    exit 1
  fi

  # now do the work of downloading the files
  local year=$begin_year doy=$begin_doy ddd ext
  until (( year > end_year || (year == end_year && doy > end_doy) )); do
    # day-of-year is zero-padded to three digits in archive paths
    printf -v ddd '%03d' "$doy"

    for ext in "${extensions[@]}"; do
      # DEBUG is deliberately unquoted: when empty it must vanish so that
      # wget itself is the command that runs.
      # shellcheck disable=SC2086
      ${DEBUG:-} wget "$baseurl/$stn/$year/$ddd/${bnum}.${year}${ddd}${ext}"
    done

    # account for year rollover, using the real length of the current year
    # so that no request is made for day 366 of a non-leap year.
    doy=$((doy + 1))
    if (( doy > $(days_in_year "$year") )); then
      year=$((year + 1))
      doy=1
    fi
  done
}

main "$@"