#! /usr/bin/env bash
#
# $Header: /u/gcmpack/MITgcm/utils/scripts/gluemnc,v 1.16 2011/11/10 14:26:16 mlosch Exp $
# $Name: $

# This is a shell script to combine multiple MITgcm mnc output files from
# different tiles into one global file.
# All of the variables should be in one directory, where this script is run.
#
# To combine all state.0000000000.t*.nc files,
# gluemnc state.0000000000.*.nc
# This will result in an output file state.0000000000.glob.nc
# Where glob is for global.
#
# You can even combine all mnc files, use
# gluemnc *.nc
# This will result in a series of global files,
# state.0000000000.glob.nc state.0000000100.glob.nc, ...
# grid.0000000000.glob.nc grid.0000000100.glob.nc, ...
# diag.0000000000.glob.nc diag.0000000100.glob.nc, ...
#
# A lot of hard drive activity is needed.  If you have a fast drive,
# export TMPDIR=/path/on/that/drive.  On some high-performance
# systems, this is already done for you.
# If TMPDIR is unset or empty, the current directory is used instead.
#
# **********WARNINGS**********
# This will probably not work at all with exch2/cubed sphere.
# In that case, you probably can assemble all of the tiles on a face,
# but combining faces is currently not implemented.
#
# Be sure you have enough disk space for the copies!  In this version
# nothing is done to assure all of the data is copied.
#
# Be careful!  It will be easy to exceed the 2 GB limit for the old 32-bit
# version of netcdf.  If you do not have large-file support or 64-bit netcdf
# you will have to be clever in dividing up your tiled files,
# e.g., along the time dimension before combining to global files.
# The nco operator ncks is adept at shortening files to fewer snapshots.
# *****************************
#
# Good luck and happy gluing,
# Baylor Fox-Kemper

DEBUG="--dbg_lvl=0"
LOGFILE="/dev/null"

# Print a message on stderr and abort with a non-zero exit status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# contains NEEDLE [ITEM...]
# Succeeds when NEEDLE is literally equal to one of the ITEMs.
contains() {
  local needle=$1 item
  shift
  for item in "$@"; do
    [[ "$item" == "$needle" ]] && return 0
  done
  return 1
}

# coord_key COORD FILE
# Prints the values of coordinate variable COORD in FILE as one string with
# all whitespace removed (e.g. "X=0.5,1.5").  Tiles that print the same
# string share the same position along that axis.  Prints nothing when
# ncdump shows no "COORD = " line.
coord_key() {
  local coord=$1 file=$2 dump
  dump=$(ncdump -v"$coord" -l 10000 "$file" | grep "$coord = ")
  dump=${dump#*;}   # drop everything up to the first ';' (dimension line)
  dump=${dump%;*}   # drop the ';' that terminates the data line
  printf '%s' "$dump" | tr -d ' \t\n'
}

# record_dim FILE
# Prints the name of the UNLIMITED (record) dimension in FILE's header,
# or nothing if the header declares none.
record_dim() {
  ncdump -h "$1" \
    | sed -n 's/\([a-z,A-Z]*\) = \(UNLIMITED\) .*/\1/p' \
    | tr -d ' \t'
}

# Remove the scratch directory; installed as EXIT trap so that it also runs
# on every error path.
cleanup() {
  cd "$DIRORIG" && rm -rf -- "${DIRNAME:?}"
}

# Refuse to do anything (including creating the scratch directory) when no
# input files were given.
: "${1:?You must input mnc filenames to be glued}"

DIRORIG=$(pwd)
: "${TMPDIR:=$DIRORIG}"

DIRNAME=$(mktemp -d "$TMPDIR/gluedir.XXXXXX") \
  || die "Error: cannot create a temporary directory in $TMPDIR"
export DIRNAME
trap cleanup EXIT
echo "Using temporary directory $DIRNAME"

# xplodemnc is run from inside the scratch directory, so copy it there.
if [ -f xplodemnc ]; then
  cp xplodemnc "$DIRNAME"
else
  xplode_path=$(command -v xplodemnc) \
    || die "Error: xplodemnc not found in the current directory or in PATH"
  cp "$xplode_path" "$DIRNAME"
fi

# find an unambiguous name for a new record dimension
# (reuses the random suffix of the scratch directory)
myrecname="record${DIRNAME##*.}"

cd "$DIRNAME" || die "Error: cannot change to $DIRNAME"

# Link every input file into the scratch directory and make sure it is there.
for somefile in "$@"; do
  ln -s "$DIRORIG/$somefile" .
  if [ ! -s "$somefile" ]; then
    die "Error: $somefile is missing or empty"
  fi
done

# Distinct file name prefixes (file name without the .tNNN.nc tile suffix),
# in order of first appearance.
prels=()
for somefile in "$@"; do
  somepre=${somefile%.t???.nc}
  contains "$somepre" "${prels[@]}" || prels+=("$somepre")
done

# ML: determine the coordinate variable (this is hack for the unlikely
# case that we do not have X or Y as coordinate variables; this can
# happen, when only U-point or V-point variables are written to a
# diagnostics stream; I do not know if this always works, but it works for me)
echo "Determine a usable coordinate variable"
# Only the first prefix is probed; expanding the whole prefix list here
# would yield several words as soon as more than one prefix is glued.
somefile=${prels[0]}.t001.nc
# first try X and Y
Xcoord=X
Ycoord=Y
Xtest=$(ncdump -vX -l 10000 "$somefile" | grep "X = ")
Ytest=$(ncdump -vY -l 10000 "$somefile" | grep "Y = ")
if [ ${#Xtest} = 0 ]; then
  echo "X not found, trying Xp1"
  Xtest=$(ncdump -vXp1 -l 10000 "$somefile" | grep "Xp1 = ")
  Xcoord=Xp1
fi
if [ ${#Xtest} = 0 ]; then
  echo "no X-coordinate found"
  Xcoord=
fi
if [ ${#Ytest} = 0 ]; then
  echo "Y not found, trying Yp1"
  Ytest=$(ncdump -vYp1 -l 10000 "$somefile" | grep "Yp1 = ")
  Ycoord=Yp1
fi
if [ ${#Ytest} = 0 ]; then
  echo "no Y-coordinate found"
  Ycoord=
fi
if [ -z "$Xcoord" ] || [ -z "$Ycoord" ]; then
  die "cannot continue"
fi

for somepre in "${prels[@]}"; do
  echo "Making $somepre.glob.nc..."

  # Scan the tiles of this prefix once.  tilefiles/tilexkeys are parallel
  # arrays (file name, X key); xkeys/ykeys hold the distinct tile positions.
  tilefiles=()
  tilexkeys=()
  xkeys=()
  ykeys=()
  for somefile in "$@"; do
    if [ "${somefile%.t???.nc}" = "$somepre" ]; then
      echo "Scanning $somefile..."
      Xs=$(coord_key "$Xcoord" "$somefile")
      Ys=$(coord_key "$Ycoord" "$somefile")
      tilefiles+=("$somefile")
      tilexkeys+=("$Xs")
      # Empty keys (coordinate not found in this file) define no tile.
      if [ -n "$Xs" ] && ! contains "$Xs" "${xkeys[@]}"; then
        xkeys+=("$Xs")
      fi
      if [ -n "$Ys" ] && ! contains "$Ys" "${ykeys[@]}"; then
        ykeys+=("$Ys")
      fi
    fi
  done

  echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  echo "${#xkeys[@]} tiles found in x-direction."
  echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

  echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
  echo "${#ykeys[@]} tiles found in y-direction."
  echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY

  # X tiles are numbered 1001, 1002, ... so that the tags have a fixed
  # width of four digits (the sed expression further down relies on that).
  countxx=1000
  cntls=()
  for someX in "${xkeys[@]}"; do
    countxx=$((countxx + 1))
    cntls+=("$countxx")
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    echo "Prepping X tile $((countxx - 1000))"
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    for idx in "${!tilefiles[@]}"; do
      [ "${tilexkeys[idx]}" = "$someX" ] || continue
      somefile=${tilefiles[idx]}

      # The code below expects xplodemnc to leave one <variable>.<tile file>
      # per variable, plus possibly iter.<tile file>, in this directory.
      ./xplodemnc "$somefile"
      if [ -f "iter.$somefile" ]; then
        mv "iter.$somefile" "iter.${somefile%t???.nc}glob.nc"
      fi
      for somesplit in *."$somefile"; do
        # An unmatched glob stays literal; skip it.
        [ -e "$somesplit" ] || continue
        # Added to account for grid files with no T dimension defined:
        # Search for the unlimited dimension and get its name, assuming
        # that its first character is a letter from the alphabet, the "tr"
        # removes the blank characters
        recname=$(record_dim "$somesplit")
        if [[ -z "$recname" ]]; then
          echo "No record dimension found, adding one now: $myrecname"
          ncecat "$DEBUG" -O -u "$myrecname" "$somesplit" "$somesplit" > "$LOGFILE"
          recname=$myrecname
        fi
        # Files with a Y (or Yp1) dimension get that dimension swapped with
        # the record dimension, so that ncrcat can later concatenate the
        # tiles along Y, and are tagged with the X tile number.
        withY=$(ncdump -h "$somesplit" | grep "Y =")
        if [ ${#withY} -gt 1 ]; then
          echo "Changing Y to record variable in $somesplit"
          ncpdq "$DEBUG" -O -a "Y,$recname" "$somesplit" "$somesplit" > "$LOGFILE"
          mv "$somesplit" "i$countxx.$somesplit"
        fi
        if [ -f "$somesplit" ]; then
          withYp1=$(ncdump -h "$somesplit" | grep "Yp1 =")
          if [ ${#withYp1} -gt 1 ]; then
            Yp1len=${withYp1#*= }
            Yp1len=$((${Yp1len% ;} - 1))
            # Strip off the repeated value in Yp1
            echo "Changing Yp1 to record variable in $somesplit"
            ncpdq "$DEBUG" -O -a "Yp1,$recname" -F -d "Yp1,1,$Yp1len" \
              "$somesplit" "$somesplit" > "$LOGFILE"
            mv "$somesplit" "i$countxx.$somesplit"
          fi
        fi
      done
    done
  done

  echo "Tile names ${cntls[*]}"

  # varls deliberately survives this loop: the X-combination stage below
  # uses the variable list of the last X tile.
  varls=()
  for countxx in "${cntls[@]}"; do
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    echo "Combining X tile $((countxx - 1000))"
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    varls=()
    for somefile in i"$countxx".*; do
      [ -e "$somefile" ] || continue
      # i<tag>.<variable>.<prefix>.tNNN.nc  ->  <variable>
      varname=$(printf '%s\n' "$somefile" \
        | sed 's/^i[0-9][0-9][0-9][0-9]\.//; s/\..*nc//')
      # Check the whole list rather than only the previous name, so the
      # result does not depend on the locale's sort order.
      if contains "$varname" "${varls[@]}"; then
        echo "$varname repeated"
      else
        varls+=("$varname")
      fi
    done
    echo "Found these variables to combine: ${varls[*]}"
    for somevar in "${varls[@]}"; do
      echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
      echo "Combining $somevar files in Y"
      echo YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
      filelist=(i"$countxx.$somevar.$somepre".*.nc)
      withY=$(ncdump -h "${filelist[0]}" | grep "Y =")
      withYp1=$(ncdump -h "${filelist[0]}" | grep "Yp1 =")
      ncrcat "$DEBUG" "${filelist[@]}" "$somevar.$somepre.gloy.$countxx.nc" > "$LOGFILE"
      echo "Just combined $countxx.$somevar"
      rm i"$countxx.$somevar.$somepre".t???.nc
      # Recover the name of the record variable, there are two possibilities:
      # T (MITgcm convention) and $myrecname.
      # I must admit that this could be more elegant.
      if ncdump -h "$somevar.$somepre.gloy.$countxx.nc" | grep -q "$myrecname"; then
        recname=$myrecname
      else
        recname=T
      fi
      if [ ${#withY} -gt 1 ]; then
        echo "Changing $recname to record variable in $somevar.$somepre.gloy.$countxx.nc"
        ncpdq "$DEBUG" -O -a "$recname,Y" "$somevar.$somepre.gloy.$countxx.nc" \
          "$somevar.$somepre.gloy.$countxx.nc" > "$LOGFILE"
      fi
      if [ ${#withYp1} -gt 1 ]; then
        echo "Changing $recname to record variable in $somevar.$somepre.gloy.$countxx.nc"
        ncpdq "$DEBUG" -O -a "$recname,Yp1" "$somevar.$somepre.gloy.$countxx.nc" \
          "$somevar.$somepre.gloy.$countxx.nc" > "$LOGFILE"
      fi
    done
  done

  for somevar in "${varls[@]}"; do
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    echo "Combining $somevar files in X..."
    echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    for somegloy in "$somevar.$somepre".gloy.*.nc; do
      [ -e "$somegloy" ] || continue
      withX=$(ncdump -h "$somegloy" | grep "X =")
      withXp1=$(ncdump -h "$somegloy" | grep "Xp1 =")
      recname=$(record_dim "$somegloy")
      if [ ${#withX} -gt 1 ]; then
        echo "Changing X to record variable in $somegloy"
        ncpdq "$DEBUG" -O -a "X,$recname" "$somegloy" "$somegloy" > "$LOGFILE"
      fi
      if [ ${#withXp1} -gt 1 ]; then
        Xp1len=${withXp1#*= }
        Xp1len=$((${Xp1len% ;} - 1))
        # Strip off the repeated value in Xp1
        echo "Changing Xp1 to record variable in $somegloy"
        # Record the exact command in the log file before running it.
        echo ncpdq "$DEBUG" -O -a "Xp1,$recname" -F -d "Xp1,1,$Xp1len" \
          "$somegloy" "$somegloy" > "$LOGFILE"
        ncpdq "$DEBUG" -O -a "Xp1,$recname" -F -d "Xp1,1,$Xp1len" \
          "$somegloy" "$somegloy" > "$LOGFILE"
      fi
    done
    echo "Combining $somevar.gloy files..."
    ncrcat "$DEBUG" "$somevar.$somepre".gloy.*.nc "$somevar.$somepre.glob.nc" > "$LOGFILE"
    # rm $somevar.$somepre.gloy.*.nc
    # recname is still valid, so change back to it without testing for it again
    if [ ${#withX} -gt 1 ]; then
      echo "Changing $recname to record variable in $somevar.$somepre.glob.nc"
      ncpdq "$DEBUG" -O -a "$recname,X" "$somevar.$somepre.glob.nc" \
        "$somevar.$somepre.glob.nc" > "$LOGFILE"
    fi
    if [ ${#withXp1} -gt 1 ]; then
      echo "Changing $recname to record variable in $somevar.$somepre.glob.nc"
      ncpdq "$DEBUG" -O -a "$recname,Xp1" "$somevar.$somepre.glob.nc" \
        "$somevar.$somepre.glob.nc" > "$LOGFILE"
    fi
    if [ "$recname" = "$myrecname" ]; then
      # only for variables that did not have a record dimension to begin with
      echo "removing record dimension $recname from $somevar.$somepre.glob.nc"
      ncwa "$DEBUG" -O -a "$recname" "$somevar.$somepre.glob.nc" "$somevar.$somepre.glob.nc"
    fi
    ncks "$DEBUG" -A "$somevar.$somepre.glob.nc" "$somepre.glob.nc" > "$LOGFILE"
    # rm $somevar.$somepre.glob.nc
  done

  if [ -f "iter.$somepre.glob.nc" ]; then
    ncks "$DEBUG" -A "iter.$somepre.glob.nc" "$somepre.glob.nc" > "$LOGFILE"
  fi
  # rm iter.$somepre.glob.nc

  # another hack by rpa to accommodate grid.nc files
  # (there are several variables with just Z dimension that we want to keep)
  # varsz=$( ncdump -h $somepre.t001.nc | sed -n 's/^\s*\(double\|float\).* \(\w*\)(Z\w*).*/\2/p' )
  # The OR ("\|") and "\s", "\w" only work for GNU-sed, but not for
  # BSD-sed or SunOS-sed, therefore we need to use some work-arounds:
  varsz=$(ncdump -h "$somepre.t001.nc" | grep -E "double|float" \
    | grep -v , | sed -n 's/.* \(.*\)(Z.*).*/\1/p')
  # $varsz is intentionally left unquoted: one variable name per word.
  for varz in $varsz; do
    # check to make sure the variable does not already exist in the glob file
    if ! ncdump -h "$somepre.glob.nc" | grep -qF -- " $varz("; then
      echo "Adding variable $varz to $somepre.glob.nc"
      ncks "$DEBUG" -A -v "$varz" "$somepre.t001.nc" "$somepre.glob.nc" > "$LOGFILE"
    fi
  done

  cp "$somepre.glob.nc" "$DIRORIG"
done

# The scratch directory is removed by the EXIT trap (cleanup).