C $Header: /u/gcmpack/MITgcm/pkg/exch2/exch2_rx1_cube.template,v 1.15 2012/09/03 19:39:25 jmc Exp $
C $Name:  $

#include "CPP_EEOPTIONS.h"

CBOP
C     !ROUTINE: EXCH_R4_CUBE

C     !INTERFACE:
      SUBROUTINE EXCH2_R41_CUBE(
     U            array,
     I            signOption, fieldCode,
     I            myOLw, myOLe, myOLs, myOLn, myNz,
     I            exchWidthX, exchWidthY,
     I            cornerMode, myThid )

C     !DESCRIPTION:
C     Scalar field (1 component) Exchange:
C     Fill-in tile-edge overlap-region of a 1 component scalar field
C     with corresponding near-edge interior data point

C     !USES:
      IMPLICIT NONE

C     == Global data ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "W2_EXCH2_SIZE.h"
#include "W2_EXCH2_TOPOLOGY.h"
#include "W2_EXCH2_BUFFER.h"

C     !INPUT/OUTPUT PARAMETERS:
C     array       :: Array with edges to exchange.
C     signOption  :: Flag controlling whether field sign depends on orientation
C                 :: (signOption not yet implemented but needed for SM exch)
C     fieldCode   :: field code (position on staggered grid)
C     myOLw,myOLe :: West and East overlap region sizes.
C     myOLs,myOLn :: South and North overlap region sizes.
C     exchWidthX  :: Width of data region exchanged in X.
C     exchWidthY  :: Width of data region exchanged in Y.
C     cornerMode  :: halo-corner-region treatment: update/ignore corner region
C     myThid      :: my Thread Id. number

      INTEGER myOLw, myOLe, myOLs, myOLn, myNz
      _R4 array(1-myOLw:sNx+myOLe,
     &          1-myOLs:sNy+myOLn,
     &          myNz, nSx, nSy)
      LOGICAL signOption
      CHARACTER*2 fieldCode
      INTEGER exchWidthX
      INTEGER exchWidthY
      INTEGER cornerMode
      INTEGER myThid

C     !LOCAL VARIABLES:
C     e2_msgHandles :: Synchronization and coordination data structure used to
C                   :: coordinate access to e2Bufr1_R4 or to regulate message
C                   :: buffering. In PUT communication sender will increment
C                   :: handle entry once data is ready in buffer. Receiver will
C                   :: decrement handle once data is consumed from buffer.
C                   :: For MPI MSG communication MPI_Wait uses handle to check
C                   :: Isend has cleared. This is done in routine after receives.
C     note: a) current implementation does not use e2_msgHandles for communication
C              between threads: all-threads barriers are used (see CNH note below).
C              For a 2-threads synchro communication (future version),
C              e2_msgHandles should be shared (in common block, moved to BUFFER.h)
C           b) 1rst dim=2 so that it could be used also by exch2_rx2_cube.
      INTEGER bi, bj
C     Variables for working through W2 topology
      INTEGER e2_msgHandles( 2, W2_maxNeighbours, nSx, nSy )
      INTEGER thisTile, farTile, N, nN, oN
      INTEGER tIlo, tIhi, tJlo, tJhi, tKlo, tKhi
      INTEGER tIStride, tJStride, tKStride
      INTEGER i1Lo, i1Hi, j1Lo, j1Hi, k1Lo, k1Hi
      LOGICAL updateCorners

#ifdef ALLOW_USE_MPI
      INTEGER iBufr, nri, nrj
C     MPI stuff (should be in a routine call)
      INTEGER mpiStatus(MPI_STATUS_SIZE)
      INTEGER mpiRc
      INTEGER wHandle
#endif
CEOP

      updateCorners = cornerMode .EQ. EXCH_UPDATE_CORNERS
C-    Tile size of array to exchange:
      i1Lo  = 1-myOLw
      i1Hi  = sNx+myOLe
      j1Lo  = 1-myOLs
      j1Hi  = sNy+myOLn
      k1Lo  = 1
      k1Hi  = myNz

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|

C     Prevent anyone to access shared buffer while an other thread modifies it
      CALL BAR2( myThid )

C--   Post sends into buffer (buffer level 1):
      DO bj=myByLo(myThid), myByHi(myThid)
       DO bi=myBxLo(myThid), myBxHi(myThid)
        thisTile=W2_myTileList(bi,bj)
        nN=exch2_nNeighbours(thisTile)
        DO N=1,nN
          farTile=exch2_neighbourId(N,thisTile)
          oN = exch2_opposingSend(N,thisTile)
          CALL EXCH2_GET_SCAL_BOUNDS(
     I               fieldCode, exchWidthX, updateCorners,
     I               farTile, oN,
     O               tIlo, tiHi, tjLo, tjHi,
     O               tiStride, tjStride,
     I               myThid )
          tKLo=1
          tKHi=myNz
          tKStride=1
C-    Put my points in buffer for neighbour N to fill points
C     (tIlo:tIhi:tiStride,tJlo:tJhi,tJStride,tKlo:tKhi,tKStride)
C     in its copy of "array".
          CALL EXCH2_PUT_R41(
     I               tIlo, tIhi, tiStride,
     I               tJlo, tJhi, tjStride,
     I               tKlo, tKhi, tkStride,
     I               thisTile, N,
     I               e2BufrRecSize,
     O               iBuf1Filled(N,bi,bj),
     O               e2Bufr1_R4(1,N,bi,bj,1),
     I               array(1-myOLw,1-myOLs,1,bi,bj),
     I               i1Lo, i1Hi, j1Lo, j1Hi, k1Lo, k1Hi,
     O               e2_msgHandles(1,N,bi,bj),
     I               W2_myCommFlag(N,bi,bj), myThid )
        ENDDO
       ENDDO
      ENDDO

C     Wait until all threads finish filling buffer
      CALL BAR2( myThid )

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|

#ifdef ALLOW_USE_MPI
      IF ( usingMPI ) THEN

      _BEGIN_MASTER( myThid )

C--   Send my data (in buffer, level 1) to target Process
      DO bj=1,nSy
       DO bi=1,nSx
        thisTile=W2_myTileList(bi,bj)
        nN=exch2_nNeighbours(thisTile)
        DO N=1,nN
C-    Skip the call if this is an internal exchange
         IF ( W2_myCommFlag(N,bi,bj) .EQ. 'M' ) THEN
          CALL EXCH2_SEND_R41(
     I               thisTile, N,
     I               e2BufrRecSize,
     I               iBuf1Filled(N,bi,bj),
     I               e2Bufr1_R4(1,N,bi,bj,1),
     O               e2_msgHandles(1,N,bi,bj),
     I               W2_myCommFlag(N,bi,bj), myThid )
         ENDIF
        ENDDO
       ENDDO
      ENDDO

C--   Receive data (in buffer, level 2) from source Process
      DO bj=1,nSy
       DO bi=1,nSx
        thisTile=W2_myTileList(bi,bj)
        nN=exch2_nNeighbours(thisTile)
        DO N=1,nN
C-    Skip the call if this is an internal exchange
         IF ( W2_myCommFlag(N,bi,bj) .EQ. 'M' ) THEN
          CALL EXCH2_GET_SCAL_BOUNDS(
     I               fieldCode, exchWidthX, updateCorners,
     I               thisTile, N,
     O               tIlo, tiHi, tjLo, tjHi,
     O               tiStride, tjStride,
     I               myThid )
          nri = 1 + (tIhi-tIlo)/tiStride
          nrj = 1 + (tJhi-tJlo)/tjStride
          iBufr = nri*nrj*myNz
C       Receive from neighbour N to fill buffer and later on the array
          CALL EXCH2_RECV_R41(
     I               thisTile, N,
     I               e2BufrRecSize,
     I               iBufr,
     O               e2Bufr1_R4(1,N,bi,bj,2),
     I               W2_myCommFlag(N,bi,bj), myThid )
         ENDIF
        ENDDO
       ENDDO
      ENDDO

C--   Clear message handles/locks
      DO bj=1,nSy
       DO bi=1,nSx
        thisTile=W2_myTileList(bi,bj)
        nN=exch2_nNeighbours(thisTile)
        DO N=1,nN
C     Note: In a between process tile-tile data transport using
C           MPI the sender needs to clear an Isend wait handle here.
C           In a within process tile-tile data transport using true
C           shared address space/or direct transfer through commonly
C           addressable memory blocks the receiver needs to assert
C           that he has consumed the buffer the sender filled here.
         farTile=exch2_neighbourId(N,thisTile)
         IF     ( W2_myCommFlag(N,bi,bj) .EQ. 'M' ) THEN
          wHandle = e2_msgHandles(1,N,bi,bj)
          CALL MPI_Wait( wHandle, mpiStatus, mpiRc )
         ELSEIF ( W2_myCommFlag(N,bi,bj) .EQ. 'P' ) THEN
         ELSE
         ENDIF
        ENDDO
       ENDDO
      ENDDO

      _END_MASTER( myThid )
C     Everyone waits until master-thread finishes receiving
      CALL BAR2( myThid )

      ENDIF
#endif /* ALLOW_USE_MPI */

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|

C--   Extract from buffer (either from level 1 if local exch,
C                     or level 2 if coming from an other Proc)
      DO bj=myByLo(myThid), myByHi(myThid)
       DO bi=myBxLo(myThid), myBxHi(myThid)
        thisTile=W2_myTileList(bi,bj)
        nN=exch2_nNeighbours(thisTile)
        DO N=1,nN
          CALL EXCH2_GET_SCAL_BOUNDS(
     I               fieldCode, exchWidthX, updateCorners,
     I               thisTile, N,
     O               tIlo, tiHi, tjLo, tjHi,
     O               tiStride, tjStride,
     I               myThid )
          tKLo=1
          tKHi=myNz
          tKStride=1

C     From buffer, get my points
C     (tIlo:tIhi:tiStride,tJlo:tJhi,tJStride,tKlo:tKhi,tKStride) in "array":
C     Note: when transferring data within a process:
C      o e2Bufr entry to read is entry associated with opposing send record
C      o e2_msgHandle entry to read is entry associated with opposing send record.
          CALL EXCH2_GET_R41(
     I               tIlo, tIhi, tiStride,
     I               tJlo, tJhi, tjStride,
     I               tKlo, tKhi, tkStride,
     I               thisTile, N, bi, bj,
     I               e2BufrRecSize, W2_maxNeighbours, nSx, nSy,
     I               e2Bufr1_R4,
     U               array(1-myOLw,1-myOLs,1,bi,bj),
     I               i1Lo, i1Hi, j1Lo, j1Hi, k1Lo, k1Hi,
     U               e2_msgHandles,
     I               W2_myCommFlag(N,bi,bj), myThid )
        ENDDO
       ENDDO
      ENDDO

      RETURN
      END

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|

CEH3 ;;; Local Variables: ***
CEH3 ;;; mode:fortran ***
CEH3 ;;; End: ***