C $Header: /u/gcmpack/MITgcm/model/src/cg2d_sr.F,v 1.6 2012/05/11 23:28:48 jmc Exp $ C $Name: $ #include "CPP_OPTIONS.h" #ifdef TARGET_NEC_SX C set a sensible default for the outer loop unrolling parameter that can C be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h #ifndef CG2D_OUTERLOOPITERS # define CG2D_OUTERLOOPITERS 10 #endif #endif /* TARGET_NEC_SX */ CBOP C !ROUTINE: CG2D_SR C !INTERFACE: SUBROUTINE CG2D_SR( U cg2d_b, cg2d_x, O firstResidual, minResidualSq, lastResidual, U numIters, nIterMin, I myThid ) C !DESCRIPTION: \bv C *==========================================================* C | SUBROUTINE CG2D C | o Two-dimensional grid problem conjugate-gradient C | inverter (with preconditioner). C *==========================================================* C | Con. grad is an iterative procedure for solving Ax = b. C | It requires the A be symmetric. C | This implementation assumes A is a five-diagonal C | matrix of the form that arises in the discrete C | representation of the del^2 operator in a C | two-dimensional space. C | Notes: C | ====== C | This implementation can support shared-memory C | multi-threaded execution. In order to do this COMMON C | blocks are used for many of the arrays - even ones that C | are only used for intermedaite results. This design is C | OK if you want to all the threads to collaborate on C | solving the same problem. On the other hand if you want C | the threads to solve several different problems C | concurrently this implementation will not work. C | C | This version implements the single-reduction CG algorithm of C | d Azevedo, Eijkhout, and Romine (Lapack Working Note 56, 1999). C | C. Wolfe, November 2009, clwolfe@ucsd.edu C *==========================================================* C \ev C !USES: IMPLICIT NONE C === Global data === #include "SIZE.h" #include "EEPARAMS.h" #include "PARAMS.h" #include "CG2D.h" C !INPUT/OUTPUT PARAMETERS: C === Routine arguments === C cg2d_b :: The source term or "right hand side" (output: normalised RHS) C cg2d_x :: The solution (input: first guess) C firstResidual :: the initial residual before any iterations C minResidualSq :: the lowest residual reached (squared) C lastResidual :: the actual residual reached C numIters :: Inp: the maximum number of iterations allowed C Out: the actual number of iterations used C nIterMin :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol. C Out: iteration number corresponding to lowest residual C myThid :: Thread on which I am working. _RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) _RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) _RL firstResidual _RL minResidualSq _RL lastResidual INTEGER numIters INTEGER nIterMin INTEGER myThid #ifdef ALLOW_SRCG C !LOCAL VARIABLES: C === Local variables ==== C bi, bj :: tile index in X and Y. C i, j, it2d :: Loop counters ( it2d counts CG iterations ) C actualIts :: actual CG iteration number C err_sq :: Measure of the square of the residual of Ax - b. C eta_qrN :: Used in computing search directions; suffix N and NM1 C eta_qrNM1 denote current and previous iterations respectively. C cgBeta :: coeff used to update conjugate direction vector "s". C alpha :: coeff used to update solution & residual C sumRHS :: Sum of right-hand-side. Sometimes this is a useful C debugging/trouble shooting diagnostic. For neumann problems C sumRHS needs to be ~0 or it converge at a non-zero residual. C cg2d_min :: used to store solution corresponding to lowest residual. C msgBuf :: Informational/error message buffer INTEGER bi, bj INTEGER i, j, it2d c INTEGER actualIts _RL cg2dTolerance_sq _RL err_sq, errTile(nSx,nSy) _RL eta_qrN, eta_qrNtile(nSx,nSy) _RL eta_qrNM1 _RL cgBeta _RL alpha, alphaTile(nSx,nSy) _RL sigma _RL delta, deltaTile(nSx,nSy) _RL sumRHS, sumRHStile(nSx,nSy) _RL rhsMax _RL rhsNorm _RL cg2d_min(1:sNx,1:sNy,nSx,nSy) CHARACTER*(MAX_LEN_MBUF) msgBuf LOGICAL printResidual CEOP C-- Initialise auxiliary constant, some output variable and inverter cg2dTolerance_sq = cg2dTolerance*cg2dTolerance minResidualSq = -1. _d 0 eta_qrNM1 = 1. _d 0 C-- Normalise RHS rhsMax = 0. _d 0 DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO j=1,sNy DO i=1,sNx cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax) ENDDO ENDDO ENDDO ENDDO IF (cg2dNormaliseRHS) THEN C- Normalise RHS : _GLOBAL_MAX_RL( rhsMax, myThid ) rhsNorm = 1. _d 0 IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO j=1,sNy DO i=1,sNx cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm ENDDO ENDDO ENDDO ENDDO C- end Normalise RHS ENDIF C-- Update overlaps CALL EXCH_XY_RL( cg2d_x, myThid ) C-- Initial residual calculation DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) IF ( nIterMin.GE.0 ) THEN DO j=1,sNy DO i=1,sNx cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj) ENDDO ENDDO ENDIF DO j=0,sNy+1 DO i=0,sNx+1 cg2d_s(i,j,bi,bj) = 0. ENDDO ENDDO sumRHStile(bi,bj) = 0. _d 0 errTile(bi,bj) = 0. _d 0 #ifdef TARGET_NEC_SX !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS #endif /* TARGET_NEC_SX */ DO j=1,sNy DO i=1,sNx cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) - & (aW2d(i ,j ,bi,bj)*cg2d_x(i-1,j ,bi,bj) & +aW2d(i+1,j ,bi,bj)*cg2d_x(i+1,j ,bi,bj) & +aS2d(i ,j ,bi,bj)*cg2d_x(i ,j-1,bi,bj) & +aS2d(i ,j+1,bi,bj)*cg2d_x(i ,j+1,bi,bj) & +aC2d(i ,j ,bi,bj)*cg2d_x(i ,j ,bi,bj) & ) errTile(bi,bj) = errTile(bi,bj) & + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj) sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO CALL EXCH_S3D_RL( cg2d_r, 1, myThid ) CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid ) CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid ) it2d = 0 c actualIts = 0 firstResidual = SQRT(err_sq) IF ( nIterMin.GE.0 ) THEN nIterMin = 0 minResidualSq = err_sq ENDIF printResidual = .FALSE. IF ( debugLevel .GE. debLevZero ) THEN _BEGIN_MASTER( myThid ) printResidual = printResidualFreq.GE.1 WRITE(standardmessageunit,'(A,1P2E22.14)') & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax _END_MASTER( myThid ) ENDIF IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11 C DER (1999) do one iteration outside of the loop to start things up. C-- Solve preconditioning equation and update C-- conjugate direction vector "s". DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) eta_qrNtile(bi,bj) = 0. _d 0 #ifdef TARGET_NEC_SX !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS #endif /* TARGET_NEC_SX */ DO j=1,sNy DO i=1,sNx cg2d_y(i,j,bi,bj) = & pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj) & +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj) & +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj) & +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj) & +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj) cg2d_s(i,j,bi,bj) = cg2d_y(i,j,bi,bj) eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj) & +cg2d_y(i,j,bi,bj)*cg2d_r(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO CALL EXCH_S3D_RL( cg2d_s, 1, myThid ) CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid ) eta_qrNM1 = eta_qrN C== Evaluate laplace operator on conjugate gradient vector C== q = A.s DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) alphaTile(bi,bj) = 0. _d 0 #ifdef TARGET_NEC_SX !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS #endif /* TARGET_NEC_SX */ DO j=1,sNy DO i=1,sNx cg2d_q(i,j,bi,bj) = & aW2d(i ,j ,bi,bj)*cg2d_s(i-1,j ,bi,bj) & +aW2d(i+1,j ,bi,bj)*cg2d_s(i+1,j ,bi,bj) & +aS2d(i ,j ,bi,bj)*cg2d_s(i ,j-1,bi,bj) & +aS2d(i ,j+1,bi,bj)*cg2d_s(i ,j+1,bi,bj) & +aC2d(i ,j ,bi,bj)*cg2d_s(i ,j ,bi,bj) alphaTile(bi,bj) = alphaTile(bi,bj) & + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO CALL GLOBAL_SUM_TILE_RL( alphaTile, alpha, myThid ) sigma = eta_qrN/alpha C== Update simultaneously solution and residual vectors (and Iter number) C Now compute "interior" points. DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO j=1,sNy DO i=1,sNx cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+sigma*cg2d_s(i,j,bi,bj) cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-sigma*cg2d_q(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO c actualIts = 1 CALL EXCH_S3D_RL( cg2d_r,1, myThid ) C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< DO 10 it2d=1, numIters-1 c actualIts = it2d C-- Solve preconditioning equation and update C-- conjugate direction vector "s". C-- z = M^-1 r DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) #ifdef TARGET_NEC_SX !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS #endif /* TARGET_NEC_SX */ DO j=1,sNy DO i=1,sNx cg2d_y(i,j,bi,bj) = & pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj) & +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj) & +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj) & +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj) & +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj) ENDDO ENDDO ENDDO ENDDO CALL EXCH_S3D_RL( cg2d_y, 1, myThid ) C== v = A.z C-- eta_qr = C-- delta = C-- Do the error calcuation here to consolidate global reductions DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) eta_qrNtile(bi,bj) = 0. _d 0 deltaTile(bi,bj) = 0. _d 0 errTile(bi,bj) = 0. _d 0 #ifdef TARGET_NEC_SX !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS #endif /* TARGET_NEC_SX */ DO j=1,sNy DO i=1,sNx cg2d_v(i,j,bi,bj) = & aW2d(i ,j ,bi,bj)*cg2d_y(i-1,j ,bi,bj) & +aW2d(i+1,j ,bi,bj)*cg2d_y(i+1,j ,bi,bj) & +aS2d(i ,j ,bi,bj)*cg2d_y(i ,j-1,bi,bj) & +aS2d(i ,j+1,bi,bj)*cg2d_y(i ,j+1,bi,bj) & +aC2d(i ,j ,bi,bj)*cg2d_y(i ,j ,bi,bj) eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj) & +cg2d_y(i,j,bi,bj)*cg2d_r(i,j,bi,bj) deltaTile(bi,bj) = deltaTile(bi,bj) & +cg2d_y(i,j,bi,bj)*cg2d_v(i,j,bi,bj) errTile(bi,bj) = errTile(bi,bj) & + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj) ENDDO ENDDO sumPhi(1,bi,bj) = eta_qrNtile(bi,bj) sumPhi(2,bi,bj) = deltaTile(bi,bj) sumPhi(3,bi,bj) = errTile(bi,bj) ENDDO ENDDO C CALL GLOBAL_SUM_TILE_RL( eta_qrNtile, eta_qrN,myThid ) C CALL GLOBAL_SUM_TILE_RL( deltaTile, delta, myThid ) C CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid ) CALL GLOBAL_VEC_SUM_R8( 3, 3, sumPhi, myThid ) eta_qrN = sumPhi(1,1,1) delta = sumPhi(2,1,1) err_sq = sumPhi(3,1,1) IF ( printResidual ) THEN IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN WRITE(msgBuf,'(A,I6,A,1PE21.14)') & ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq) CALL PRINT_MESSAGE( msgBuf, standardMessageUnit, & SQUEEZE_RIGHT, myThid ) ENDIF ENDIF IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11 IF ( err_sq .LT. minResidualSq ) THEN C- Store lowest residual solution minResidualSq = err_sq nIterMin = it2d DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO j=1,sNy DO i=1,sNx cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO ENDIF cgBeta = eta_qrN/eta_qrNM1 eta_qrNM1 = eta_qrN alpha = delta - (cgBeta*cgBeta)*alpha sigma = eta_qrN/alpha C== Update simultaneously solution and residual vectors (and Iter number) DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO j=1,sNy DO i=1,sNx cg2d_s(i,j,bi,bj) = cg2d_y(i,j,bi,bj) & + cgBeta*cg2d_s(i,j,bi,bj) cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj) & + sigma*cg2d_s(i,j,bi,bj) cg2d_q(i,j,bi,bj) = cg2d_v(i,j,bi,bj) & + cgBeta*cg2d_q(i,j,bi,bj) cg2d_r(i,j,bi,bj) = cg2d_r(i,j,bi,bj) & - sigma*cg2d_q(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO c actualIts = it2d + 1 CALL EXCH_S3D_RL( cg2d_r, 1, myThid ) 10 CONTINUE DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) errTile(bi,bj) = 0. _d 0 DO j=1,sNy DO i=1,sNx errTile(bi,bj) = errTile(bi,bj) & + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid ) 11 CONTINUE IF ( nIterMin.GE.0 .AND. err_sq .GT. minResidualSq ) THEN C- use the lowest residual solution (instead of current one = last residual) DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO j=1,sNy DO i=1,sNx cg2d_x(i,j,bi,bj) = cg2d_min(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO ENDIF IF (cg2dNormaliseRHS) THEN C-- Un-normalise the answer DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO j=1,sNy DO i=1,sNx cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm ENDDO ENDDO ENDDO ENDDO ENDIF C-- Return parameters to caller lastResidual = SQRT(err_sq) numIters = it2d c numIters = actualIts #endif /* ALLOW_SRCG */ RETURN END