Actual source code: fbcgsr.c


  2: /*
  3:     This file implements FBiCGStab-R.
  4:     Only allow right preconditioning.
  5:     FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are:
  6:       (1) There are fewer MPI_Allreduce calls.
  7:       (2) The convergence occasionally is much faster than that of FBiCGStab.
  8: */
  9: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>
 10: #include <petsc/private/vecimpl.h>

 12: static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp)
 13: {
 14:   KSPSetWorkVecs(ksp, 8);
 15:   return 0;
 16: }

 18: static PetscErrorCode KSPSolve_FBCGSR(KSP ksp)
 19: {
 20:   PetscInt                    i, j, N;
 21:   PetscScalar                 tau, sigma, alpha, omega, beta;
 22:   PetscReal                   rho;
 23:   PetscScalar                 xi1, xi2, xi3, xi4;
 24:   Vec                         X, B, P, P2, RP, R, V, S, T, S2;
 25:   PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
 26:   PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
 27:   PetscScalar insums[4], outsums[4];
 28:   KSP_BCGS   *bcgs = (KSP_BCGS *)ksp->data;
 29:   PC          pc;
 30:   Mat         mat;

 33:   VecGetLocalSize(ksp->vec_sol, &N);

 35:   X  = ksp->vec_sol;
 36:   B  = ksp->vec_rhs;
 37:   P2 = ksp->work[0];

 39:   /* The followings are involved in modified inner product calculations and vector updates */
 40:   RP = ksp->work[1];
 41:   VecGetArray(RP, (PetscScalar **)&rp);
 42:   VecRestoreArray(RP, NULL);
 43:   R = ksp->work[2];
 44:   VecGetArray(R, (PetscScalar **)&r);
 45:   VecRestoreArray(R, NULL);
 46:   P = ksp->work[3];
 47:   VecGetArray(P, (PetscScalar **)&p);
 48:   VecRestoreArray(P, NULL);
 49:   V = ksp->work[4];
 50:   VecGetArray(V, (PetscScalar **)&v);
 51:   VecRestoreArray(V, NULL);
 52:   S = ksp->work[5];
 53:   VecGetArray(S, (PetscScalar **)&s);
 54:   VecRestoreArray(S, NULL);
 55:   T = ksp->work[6];
 56:   VecGetArray(T, (PetscScalar **)&t);
 57:   VecRestoreArray(T, NULL);
 58:   S2 = ksp->work[7];
 59:   VecGetArray(S2, (PetscScalar **)&s2);
 60:   VecRestoreArray(S2, NULL);

 62:   /* Only supports right preconditioning */
 64:   if (!ksp->guess_zero) {
 65:     if (!bcgs->guess) VecDuplicate(X, &bcgs->guess);
 66:     VecCopy(X, bcgs->guess);
 67:   } else {
 68:     VecSet(X, 0.0);
 69:   }

 71:   /* Compute initial residual */
 72:   KSPGetPC(ksp, &pc);
 73:   PCSetUp(pc);
 74:   PCGetOperators(pc, &mat, NULL);
 75:   if (!ksp->guess_zero) {
 76:     KSP_MatMult(ksp, mat, X, P2); /* P2 is used as temporary storage */
 77:     VecCopy(B, R);
 78:     VecAXPY(R, -1.0, P2);
 79:   } else {
 80:     VecCopy(B, R);
 81:   }

 83:   /* Test for nothing to do */
 84:   VecNorm(R, NORM_2, &rho);
 85:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
 86:   ksp->its = 0;
 87:   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
 88:   else ksp->rnorm = 0;
 89:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
 90:   KSPLogResidualHistory(ksp, ksp->rnorm);
 91:   KSPMonitor(ksp, 0, ksp->rnorm);
 92:   (*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP);
 93:   if (ksp->reason) return 0;

 95:   /* Initialize iterates */
 96:   VecCopy(R, RP); /* rp <- r */
 97:   VecCopy(R, P);  /* p <- r */

 99:   /* Big loop */
100:   for (i = 0; i < ksp->max_it; i++) {
101:     /* matmult and pc */
102:     KSP_PCApply(ksp, P, P2);      /* p2 <- K p */
103:     KSP_MatMult(ksp, mat, P2, V); /* v <- A p2 */

105:     /* inner prodcuts */
106:     if (i == 0) {
107:       tau = rho * rho;
108:       VecDot(V, RP, &sigma); /* sigma <- (v,rp) */
109:     } else {
110:       PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
111:       tau = sigma = 0.0;
112:       for (j = 0; j < N; j++) {
113:         tau += r[j] * rp[j];   /* tau <- (r,rp) */
114:         sigma += v[j] * rp[j]; /* sigma <- (v,rp) */
115:       }
116:       PetscLogFlops(4.0 * N);
117:       PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
118:       insums[0] = tau;
119:       insums[1] = sigma;
120:       PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0);
121:       MPIU_Allreduce(insums, outsums, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp));
122:       PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0);
123:       tau   = outsums[0];
124:       sigma = outsums[1];
125:     }

127:     /* scalar update */
128:     alpha = tau / sigma;

130:     /* vector update */
131:     VecWAXPY(S, -alpha, V, R); /* s <- r - alpha v */

133:     /* matmult and pc */
134:     KSP_PCApply(ksp, S, S2);      /* s2 <- K s */
135:     KSP_MatMult(ksp, mat, S2, T); /* t <- A s2 */

137:     /* inner prodcuts */
138:     PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
139:     xi1 = xi2 = xi3 = xi4 = 0.0;
140:     for (j = 0; j < N; j++) {
141:       xi1 += s[j] * s[j];  /* xi1 <- (s,s) */
142:       xi2 += t[j] * s[j];  /* xi2 <- (t,s) */
143:       xi3 += t[j] * t[j];  /* xi3 <- (t,t) */
144:       xi4 += t[j] * rp[j]; /* xi4 <- (t,rp) */
145:     }
146:     PetscLogFlops(8.0 * N);
147:     PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);

149:     insums[0] = xi1;
150:     insums[1] = xi2;
151:     insums[2] = xi3;
152:     insums[3] = xi4;

154:     PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0);
155:     MPIU_Allreduce(insums, outsums, 4, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp));
156:     PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0);
157:     xi1 = outsums[0];
158:     xi2 = outsums[1];
159:     xi3 = outsums[2];
160:     xi4 = outsums[3];

162:     /* test denominator */
163:     if ((xi3 == 0.0) || (sigma == 0.0)) {
165:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
166:       PetscInfo(ksp, "KSPSolve has failed due to zero inner product\n");
167:       break;
168:     }

170:     /* scalar updates */
171:     omega = xi2 / xi3;
172:     beta  = -xi4 / sigma;
173:     rho   = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */

175:     /* vector updates */
176:     VecAXPBYPCZ(X, alpha, omega, 1.0, P2, S2); /* x <- alpha * p2 + omega * s2 + x */

178:     /* convergence test */
179:     PetscObjectSAWsTakeAccess((PetscObject)ksp);
180:     ksp->its++;
181:     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
182:     else ksp->rnorm = 0;
183:     PetscObjectSAWsGrantAccess((PetscObject)ksp);
184:     KSPLogResidualHistory(ksp, ksp->rnorm);
185:     KSPMonitor(ksp, i + 1, ksp->rnorm);
186:     (*ksp->converged)(ksp, i + 1, ksp->rnorm, &ksp->reason, ksp->cnvP);
187:     if (ksp->reason) break;

189:     /* vector updates */
190:     PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0);
191:     for (j = 0; j < N; j++) {
192:       r[j] = s[j] - omega * t[j];                 /* r <- s - omega t */
193:       p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
194:     }
195:     PetscLogFlops(6.0 * N);
196:     PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0);
197:   }

199:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
200:   return 0;
201: }

203: /*MC
 204:      KSPFBCGSR - Implements a mathematically equivalent variant of FBiCGStab.

206:    Options Database Keys:
207:     see KSPSolve()

209:    Level: beginner

211:    Notes:
212:     Only allow right preconditioning

214: .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGSL`, `KSPSetPCSide()`
215: M*/
216: PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp)
217: {
218:   KSP_BCGS *bcgs;

220:   PetscNew(&bcgs);

222:   ksp->data                = bcgs;
223:   ksp->ops->setup          = KSPSetUp_FBCGSR;
224:   ksp->ops->solve          = KSPSolve_FBCGSR;
225:   ksp->ops->destroy        = KSPDestroy_BCGS;
226:   ksp->ops->reset          = KSPReset_BCGS;
227:   ksp->ops->buildsolution  = KSPBuildSolution_BCGS;
228:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
229:   ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
230:   ksp->pc_side             = PC_RIGHT; /* set default PC side */

232:   KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3);
233:   KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2);
234:   KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1);
235:   return 0;
236: }