Actual source code: fbcgsr.c
2: /*
3: This file implements FBiCGStab-R.
4: Only right preconditioning is allowed.
5: FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are:
6: (1) There are fewer MPI_Allreduce calls.
7: (2) Convergence is occasionally much faster than that of FBiCGStab.
8: */
9: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>
10: #include <petsc/private/vecimpl.h>
12: static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp)
13: {
14: KSPSetWorkVecs(ksp, 8);
15: return 0;
16: }
18: static PetscErrorCode KSPSolve_FBCGSR(KSP ksp)
19: {
20: PetscInt i, j, N;
21: PetscScalar tau, sigma, alpha, omega, beta;
22: PetscReal rho;
23: PetscScalar xi1, xi2, xi3, xi4;
24: Vec X, B, P, P2, RP, R, V, S, T, S2;
25: PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
26: PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
27: PetscScalar insums[4], outsums[4];
28: KSP_BCGS *bcgs = (KSP_BCGS *)ksp->data;
29: PC pc;
30: Mat mat;
33: VecGetLocalSize(ksp->vec_sol, &N);
35: X = ksp->vec_sol;
36: B = ksp->vec_rhs;
37: P2 = ksp->work[0];
39: /* The followings are involved in modified inner product calculations and vector updates */
40: RP = ksp->work[1];
41: VecGetArray(RP, (PetscScalar **)&rp);
42: VecRestoreArray(RP, NULL);
43: R = ksp->work[2];
44: VecGetArray(R, (PetscScalar **)&r);
45: VecRestoreArray(R, NULL);
46: P = ksp->work[3];
47: VecGetArray(P, (PetscScalar **)&p);
48: VecRestoreArray(P, NULL);
49: V = ksp->work[4];
50: VecGetArray(V, (PetscScalar **)&v);
51: VecRestoreArray(V, NULL);
52: S = ksp->work[5];
53: VecGetArray(S, (PetscScalar **)&s);
54: VecRestoreArray(S, NULL);
55: T = ksp->work[6];
56: VecGetArray(T, (PetscScalar **)&t);
57: VecRestoreArray(T, NULL);
58: S2 = ksp->work[7];
59: VecGetArray(S2, (PetscScalar **)&s2);
60: VecRestoreArray(S2, NULL);
62: /* Only supports right preconditioning */
64: if (!ksp->guess_zero) {
65: if (!bcgs->guess) VecDuplicate(X, &bcgs->guess);
66: VecCopy(X, bcgs->guess);
67: } else {
68: VecSet(X, 0.0);
69: }
71: /* Compute initial residual */
72: KSPGetPC(ksp, &pc);
73: PCSetUp(pc);
74: PCGetOperators(pc, &mat, NULL);
75: if (!ksp->guess_zero) {
76: KSP_MatMult(ksp, mat, X, P2); /* P2 is used as temporary storage */
77: VecCopy(B, R);
78: VecAXPY(R, -1.0, P2);
79: } else {
80: VecCopy(B, R);
81: }
83: /* Test for nothing to do */
84: VecNorm(R, NORM_2, &rho);
85: PetscObjectSAWsTakeAccess((PetscObject)ksp);
86: ksp->its = 0;
87: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
88: else ksp->rnorm = 0;
89: PetscObjectSAWsGrantAccess((PetscObject)ksp);
90: KSPLogResidualHistory(ksp, ksp->rnorm);
91: KSPMonitor(ksp, 0, ksp->rnorm);
92: (*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP);
93: if (ksp->reason) return 0;
95: /* Initialize iterates */
96: VecCopy(R, RP); /* rp <- r */
97: VecCopy(R, P); /* p <- r */
99: /* Big loop */
100: for (i = 0; i < ksp->max_it; i++) {
101: /* matmult and pc */
102: KSP_PCApply(ksp, P, P2); /* p2 <- K p */
103: KSP_MatMult(ksp, mat, P2, V); /* v <- A p2 */
105: /* inner prodcuts */
106: if (i == 0) {
107: tau = rho * rho;
108: VecDot(V, RP, &sigma); /* sigma <- (v,rp) */
109: } else {
110: PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
111: tau = sigma = 0.0;
112: for (j = 0; j < N; j++) {
113: tau += r[j] * rp[j]; /* tau <- (r,rp) */
114: sigma += v[j] * rp[j]; /* sigma <- (v,rp) */
115: }
116: PetscLogFlops(4.0 * N);
117: PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
118: insums[0] = tau;
119: insums[1] = sigma;
120: PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0);
121: MPIU_Allreduce(insums, outsums, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp));
122: PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0);
123: tau = outsums[0];
124: sigma = outsums[1];
125: }
127: /* scalar update */
128: alpha = tau / sigma;
130: /* vector update */
131: VecWAXPY(S, -alpha, V, R); /* s <- r - alpha v */
133: /* matmult and pc */
134: KSP_PCApply(ksp, S, S2); /* s2 <- K s */
135: KSP_MatMult(ksp, mat, S2, T); /* t <- A s2 */
137: /* inner prodcuts */
138: PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
139: xi1 = xi2 = xi3 = xi4 = 0.0;
140: for (j = 0; j < N; j++) {
141: xi1 += s[j] * s[j]; /* xi1 <- (s,s) */
142: xi2 += t[j] * s[j]; /* xi2 <- (t,s) */
143: xi3 += t[j] * t[j]; /* xi3 <- (t,t) */
144: xi4 += t[j] * rp[j]; /* xi4 <- (t,rp) */
145: }
146: PetscLogFlops(8.0 * N);
147: PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
149: insums[0] = xi1;
150: insums[1] = xi2;
151: insums[2] = xi3;
152: insums[3] = xi4;
154: PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0);
155: MPIU_Allreduce(insums, outsums, 4, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp));
156: PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0);
157: xi1 = outsums[0];
158: xi2 = outsums[1];
159: xi3 = outsums[2];
160: xi4 = outsums[3];
162: /* test denominator */
163: if ((xi3 == 0.0) || (sigma == 0.0)) {
165: ksp->reason = KSP_DIVERGED_BREAKDOWN;
166: PetscInfo(ksp, "KSPSolve has failed due to zero inner product\n");
167: break;
168: }
170: /* scalar updates */
171: omega = xi2 / xi3;
172: beta = -xi4 / sigma;
173: rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */
175: /* vector updates */
176: VecAXPBYPCZ(X, alpha, omega, 1.0, P2, S2); /* x <- alpha * p2 + omega * s2 + x */
178: /* convergence test */
179: PetscObjectSAWsTakeAccess((PetscObject)ksp);
180: ksp->its++;
181: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
182: else ksp->rnorm = 0;
183: PetscObjectSAWsGrantAccess((PetscObject)ksp);
184: KSPLogResidualHistory(ksp, ksp->rnorm);
185: KSPMonitor(ksp, i + 1, ksp->rnorm);
186: (*ksp->converged)(ksp, i + 1, ksp->rnorm, &ksp->reason, ksp->cnvP);
187: if (ksp->reason) break;
189: /* vector updates */
190: PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0);
191: for (j = 0; j < N; j++) {
192: r[j] = s[j] - omega * t[j]; /* r <- s - omega t */
193: p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
194: }
195: PetscLogFlops(6.0 * N);
196: PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0);
197: }
199: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
200: return 0;
201: }
203: /*MC
204: KSPFBCGSR - Implements a mathematically equivalent variant of FBiCGStab.
206: Options Database Keys:
207: see KSPSolve()
209: Level: beginner
211: Notes:
212: Only right preconditioning is allowed.
214: .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGSL`, `KSPSetPCSide()`
215: M*/
216: PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp)
217: {
218: KSP_BCGS *bcgs;
220: PetscNew(&bcgs);
222: ksp->data = bcgs;
223: ksp->ops->setup = KSPSetUp_FBCGSR;
224: ksp->ops->solve = KSPSolve_FBCGSR;
225: ksp->ops->destroy = KSPDestroy_BCGS;
226: ksp->ops->reset = KSPReset_BCGS;
227: ksp->ops->buildsolution = KSPBuildSolution_BCGS;
228: ksp->ops->buildresidual = KSPBuildResidualDefault;
229: ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
230: ksp->pc_side = PC_RIGHT; /* set default PC side */
232: KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3);
233: KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2);
234: KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1);
235: return 0;
236: }