Actual source code: bcgsl.c
1: /*
2: * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
3: * "Enhanced implementation of BiCGStab(L) for solving linear systems
4: * of equations". This uses tricky delayed updating ideas to prevent
5: * round-off buildup.
6: */
7: #include petscblaslapack.h
8: #include src/ksp/ksp/kspimpl.h
9: #include bcgsl.h
12: /****************************************************
13: *
14: * Some memory allocation functions
15: *
16: ****************************************************/
20: PetscErrorCode bcgsl_cleanup_i(KSP ksp)
21: {
22: PetscErrorCode ierr;
25: /* free all workspace */
26: PetscFree(ksp->work);
27: return(0);
28: }
32: PetscErrorCode bcgsl_setup_i(KSP ksp)
33: {
34: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
35: PetscInt ell = bcgsl->ell;
39: KSPDefaultGetWork(ksp, 6+2*ell);
40: return(0);
41: }
45: static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
46: {
47: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *) ksp->data;
48: PetscScalar alpha, beta, nu, omega, sigma;
49: PetscScalar zero = 0;
50: PetscScalar rho0, rho1;
51: PetscReal kappa0, kappaA, kappa1;
52: PetscReal ghat, epsilon, abstol;
53: PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
54: PetscTruth bUpdateX;
55: PetscTruth bBombed = PETSC_FALSE;
57: PetscInt maxit;
58: PetscInt h, i, j, k, vi, ell;
59: PetscMPIInt rank;
60: PetscBLASInt ldMZ,bierr;
65: /* set up temporary vectors */
66: vi = 0;
67: ell = bcgsl->ell;
68: bcgsl->vB = ksp->work[vi]; vi++;
69: bcgsl->vRt = ksp->work[vi]; vi++;
70: bcgsl->vTm = ksp->work[vi]; vi++;
71: bcgsl->vvR = ksp->work+vi; vi += ell+1;
72: bcgsl->vvU = ksp->work+vi; vi += ell+1;
73: bcgsl->vXr = ksp->work[vi]; vi++;
74: ldMZ = ell+1;
75: {
76: PetscMalloc(ldMZ*sizeof(PetscScalar), &AY0c);
77: PetscMalloc(ldMZ*sizeof(PetscScalar), &AYlc);
78: PetscMalloc(ldMZ*sizeof(PetscScalar), &AYtc);
79: PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZa);
80: PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZb);
81: }
83: /* Prime the iterative solver */
84: MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
86: KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
87: VecNorm(VVR[0], NORM_2, &zeta0);
88: rnmax_computed = zeta0;
89: rnmax_true = zeta0;
91: (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
92: if (ksp->reason) {
93: PetscFree(AY0c);
94: PetscFree(AYlc);
95: PetscFree(AYtc);
96: PetscFree(MZa);
97: PetscFree(MZb);
99: return(0);
100: }
102: VecSet(&zero, VVU[0]);
103: alpha = 0;
104: rho0 = omega = 1;
106: if (bcgsl->delta>0.0) {
107: VecCopy(VX, VXR);
108: VecSet(&zero, VX);
109: VecCopy(VVR[0], VB);
110: } else {
111: VecCopy(ksp->vec_rhs, VB);
112: }
114: /* Life goes on */
115: VecCopy(VVR[0], VRT);
116: zeta = zeta0;
118: KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);
120: for (k=0; k<maxit; k += bcgsl->ell) {
121: PetscObjectTakeAccess(ksp);
122: ksp->its = k;
123: ksp->rnorm = zeta;
124: PetscObjectGrantAccess(ksp);
126: KSPLogResidualHistory(ksp, zeta);
127: KSPMonitor(ksp, ksp->its, zeta);
129: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
130: if (ksp->reason) break;
132: /* BiCG part */
133: rho0 = -omega*rho0;
134: nrm0 = zeta;
135: for (j=0; j<bcgsl->ell; j++) {
136: /* rho1 <- r_j' * r_tilde */
137: VecDot(VVR[j], VRT, &rho1);
138: if (rho1 == 0.0) {
139: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
140: bBombed = PETSC_TRUE;
141: break;
142: }
143: beta = alpha*(rho1/rho0);
144: rho0 = rho1;
145: nu = -beta;
146: for (i=0; i<=j; i++) {
147: /* u_i <- r_i - beta*u_i */
148: VecAYPX(&nu, VVR[i], VVU[i]);
149: }
150: /* u_{j+1} <- inv(K)*A*u_j */
151: KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
153: VecDot(VVU[j+1], VRT, &sigma);
154: if (sigma == 0.0) {
155: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
156: bBombed = PETSC_TRUE;
157: break;
158: }
159: alpha = rho1/sigma;
161: /* x <- x + alpha*u_0 */
162: VecAXPY(&alpha, VVU[0], VX);
164: nu = -alpha;
165: for (i=0; i<=j; i++) {
166: /* r_i <- r_i - alpha*u_{i+1} */
167: VecAXPY(&nu, VVU[i+1], VVR[i]);
168: }
170: /* r_{j+1} <- inv(K)*A*r_j */
171: KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
173: VecNorm(VVR[0], NORM_2, &nrm0);
174: if (bcgsl->delta>0.0) {
175: if (rnmax_computed<nrm0) rnmax_computed = nrm0;
176: if (rnmax_true<nrm0) rnmax_true = nrm0;
177: }
179: /* NEW: check for early exit */
180: (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
181: if (ksp->reason) {
182: PetscObjectTakeAccess(ksp);
183: ksp->its = k+j;
184: ksp->rnorm = nrm0;
185: PetscObjectGrantAccess(ksp);
186: break;
187: }
188: }
190: if (bBombed==PETSC_TRUE) break;
192: /* Polynomial part */
194: for (i=0; i<=bcgsl->ell; i++) {
195: for (j=0; j<i; j++) {
196: VecDot(VVR[j], VVR[i], &nu);
197: MZa[i+ldMZ*j] = nu;
198: MZa[j+ldMZ*i] = nu;
199: MZb[i+ldMZ*j] = nu;
200: MZb[j+ldMZ*i] = nu;
201: }
203: VecDot(VVR[i], VVR[i], &nu);
204: MZa[i+ldMZ*i] = nu;
205: MZb[i+ldMZ*i] = nu;
206: }
208: if (!bcgsl->bConvex || bcgsl->ell==1) {
209: PetscBLASInt ione = 1,bell = bcgsl->ell;
211: AY0c[0] = -1;
212: LApotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
213: if (ierr!=0) {
214: ksp->reason = KSP_DIVERGED_BREAKDOWN;
215: bBombed = PETSC_TRUE;
216: break;
217: }
218: BLcopy_(&bell, &MZb[1], &ione, &AY0c[1], &ione);
219: LApotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
220: } else {
221: PetscBLASInt neqs = bcgsl->ell-1;
222: PetscBLASInt ione = 1;
223: PetscScalar aone = 1.0, azero = 0.0;
225: LApotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
226: if (ierr!=0) {
227: ksp->reason = KSP_DIVERGED_BREAKDOWN;
228: bBombed = PETSC_TRUE;
229: break;
230: }
231: BLcopy_(&neqs, &MZb[1], &ione, &AY0c[1], &ione);
232: LApotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
233: AY0c[0] = -1;
234: AY0c[bcgsl->ell] = 0;
236: BLcopy_(&neqs, &MZb[1+ldMZ*(bcgsl->ell)], &ione, &AYlc[1], &ione);
237: LApotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);
239: AYlc[0] = 0;
240: AYlc[bcgsl->ell] = -1;
242: LAgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);
244: kappa0 = BLdot_(&ldMZ, AY0c, &ione, AYtc, &ione);
246: /* round-off can cause negative kappa's */
247: if (kappa0<0) kappa0 = -kappa0;
248: kappa0 = sqrt(kappa0);
250: kappaA = BLdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
252: LAgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);
254: kappa1 = BLdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
256: if (kappa1<0) kappa1 = -kappa1;
257: kappa1 = sqrt(kappa1);
259: if (kappa0!=0.0 && kappa1!=0.0) {
260: if (kappaA<0.7*kappa0*kappa1) {
261: ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
262: } else {
263: ghat = kappaA/(kappa1*kappa1);
264: }
265: for (i=0; i<=bcgsl->ell; i++) {
266: AY0c[i] = AY0c[i] - ghat* AYlc[i];
267: }
268: }
269: }
271: omega = AY0c[bcgsl->ell];
272: for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
273: omega = AY0c[h];
274: }
275: if (omega==0.0) {
276: ksp->reason = KSP_DIVERGED_BREAKDOWN;
277: break;
278: }
280: for (i=1; i<=bcgsl->ell; i++) {
281: nu = -AY0c[i];
282: VecAXPY(&nu, VVU[i], VVU[0]);
283: nu = AY0c[i];
284: VecAXPY(&nu, VVR[i-1], VX);
285: nu = -AY0c[i];
286: VecAXPY(&nu, VVR[i], VVR[0]);
287: }
289: VecNorm(VVR[0], NORM_2, &zeta);
291: /* Accurate Update */
292: if (bcgsl->delta>0.0) {
293: if (rnmax_computed<zeta) rnmax_computed = zeta;
294: if (rnmax_true<zeta) rnmax_true = zeta;
296: bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
297: if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
298: /* r0 <- b-inv(K)*A*X */
299: KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
300: nu = -1;
301: VecAYPX(&nu, VB, VVR[0]);
302: rnmax_true = zeta;
304: if (bUpdateX) {
305: nu = 1;
306: VecAXPY(&nu, VX, VXR);
307: VecSet(&zero, VX);
308: VecCopy(VVR[0], VB);
309: rnmax_computed = zeta;
310: }
311: }
312: }
313: }
315: KSPMonitor(ksp, ksp->its, zeta);
317: if (bcgsl->delta>0.0) {
318: nu = 1;
319: VecAXPY(&nu, VXR, VX);
320: }
322: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
323: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
325: PetscFree(AY0c);
326: PetscFree(AYlc);
327: PetscFree(AYtc);
328: PetscFree(MZa);
329: PetscFree(MZb);
331: return(0);
333: }
337: /*@C
338: KSPBCGSLSetXRes - Sets the parameter governing when
339: exact residuals will be used instead of computed residuals.
341: Collective on KSP
343: Input Parameters:
344: + ksp - iterative context obtained from KSPCreate
345: - delta - computed residuals are used alone when delta is not positive
347: Options Database Keys:
349: . -ksp_bcgsl_xres delta
351: Level: intermediate
353: .keywords: KSP, BiCGStab(L), set, exact residuals
355: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
356: @*/
357: PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
358: {
359: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
363: if (ksp->setupcalled) {
364: if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
365: bcgsl_cleanup_i(ksp);
366: ksp->setupcalled = 0;
367: }
368: }
369: bcgsl->delta = delta;
370: return(0);
371: }
376: /*@C
377: KSPBCGSLSetPol - Sets the type of polynomial part will
378: be used in the BiCGSTab(L) solver.
380: Collective on KSP
382: Input Parameters:
383: + ksp - iterative context obtained from KSPCreate
384: - uMROR - set to PETSC_TRUE when the polynomial is a convex
385: combination of an MR and an OR step.
387: Options Database Keys:
389: + -ksp_bcgsl_cxpoly - use enhanced polynomial
390: . -ksp_bcgsl_mrpoly - use standard polynomial
392: Level: intermediate
394: .keywords: KSP, BiCGStab(L), set, polynomial
396: .seealso: @()
397: @*/
398: PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscTruth uMROR)
399: {
400: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
404: if (!ksp->setupcalled) {
405: bcgsl->bConvex = uMROR;
406: } else if (bcgsl->bConvex != uMROR) {
407: /* free the data structures,
408: then create them again
409: */
410: bcgsl_cleanup_i(ksp);
411: bcgsl->bConvex = uMROR;
412: ksp->setupcalled = 0;
413: }
414: return(0);
415: }
420: /*@C
421: KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).
423: Collective on KSP
425: Input Parameters:
426: + ksp - iterative context obtained from KSPCreate
427: - ell - number of search directions
429: Options Database Keys:
431: . -ksp_bcgsl_ell ell
433: Level: intermediate
435: .keywords: KSP, BiCGStab(L), set, exact residuals,
437: .seealso: @()
438: @*/
439: PetscErrorCode KSPBCGSLSetEll(KSP ksp, int ell)
440: {
441: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
445: if (ell < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
447: if (!ksp->setupcalled) {
448: bcgsl->ell = ell;
449: } else if (bcgsl->ell != ell) {
450: /* free the data structures, then create them again */
451: bcgsl_cleanup_i(ksp);
452: bcgsl->ell = ell;
453: ksp->setupcalled = 0;
454: }
455: return(0);
456: }
460: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
461: {
462: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
463: PetscErrorCode ierr;
464: PetscTruth isascii, isstring;
467: PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_ASCII, &isascii);
469: PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_STRING, &isstring);
471: if (isascii) {
472: PetscViewerASCIIPrintf(viewer, " BCGSL: Ell = %D\n", bcgsl->ell);
473: PetscViewerASCIIPrintf(viewer, " BCGSL: Delta = %lg\n", bcgsl->delta);
474: } else {
475: SETERRQ1(PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
476: }
477: return(0);
478: }
481: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
482: {
483: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
485: PetscInt this_ell;
486: PetscReal delta;
487: PetscTruth flga, flg;
490: /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
491: don't need to be called here.
492: */
493: PetscOptionsHead("KSP BiCGStab(L) Options");
495: /* Set number of search directions */
496: PetscOptionsInt("-ksp_bcgsl_ell", "Number of Krylov search directions", "KSPBCGSLSetEll", bcgsl->ell, &this_ell, &flg);
497: if (flg) {
498: KSPBCGSLSetEll(ksp, this_ell);
499: }
501: /* Set polynomial type */
502: PetscOptionsName("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", &flga);
503: if (flga) {
504: KSPBCGSLSetPol(ksp, PETSC_TRUE);
505: } else {
506: PetscOptionsName("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", &flg);
507: KSPBCGSLSetPol(ksp, PETSC_FALSE);
508: }
510: /* Will computed residual be refreshed? */
511: PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
512: if (flg) {
513: KSPBCGSLSetXRes(ksp, delta);
514: }
515: PetscOptionsTail();
516: return(0);
517: }
520: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
521: {
525: /* Support left preconditioners only */
526: if (ksp->pc_side == PC_SYMMETRIC) {
527: SETERRQ(PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
528: } else if (ksp->pc_side == PC_RIGHT) {
529: SETERRQ(PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
530: }
532: bcgsl_setup_i(ksp);
533: return(0);
534: }
535: /*MC
536: KSPBCGSL - Implements a slight variant of the Enhanced
537: BiCGStab(L) algorithm in (3) and (2). The variation
538: concerns cases when either kappa0**2 or kappa1**2 is
539: negative due to round-off. Kappa0 has also been pulled
540: out of the denominator in the formula for ghat.
542: References:
543: 1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
544: approaches for the stable computation of hybrid BiCG
545: methods", Applied Numerical Mathematics: Transactions
546: f IMACS, 19(3), pp 235-54, 1996.
547: 2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
548: "BiCGStab(L) and other hybrid Bi-CG methods",
549: Numerical Algorithms, 7, pp 75-109, 1994.
550: 3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
551: for solving linear systems of equations", preprint
552: from www.citeseer.com.
554: Contributed by: Joel M. Malard, email jm.malard@pnl.gov
556: Options Database Keys:
557: + -ksp_bcgsl_ell <ell> Number of Krylov search directions
558: - -ksp_bcgsl_cxpol Use a convex function of the MR and OR polynomials after the BiCG step
559: - -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals
561: Level: beginner
563: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS
565: M*/
569: PetscErrorCode KSPCreate_BCGSL(KSP ksp)
570: {
572: KSP_BiCGStabL *bcgsl;
575: /* allocate BiCGStab(L) context */
576: PetscNew(KSP_BiCGStabL, &bcgsl);
577: PetscMemzero(bcgsl, sizeof(KSP_BiCGStabL));
578: ksp->data = (void*)bcgsl;
580: ksp->pc_side = PC_LEFT;
581: ksp->ops->setup = KSPSetUp_BCGSL;
582: ksp->ops->solve = KSPSolve_BCGSL;
583: ksp->ops->destroy = KSPDefaultDestroy;
584: ksp->ops->buildsolution = KSPDefaultBuildSolution;
585: ksp->ops->buildresidual = KSPDefaultBuildResidual;
586: ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
587: ksp->ops->view = KSPView_BCGSL;
589: /* Let the user redefine the number of directions vectors */
590: bcgsl->ell = 2;
591: PetscObjectComposeFunctionDynamic((PetscObject)ksp, "KSPBCGSLSetEll_C", "KSP_BCGS_SetEll", KSPBCGSLSetEll);
593: /*Choose between a single MR step or an averaged MR/OR */
594: bcgsl->bConvex = PETSC_FALSE;
595: PetscObjectComposeFunctionDynamic((PetscObject)ksp, "KSPBCGSLSetPol_C", "KSP_BCGS_SetPol", KSPBCGSLSetPol);
597: /* Set the threshold for when exact residuals will be used */
598: bcgsl->delta = 0.0;
599: PetscObjectComposeFunctionDynamic((PetscObject)ksp, "KSPBCGSLSetXRes_C", "KSP_BCGS_SetXRes", KSPBCGSLSetXRes);
600: return(0);
601: }