Actual source code: dgmres.c
petsc-3.4.3 2013-10-15
2: /*
3: This file implements the deflated GMRES.
4: References:
5: [1]J. Erhel, K. Burrage and B. Pohl, Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
6: [2] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
8: */
10: #include ../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h
12: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
14: #define GMRES_DELTA_DIRECTIONS 10
15: #define GMRES_DEFAULT_MAXK 30
16: static PetscErrorCode KSPDGMRESGetNewVectors(KSP,PetscInt);
17: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
18: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
22: PetscErrorCode KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
23: {
27: PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
28: return(0);
29: }
32: PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
33: {
37: PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
38: return(0);
39: }
42: PetscErrorCode KSPDGMRESForce(KSP ksp,PetscInt force)
43: {
47: PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscInt),(ksp,force));
48: return(0);
49: }
52: PetscErrorCode KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
53: {
57: PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
58: return(0);
59: }
62: PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
63: {
67: PetscTryMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
68: return(0);
69: }
72: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp)
73: {
77: PetscTryMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP),(ksp));
78: return(0);
79: }
82: PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
83: {
87: PetscTryMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
88: return(0);
89: }
93: PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
94: {
98: PetscTryMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
99: return(0);
100: }
104: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
105: {
107: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
108: PetscInt neig = dgmres->neig+EIG_OFFSET;
109: PetscInt max_k = dgmres->max_k+1;
112: KSPSetUp_GMRES(ksp);
113: if (!dgmres->neig) return(0);
115: /* Allocate workspace for the Schur vectors*/
116: PetscMalloc((neig) *max_k*sizeof(PetscReal), &SR);
117: dgmres->wr = NULL;
118: dgmres->wi = NULL;
119: dgmres->perm = NULL;
120: dgmres->modul = NULL;
121: dgmres->Q = NULL;
122: dgmres->Z = NULL;
124: UU = NULL;
125: XX = NULL;
126: MX = NULL;
127: AUU = NULL;
128: XMX = NULL;
129: XMU = NULL;
130: UMX = NULL;
131: AUAU = NULL;
132: TT = NULL;
133: TTF = NULL;
134: INVP = NULL;
135: X1 = NULL;
136: X2 = NULL;
137: MU = NULL;
138: return(0);
139: }
141: /*
142: Run GMRES, possibly with restart. Return residual history if requested.
143: input parameters:
145: . gmres - structure containing parameters and work areas
147: output parameters:
148: . nres - residuals (from preconditioned system) at each step.
149: If restarting, consider passing nres+it. If null,
150: ignored
151: . itcount - number of iterations used. nres[0] to nres[itcount]
152: are defined. If null, ignored.
154: Notes:
155: On entry, the value in vector VEC_VV(0) should be the initial residual
156: (this allows shortcuts where the initial preconditioned residual is 0).
157: */
160: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
161: {
162: KSP_DGMRES *dgmres = (KSP_DGMRES*)(ksp->data);
163: PetscReal res_norm,res,hapbnd,tt;
165: PetscInt it = 0;
166: PetscInt max_k = dgmres->max_k;
167: PetscBool hapend = PETSC_FALSE;
168: PetscReal res_old;
169: PetscInt test = 0;
172: VecNormalize(VEC_VV(0),&res_norm);
173: res = res_norm;
174: *GRS(0) = res_norm;
176: /* check for the convergence */
177: PetscObjectAMSTakeAccess((PetscObject)ksp);
178: ksp->rnorm = res;
179: PetscObjectAMSGrantAccess((PetscObject)ksp);
180: dgmres->it = (it - 1);
181: KSPLogResidualHistory(ksp,res);
182: KSPMonitor(ksp,ksp->its,res);
183: if (!res) {
184: if (itcount) *itcount = 0;
185: ksp->reason = KSP_CONVERGED_ATOL;
186: PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
187: return(0);
188: }
189: /* record the residual norm to test if deflation is needed */
190: res_old = res;
192: (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
193: while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
194: if (it) {
195: KSPLogResidualHistory(ksp,res);
196: KSPMonitor(ksp,ksp->its,res);
197: }
198: dgmres->it = (it - 1);
199: if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
200: KSPDGMRESGetNewVectors(ksp,it+1);
201: }
202: if (dgmres->r > 0) {
203: if (ksp->pc_side == PC_LEFT) {
204: /* Apply the first preconditioner */
205: KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
206: /* Then apply Deflation as a preconditioner */
207: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
208: } else if (ksp->pc_side == PC_RIGHT) {
209: KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
210: KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
211: }
212: } else {
213: KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
214: }
215: dgmres->matvecs += 1;
216: /* update hessenberg matrix and do Gram-Schmidt */
217: (*dgmres->orthog)(ksp,it);
219: /* vv(i+1) . vv(i+1) */
220: VecNormalize(VEC_VV(it+1),&tt);
221: /* save the magnitude */
222: *HH(it+1,it) = tt;
223: *HES(it+1,it) = tt;
225: /* check for the happy breakdown */
226: hapbnd = PetscAbsScalar(tt / *GRS(it));
227: if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
228: if (tt < hapbnd) {
229: PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %G tt = %G\n",hapbnd,tt);
230: hapend = PETSC_TRUE;
231: }
232: KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);
234: it++;
235: dgmres->it = (it-1); /* For converged */
236: ksp->its++;
237: ksp->rnorm = res;
238: if (ksp->reason) break;
240: (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
242: /* Catch error in happy breakdown and signal convergence and break from loop */
243: if (hapend) {
244: if (!ksp->reason) {
245: if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
246: else {
247: ksp->reason = KSP_DIVERGED_BREAKDOWN;
248: break;
249: }
250: }
251: }
252: }
254: /* Monitor if we know that we will not return for a restart */
255: if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
256: KSPLogResidualHistory(ksp,res);
257: KSPMonitor(ksp,ksp->its,res);
258: }
259: if (itcount) *itcount = it;
261: /*
262: Down here we have to solve for the "best" coefficients of the Krylov
263: columns, add the solution values together, and possibly unwind the
264: preconditioning from the solution
265: */
266: /* Form the solution (or the solution so far) */
267: KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);
269: /* Compute data for the deflation to be used during the next restart */
270: if (!ksp->reason && ksp->its < ksp->max_it) {
271: test = max_k *log(ksp->rtol/res) /log(res/res_old);
272: /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
273: if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
274: KSPDGMRESComputeDeflationData(ksp);
275: }
276: }
277: return(0);
278: }
282: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
283: {
285: PetscInt its,itcount;
286: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
287: PetscBool guess_zero = ksp->guess_zero;
288: PetscBool flag;
291: if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
293: PetscObjectAMSTakeAccess((PetscObject)ksp);
294: ksp->its = 0;
295: dgmres->matvecs = 0;
296: PetscObjectAMSGrantAccess((PetscObject)ksp);
298: itcount = 0;
299: ksp->reason = KSP_CONVERGED_ITERATING;
300: while (!ksp->reason) {
301: KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
302: if (ksp->pc_side == PC_LEFT) {
303: dgmres->matvecs += 1;
304: if (dgmres->r > 0) {
305: KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
306: VecCopy(VEC_TEMP, VEC_VV(0));
307: }
308: }
310: KSPDGMRESCycle(&its,ksp);
311: itcount += its;
312: if (itcount >= ksp->max_it) {
313: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
314: break;
315: }
316: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
317: }
318: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
320: PetscOptionsHasName(((PetscObject)ksp)->prefix,"-ksp_dgmres_view_deflation_vecs",&flag);
321: if (flag) {
322: PetscInt i;
324: for (i = 0; i < dgmres->r; i++) {
325: VecViewFromOptions(UU[i],((PetscObject)ksp)->prefix,"-ksp_dgmres_view_deflation_vecs");
326: }
327: }
328: return(0);
329: }
334: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
335: {
337: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
338: PetscInt neig1 = dgmres->neig+EIG_OFFSET;
339: PetscInt max_neig = dgmres->max_neig;
342: if (dgmres->r) {
343: VecDestroyVecs(max_neig, &UU);
344: VecDestroyVecs(max_neig, &MU);
345: if (XX) {
346: VecDestroyVecs(neig1, &XX);
347: VecDestroyVecs(neig1, &MX);
348: }
350: PetscFree(TT);
351: PetscFree(TTF);
352: PetscFree(INVP);
354: PetscFree(XMX);
355: PetscFree(UMX);
356: PetscFree(XMU);
357: PetscFree(X1);
358: PetscFree(X2);
359: PetscFree(dgmres->work);
360: PetscFree(dgmres->iwork);
361: PetscFree(dgmres->wr);
362: PetscFree(dgmres->wi);
363: PetscFree(dgmres->modul);
364: PetscFree(dgmres->Q);
365: PetscFree(ORTH);
366: PetscFree(AUAU);
367: PetscFree(AUU);
368: PetscFree(SR2);
369: }
370: PetscFree(SR);
371: KSPDestroy_GMRES(ksp);
372: return(0);
373: }
374: /*
375: KSPDGMRESBuildSoln - create the solution from the starting vector and the
376: current iterates.
378: Input parameters:
379: nrs - work area of size it + 1.
380: vs - index of initial guess
381: vdest - index of result. Note that vs may == vdest (replace
382: guess with the solution).
384: This is an internal routine that knows about the GMRES internals.
385: */
388: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
389: {
390: PetscScalar tt;
392: PetscInt ii,k,j;
393: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
395: /* Solve for solution vector that minimizes the residual */
398: /* If it is < 0, no gmres steps have been performed */
399: if (it < 0) {
400: VecCopy(vs,vdest); /* VecCopy() is smart, exists immediately if vguess == vdest */
401: return(0);
402: }
403: if (*HH(it,it) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %G",it,PetscAbsScalar(*GRS(it)));
404: if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
405: else nrs[it] = 0.0;
407: for (ii=1; ii<=it; ii++) {
408: k = it - ii;
409: tt = *GRS(k);
410: for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
411: if (*HH(k,k) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
412: nrs[k] = tt / *HH(k,k);
413: }
415: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
416: VecSet(VEC_TEMP,0.0);
417: VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));
419: /* Apply deflation */
420: if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
421: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
422: VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
423: }
424: KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);
426: /* add solution to previous solution */
427: if (vdest != vs) {
428: VecCopy(vs,vdest);
429: }
430: VecAXPY(vdest,1.0,VEC_TEMP);
431: return(0);
432: }
433: /*
434: Do the scalar work for the orthogonalization. Return new residual norm.
435: */
438: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
439: {
440: PetscScalar *hh,*cc,*ss,tt;
441: PetscInt j;
442: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
445: hh = HH(0,it);
446: cc = CC(0);
447: ss = SS(0);
449: /* Apply all the previously computed plane rotations to the new column
450: of the Hessenberg matrix */
451: for (j=1; j<=it; j++) {
452: tt = *hh;
453: *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
454: hh++;
455: *hh = *cc++ * *hh -(*ss++ * tt);
456: }
458: /*
459: compute the new plane rotation, and apply it to:
460: 1) the right-hand-side of the Hessenberg system
461: 2) the new column of the Hessenberg matrix
462: thus obtaining the updated value of the residual
463: */
464: if (!hapend) {
465: tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
466: if (tt == 0.0) {
467: ksp->reason = KSP_DIVERGED_NULL;
468: return(0);
469: }
470: *cc = *hh / tt;
471: *ss = *(hh+1) / tt;
472: *GRS(it+1) = -(*ss * *GRS(it));
473: *GRS(it) = PetscConj(*cc) * *GRS(it);
474: *hh = PetscConj(*cc) * *hh + *ss * *(hh+1);
475: *res = PetscAbsScalar(*GRS(it+1));
476: } else {
477: /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
478: another rotation matrix (so RH doesn't change). The new residual is
479: always the new sine term times the residual from last time (GRS(it)),
480: but now the new sine rotation would be zero...so the residual should
481: be zero...so we will multiply "zero" by the last residual. This might
482: not be exactly what we want to do here -could just return "zero". */
484: *res = 0.0;
485: }
486: return(0);
487: }
488: /*
489: This routine allocates more work vectors, starting from VEC_VV(it).
490: */
493: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
494: {
495: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
497: PetscInt nwork = dgmres->nwork_alloc,k,nalloc;
500: nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
501: /* Adjust the number to allocate to make sure that we don't exceed the
502: number of available slots */
503: if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
504: nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
505: }
506: if (!nalloc) return(0);
508: dgmres->vv_allocated += nalloc;
510: KSPGetVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
511: PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);
513: dgmres->mwork_alloc[nwork] = nalloc;
514: for (k=0; k<nalloc; k++) {
515: dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
516: }
517: dgmres->nwork_alloc++;
518: return(0);
519: }
523: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
524: {
525: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
529: if (!ptr) {
530: if (!dgmres->sol_temp) {
531: VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
532: PetscLogObjectParent(ksp,dgmres->sol_temp);
533: }
534: ptr = dgmres->sol_temp;
535: }
536: if (!dgmres->nrs) {
537: /* allocate the work area */
538: PetscMalloc(dgmres->max_k*sizeof(PetscScalar),&dgmres->nrs);
539: PetscLogObjectMemory(ksp,dgmres->max_k*sizeof(PetscScalar));
540: }
542: KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
543: if (result) *result = ptr;
544: return(0);
545: }
549: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
550: {
551: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
553: PetscBool iascii,isharmonic;
556: KSPView_GMRES(ksp,viewer);
557: PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
558: if (iascii) {
559: if (dgmres->force) PetscViewerASCIIPrintf(viewer, " DGMRES: Adaptive strategy is used: FALSE\n");
560: else PetscViewerASCIIPrintf(viewer, " DGMRES: Adaptive strategy is used: TRUE\n");
561: PetscOptionsHasName(NULL, "-ksp_dgmres_harmonic_ritz", &isharmonic);
562: if (isharmonic) {
563: PetscViewerASCIIPrintf(viewer, " DGMRES: Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
564: } else {
565: PetscViewerASCIIPrintf(viewer, " DGMRES: Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
566: }
567: PetscViewerASCIIPrintf(viewer, " DGMRES: Total number of extracted eigenvalues = %D\n", dgmres->r);
568: PetscViewerASCIIPrintf(viewer, " DGMRES: Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
569: PetscViewerASCIIPrintf(viewer, " DGMRES: relaxation parameter for the adaptive strategy(smv) = %g\n", dgmres->smv);
570: PetscViewerASCIIPrintf(viewer, " DGMRES: Number of matvecs : %D\n", dgmres->matvecs);
571: }
572: return(0);
573: }
575: /* New DGMRES functions */
579: static PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
580: {
581: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
584: if (neig< 0 && neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
585: dgmres->neig=neig;
586: return(0);
587: }
591: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
592: {
593: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
596: if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
597: dgmres->max_neig=max_neig;
598: return(0);
599: }
603: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
604: {
605: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
608: if (ratio <= 0) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
609: dgmres->smv=ratio;
610: return(0);
611: }
615: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp,PetscInt force)
616: {
617: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
620: if (force != 0 && force != 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"Value must be 0 or 1");
621: dgmres->force = 1;
622: return(0);
623: }
625: extern PetscErrorCode KSPSetFromOptions_GMRES(KSP);
629: PetscErrorCode KSPSetFromOptions_DGMRES(KSP ksp)
630: {
632: PetscInt neig;
633: PetscInt max_neig;
634: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
635: PetscBool flg;
636: PetscReal smv;
637: PetscInt input;
640: KSPSetFromOptions_GMRES(ksp);
641: PetscOptionsHead("KSP DGMRES Options");
642: PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
643: if (flg) {
644: KSPDGMRESSetEigen(ksp, neig);
645: }
646: PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
647: if (flg) {
648: KSPDGMRESSetMaxEigen(ksp, max_neig);
649: }
650: PetscOptionsReal("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &smv, &flg);
651: if (flg) dgmres->smv = smv;
652: PetscOptionsInt("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &input, &flg);
653: if (flg) dgmres->improve = input;
654: PetscOptionsInt("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &input, &flg);
655: if (flg) dgmres->force = input;
656: PetscOptionsTail();
657: return(0);
658: }
662: static PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
663: {
664: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
666: PetscInt i,j, k;
667: PetscBLASInt nr, bmax;
668: PetscInt r = dgmres->r;
669: PetscInt neig; /* number of eigenvalues to extract at each restart */
670: PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
671: PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
672: PetscInt N = dgmres->max_k+1;
673: PetscInt n = dgmres->it+1;
674: PetscReal alpha;
677: PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
678: if (dgmres->neig == 0) return(0);
679: if (max_neig < (r+neig1) && !dgmres->improve) {
680: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
681: return(0);
682: }
684: KSPDGMRESComputeSchurForm(ksp, &neig);
685: /* Form the extended Schur vectors X=VV*Sr */
686: if (!XX) {
687: VecDuplicateVecs(VEC_VV(0), neig1, &XX);
688: }
689: for (j = 0; j<neig; j++) {
690: VecZeroEntries(XX[j]);
691: VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
692: }
694: /* Orthogonalize X against U */
695: if (!ORTH) {
696: PetscMalloc(max_neig*sizeof(PetscReal), &ORTH);
697: }
698: if (r > 0) {
699: /* modified Gram-Schmidt */
700: for (j = 0; j<neig; j++) {
701: for (i=0; i<r; i++) {
702: /* First, compute U'*X[j] */
703: VecDot(XX[j], UU[i], &alpha);
704: /* Then, compute X(j)=X(j)-U*U'*X(j) */
705: VecAXPY(XX[j], -alpha, UU[i]);
706: }
707: }
708: }
709: /* Compute MX = M^{-1}*A*X */
710: if (!MX) {
711: VecDuplicateVecs(VEC_VV(0), neig1, &MX);
712: }
713: for (j = 0; j<neig; j++) {
714: KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
715: }
716: dgmres->matvecs += neig;
718: if ((r+neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
719: KSPDGMRESImproveEig(ksp, neig);
720: return(0); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
721: }
723: /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
724: if (!XMX) {
725: PetscMalloc(neig1*neig1*sizeof(PetscReal), &XMX);
726: }
727: for (j = 0; j < neig; j++) {
728: VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
729: }
731: if (r > 0) {
732: /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
733: if (!UMX) {
734: PetscMalloc(max_neig*neig1*sizeof(PetscReal), &UMX);
735: }
736: for (j = 0; j < neig; j++) {
737: VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
738: }
739: /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
740: if (!XMU) {
741: PetscMalloc(max_neig*neig1*sizeof(PetscReal), &XMU);
742: }
743: for (j = 0; j<r; j++) {
744: VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
745: }
746: }
748: /* Form the new matrix T = [T UMX; XMU XMX]; */
749: if (!TT) {
750: PetscMalloc(max_neig*max_neig*sizeof(PetscReal), &TT);
751: }
752: if (r > 0) {
753: /* Add XMU to T */
754: for (j = 0; j < r; j++) {
755: PetscMemcpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig*sizeof(PetscReal));
756: }
757: /* Add [UMX; XMX] to T */
758: for (j = 0; j < neig; j++) {
759: k = r+j;
760: PetscMemcpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r*sizeof(PetscReal));
761: PetscMemcpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
762: }
763: } else { /* Add XMX to T */
764: for (j = 0; j < neig; j++) {
765: PetscMemcpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
766: }
767: }
769: dgmres->r += neig;
770: r = dgmres->r;
771: PetscBLASIntCast(r,&nr);
772: /*LU Factorize T with Lapack xgetrf routine */
774: PetscBLASIntCast(max_neig,&bmax);
775: if (!TTF) {
776: PetscMalloc(bmax*bmax*sizeof(PetscReal), &TTF);
777: }
778: PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
779: if (!INVP) {
780: PetscMalloc(bmax*sizeof(PetscBLASInt), &INVP);
781: }
782: #if defined(PETSC_MISSING_LAPACK_GETRF)
783: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
784: #else
785: {
786: PetscBLASInt info;
787: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
788: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
789: }
790: #endif
792: /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
793: if (!UU) {
794: VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
795: VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
796: }
797: for (j=0; j<neig; j++) {
798: VecCopy(XX[j], UU[r-neig+j]);
799: VecCopy(MX[j], MU[r-neig+j]);
800: }
801: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
802: return(0);
803: }
807: static PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
808: {
809: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
811: PetscInt N = dgmres->max_k + 1, n=dgmres->it+1;
812: PetscBLASInt bn, bN;
813: PetscReal *A;
814: PetscBLASInt ihi;
815: PetscBLASInt ldA; /* leading dimension of A */
816: PetscBLASInt ldQ; /* leading dimension of Q */
817: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
818: PetscReal *work; /* working vector */
819: PetscBLASInt lwork; /* size of the working vector */
820: PetscInt *perm; /* Permutation vector to sort eigenvalues */
821: PetscInt i, j;
822: PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
823: PetscReal *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
824: PetscBLASInt *select;
825: PetscBLASInt *iwork;
826: PetscBLASInt liwork;
827: PetscScalar *Ht; /* Transpose of the Hessenberg matrix */
828: PetscScalar *t; /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
829: PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
830: PetscBool flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
833: PetscBLASIntCast(n,&bn);
834: PetscBLASIntCast(N,&bN);
835: ihi = ldQ = bn;
836: ldA = bN;
837: PetscBLASIntCast(5*N,&lwork);
839: #if defined(PETSC_USE_COMPLEX)
840: SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
841: #endif
843: PetscMalloc(ldA*ldA*sizeof(PetscReal), &A);
844: PetscMalloc(ldQ*n*sizeof(PetscReal), &Q);
845: PetscMalloc(lwork*sizeof(PetscReal), &work);
846: if (!dgmres->wr) {
847: PetscMalloc(n*sizeof(PetscReal), &dgmres->wr);
848: PetscMalloc(n*sizeof(PetscReal), &dgmres->wi);
849: }
850: wr = dgmres->wr;
851: wi = dgmres->wi;
852: PetscMalloc(n*sizeof(PetscReal),&modul);
853: PetscMalloc(n*sizeof(PetscInt),&perm);
854: /* copy the Hessenberg matrix to work space */
855: PetscMemcpy(A, dgmres->hes_origin, ldA*ldA*sizeof(PetscReal));
856: PetscOptionsHasName(NULL, "-ksp_dgmres_harmonic_ritz", &flag);
857: if (flag) {
858: /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
859: /* Transpose the Hessenberg matrix */
860: PetscMalloc(bn*bn*sizeof(PetscScalar), &Ht);
861: for (i = 0; i < bn; i++) {
862: for (j = 0; j < bn; j++) {
863: Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
864: }
865: }
867: /* Solve the system H^T*t = h_{m+1,m}e_m */
868: PetscMalloc(bn*sizeof(PetscScalar), &t);
869: PetscMemzero(t, bn*sizeof(PetscScalar));
870: t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
871: PetscMalloc(bn*sizeof(PetscBLASInt), &ipiv);
872: /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
873: #if defined(PETSC_MISSING_LAPACK_GESV)
874: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
875: #else
876: {
877: PetscBLASInt info;
878: PetscBLASInt nrhs = 1;
879: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
880: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
881: }
882: #endif
883: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
884: for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
885: PetscFree(t);
886: PetscFree(Ht);
887: }
888: /* Compute eigenvalues with the Schur form */
889: #if defined(PETSC_MISSING_LAPACK_HSEQR)
890: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
891: #else
892: {
893: PetscBLASInt info;
894: PetscBLASInt ilo = 1;
895: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
896: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
897: }
898: #endif
899: PetscFree(work);
901: /* sort the eigenvalues */
902: for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
903: for (i=0; i<n; i++) perm[i] = i;
905: PetscSortRealWithPermutation(n, modul, perm);
906: /* save the complex modulus of the largest eigenvalue in magnitude */
907: if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
908: /* count the number of extracted eigenvalues (with complex conjugates) */
909: NbrEig = 0;
910: while (NbrEig < dgmres->neig) {
911: if (wi[perm[NbrEig]] != 0) NbrEig += 2;
912: else NbrEig += 1;
913: }
914: /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
916: PetscMalloc(n * sizeof(PetscBLASInt), &select);
917: PetscMemzero(select, n * sizeof(PetscBLASInt));
919: if (!dgmres->GreatestEig) {
920: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
921: } else {
922: for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
923: }
924: /* call Lapack dtrsen */
925: lwork = PetscMax(1, 4 * NbrEig *(bn-NbrEig));
926: liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
927: PetscMalloc(lwork * sizeof(PetscScalar), &work);
928: PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
929: #if defined(PETSC_MISSING_LAPACK_TRSEN)
930: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
931: #else
932: {
933: PetscBLASInt info;
934: PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
935: PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
936: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
937: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
938: }
939: #endif
940: PetscFree(select);
942: /* Extract the Schur vectors */
943: for (j = 0; j < NbrEig; j++) {
944: PetscMemcpy(&SR[j*N], &(Q[j*ldQ]), n*sizeof(PetscReal));
945: }
946: *neig = NbrEig;
947: PetscFree(A);
948: PetscFree(work);
949: PetscFree(perm);
950: PetscFree(work);
951: PetscFree(iwork);
952: PetscFree(modul);
953: PetscFree(Q);
954: return(0);
955: }
959: static PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
960: {
961: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
962: PetscInt i, r = dgmres->r;
964: PetscReal alpha = 1.0;
965: PetscInt max_neig = dgmres->max_neig;
966: PetscBLASInt br,bmax;
967: PetscReal lambda = dgmres->lambdaN;
970: PetscBLASIntCast(r,&br);
971: PetscBLASIntCast(max_neig,&bmax);
972: PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
973: if (!r) {
974: VecCopy(x,y);
975: return(0);
976: }
977: /* Compute U'*x */
978: if (!X1) {
979: PetscMalloc(bmax*sizeof(PetscReal), &X1);
980: PetscMalloc(bmax*sizeof(PetscReal), &X2);
981: }
982: VecMDot(x, r, UU, X1);
984: /* Solve T*X1=X2 for X1*/
985: PetscMemcpy(X2, X1, br*sizeof(PetscReal));
986: #if defined(PETSC_MISSING_LAPACK_GETRS)
987: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
988: #else
989: {
990: PetscBLASInt info;
991: PetscBLASInt nrhs = 1;
992: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
993: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
994: }
995: #endif
996: /* Iterative refinement -- is it really necessary ?? */
997: if (!WORK) {
998: PetscMalloc(3*bmax*sizeof(PetscReal), &WORK);
999: PetscMalloc(bmax*sizeof(PetscBLASInt), &IWORK);
1000: }
1001: #if defined(PETSC_MISSING_LAPACK_GERFS)
1002: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
1003: #else
1004: {
1005: PetscBLASInt info;
1006: PetscReal berr, ferr;
1007: PetscBLASInt nrhs = 1;
1008: PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
1009: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
1010: }
1011: #endif
1013: for (i = 0; i < r; i++) X2[i] = X1[i]/lambda - X2[i];
1015: /* Compute X2=U*X2 */
1016: VecZeroEntries(y);
1017: VecMAXPY(y, r, X2, UU);
1018: VecAXPY(y, alpha, x);
1020: PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
1021: return(0);
1022: }
1026: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
1027: {
1028: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
1029: PetscInt j,r_old, r = dgmres->r;
1030: PetscBLASInt i = 0;
1031: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
1032: PetscInt bmax = dgmres->max_neig;
1033: PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
1034: PetscInt aug1 = bmax+neig1; /* maximum size of the augmented invariant basis */
1035: PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
1036: PetscBLASInt N; /* size of AUAU */
1037: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
1038: PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
1039: PetscReal *work; /* working vector */
1040: PetscBLASInt lwork; /* size of the working vector */
1041: PetscInt *perm; /* Permutation vector to sort eigenvalues */
1042: PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
1043: PetscInt ierr;
1044: PetscBLASInt NbrEig = 0,nr,bm;
1045: PetscBLASInt *select;
1046: PetscBLASInt liwork, *iwork;
1049: /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
1050: if (!AUU) {
1051: PetscMalloc(aug1*aug1*sizeof(PetscReal), &AUU);
1052: PetscMalloc(aug1*aug1*sizeof(PetscReal), &AUAU);
1053: }
1054: /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
1055: * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
1056: /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
1057: for (j=0; j < r; j++) {
1058: VecMDot(UU[j], r, MU, &AUU[j*aug1]);
1059: }
1060: /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
1061: for (j = 0; j < neig; j++) {
1062: VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
1063: }
1064: /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
1065: for (j = 0; j < r; j++) {
1066: VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
1067: }
1068: /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
1069: for (j = 0; j < neig; j++) {
1070: VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
1071: }
1073: /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
1074: /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
1075: for (j=0; j < r; j++) {
1076: VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
1077: }
1078: /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
1079: for (j = 0; j < neig; j++) {
1080: VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
1081: }
1082: /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
1083: for (j = 0; j < r; j++) {
1084: VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
1085: }
1086: /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
1087: for (j = 0; j < neig; j++) {
1088: VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
1089: }
1091: /* Computation of the eigenvectors */
1092: PetscBLASIntCast(aug1,&ldA);
1093: PetscBLASIntCast(aug,&N);
1094: lwork = 8 * N + 20; /* sizeof the working space */
1095: PetscMalloc(N*sizeof(PetscReal), &wr);
1096: PetscMalloc(N*sizeof(PetscReal), &wi);
1097: PetscMalloc(N*sizeof(PetscReal), &beta);
1098: PetscMalloc(N*sizeof(PetscReal), &modul);
1099: PetscMalloc(N*sizeof(PetscInt), &perm);
1100: PetscMalloc(N*N*sizeof(PetscReal), &Q);
1101: PetscMalloc(N*N*sizeof(PetscReal), &Z);
1102: PetscMalloc(lwork*sizeof(PetscReal), &work);
1103: #if defined(PETSC_MISSING_LAPACK_GGES)
1104: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
1105: #else
1106: {
1107: PetscBLASInt info;
1108: PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
1109: if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1110: }
1111: #endif
1112: for (i=0; i<N; i++) {
1113: if (beta[i] !=0.0) {
1114: wr[i] /=beta[i];
1115: wi[i] /=beta[i];
1116: }
1117: }
1118: /* sort the eigenvalues */
1119: for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
1120: for (i=0; i<N; i++) perm[i] = i;
1121: PetscSortRealWithPermutation(N, modul, perm);
1122: /* Save the norm of the largest eigenvalue */
1123: if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1124: /* Allocate space to extract the first r schur vectors */
1125: if (!SR2) {
1126: PetscMalloc(aug1*bmax*sizeof(PetscReal), &SR2);
1127: }
1128: /* count the number of extracted eigenvalues (complex conjugates count as 2) */
1129: while (NbrEig < bmax) {
1130: if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1131: else NbrEig += 2;
1132: }
1133: if (NbrEig > bmax) NbrEig = bmax - 1;
1134: r_old = r; /* previous size of r */
1135: dgmres->r = r = NbrEig;
1137: /* Select the eigenvalues to reorder */
1138: PetscMalloc(N * sizeof(PetscBLASInt), &select);
1139: PetscMemzero(select, N * sizeof(PetscBLASInt));
1140: if (!dgmres->GreatestEig) {
1141: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
1142: } else {
1143: for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
1144: }
1145: /* Reorder and extract the new <r> schur vectors */
1146: lwork = PetscMax(4 * N + 16, 2 * NbrEig *(N - NbrEig));
1147: liwork = PetscMax(N + 6, 2 * NbrEig *(N - NbrEig));
1148: PetscFree(work);
1149: PetscMalloc(lwork * sizeof(PetscReal), &work);
1150: PetscMalloc(liwork * sizeof(PetscBLASInt), &iwork);
1151: #if defined(PETSC_MISSING_LAPACK_TGSEN)
1152: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
1153: #else
1154: {
1155: PetscBLASInt info;
1156: PetscReal Dif[2];
1157: PetscBLASInt ijob = 2;
1158: PetscBLASInt wantQ = 1, wantZ = 1;
1159: PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
1160: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1161: }
1162: #endif
1163: PetscFree(select);
1165: for (j=0; j<r; j++) {
1166: PetscMemcpy(&SR2[j*aug1], &(Z[j*N]), N*sizeof(PetscReal));
1167: }
1169: /* Multiply the Schur vectors SR2 by U (and X) to get a new U
1170: -- save it temporarily in MU */
1171: for (j = 0; j < r; j++) {
1172: VecZeroEntries(MU[j]);
1173: VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1174: VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1175: }
1176: /* Form T = U'*MU*U */
1177: for (j = 0; j < r; j++) {
1178: VecCopy(MU[j], UU[j]);
1179: KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1180: }
1181: dgmres->matvecs += r;
1182: for (j = 0; j < r; j++) {
1183: VecMDot(MU[j], r, UU, &TT[j*bmax]);
1184: }
1185: /* Factorize T */
1186: PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
1187: PetscBLASIntCast(r,&nr);
1188: PetscBLASIntCast(bmax,&bm);
1189: #if defined(PETSC_MISSING_LAPACK_GETRF)
1190: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
1191: #else
1192: {
1193: PetscBLASInt info;
1194: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1195: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
1196: }
1197: #endif
1198: /* Free Memory */
1199: PetscFree(wr);
1200: PetscFree(wi);
1201: PetscFree(beta);
1202: PetscFree(modul);
1203: PetscFree(perm);
1204: PetscFree(Q);
1205: PetscFree(Z);
1206: PetscFree(work);
1207: PetscFree(iwork);
1208: return(0);
1209: }
1211: /* end new DGMRES functions */
1213: /*MC
1214: KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1215: In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1216: stagnation occurs.
1218: Options Database Keys:
1219: GMRES Options (inherited):
1220: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1221: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1222: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1223: vectors are allocated as needed)
1224: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1225: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1226: . -ksp_gmres_cgs_refinement_type <never,ifneeded,always> - determine if iterative refinement is used to increase the
1227: stability of the classical Gram-Schmidt orthogonalization.
1228: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
1230: DGMRES Options Database Keys:
1231: + -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1232: . -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1233: process
1234: . -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1235: - -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1236: parsed by PetscOptionsGetViewer(). If neig > 1, viewerspec should
1237: end with ":append". No vectors will be viewed if the adaptive
1238: strategy chooses not to deflate, so -ksp_dgmres_force should also
1239: be given.
1240: The deflation vectors span a subspace that may be a good
1241: approximation of the subspace of smallest eigenvectors of the
1242: preconditioned operator, so this option can aid in understanding
1243: the performance of a preconditioner.
1245: Level: beginner
1247: Notes: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported
1249: References:
1251: [1]Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996), 303-318.
1252: [2]On the performance of various adaptive preconditioned GMRES strategies, 5(1998), 101-121.
1253: [3] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids, In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
1255: Contributed by: Desire NUENTSA WAKAM,INRIA
1257: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1258: KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1259: KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1260: KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()
1262: M*/
1266: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1267: {
1268: KSP_DGMRES *dgmres;
1272: PetscNewLog(ksp,KSP_DGMRES,&dgmres);
1273: ksp->data = (void*) dgmres;
1275: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
1276: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,1);
1278: ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1279: ksp->ops->setup = KSPSetUp_DGMRES;
1280: ksp->ops->solve = KSPSolve_DGMRES;
1281: ksp->ops->destroy = KSPDestroy_DGMRES;
1282: ksp->ops->view = KSPView_DGMRES;
1283: ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1284: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1285: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1287: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1288: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1289: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1290: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1291: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1292: /* -- New functions defined in DGMRES -- */
1293: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1294: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1295: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1296: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1297: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1298: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1299: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1300: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);
1302: PetscLogEventRegister("DGMRESComputeDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1303: PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);
1305: dgmres->haptol = 1.0e-30;
1306: dgmres->q_preallocate = 0;
1307: dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1308: dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1309: dgmres->nrs = 0;
1310: dgmres->sol_temp = 0;
1311: dgmres->max_k = GMRES_DEFAULT_MAXK;
1312: dgmres->Rsvd = 0;
1313: dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1314: dgmres->orthogwork = 0;
1316: /* Default values for the deflation */
1317: dgmres->r = 0;
1318: dgmres->neig = DGMRES_DEFAULT_EIG;
1319: dgmres->max_neig = DGMRES_DEFAULT_MAXEIG-1;
1320: dgmres->lambdaN = 0.0;
1321: dgmres->smv = SMV;
1322: dgmres->force = 0;
1323: dgmres->matvecs = 0;
1324: dgmres->improve = 0;
1325: dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1326: dgmres->HasSchur = PETSC_FALSE;
1327: return(0);
1328: }