Actual source code: dgmres.c
petsc-master 2020-08-25
1: /*
2: This file implements the deflated GMRES.
4: */
6: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h>
8: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
10: #define GMRES_DELTA_DIRECTIONS 10
11: #define GMRES_DEFAULT_MAXK 30
12: static PetscErrorCode KSPDGMRESGetNewVectors(KSP,PetscInt);
13: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
14: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
16: PetscErrorCode KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
17: {
21: PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
22: return(0);
23: }
24: PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
25: {
29: PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
30: return(0);
31: }
32: PetscErrorCode KSPDGMRESForce(KSP ksp,PetscBool force)
33: {
37: PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscBool),(ksp,force));
38: return(0);
39: }
40: PetscErrorCode KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
41: {
45: PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
46: return(0);
47: }
48: PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
49: {
53: PetscUseMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
54: return(0);
55: }
56: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp,PetscInt *curneigh)
57: {
61: PetscUseMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP,PetscInt*),(ksp,curneigh));
62: return(0);
63: }
64: PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
65: {
69: PetscUseMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
70: return(0);
71: }
73: PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
74: {
78: PetscUseMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
79: return(0);
80: }
82: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
83: {
85: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
86: PetscInt neig = dgmres->neig+EIG_OFFSET;
87: PetscInt max_k = dgmres->max_k+1;
90: KSPSetUp_GMRES(ksp);
91: if (!dgmres->neig) return(0);
93: /* Allocate workspace for the Schur vectors*/
94: PetscMalloc1(neig*max_k, &SR);
95: dgmres->wr = NULL;
96: dgmres->wi = NULL;
97: dgmres->perm = NULL;
98: dgmres->modul = NULL;
99: dgmres->Q = NULL;
100: dgmres->Z = NULL;
102: UU = NULL;
103: XX = NULL;
104: MX = NULL;
105: AUU = NULL;
106: XMX = NULL;
107: XMU = NULL;
108: UMX = NULL;
109: AUAU = NULL;
110: TT = NULL;
111: TTF = NULL;
112: INVP = NULL;
113: X1 = NULL;
114: X2 = NULL;
115: MU = NULL;
116: return(0);
117: }
119: /*
120: Run GMRES, possibly with restart. Return residual history if requested.
121: input parameters:
123: . gmres - structure containing parameters and work areas
125: output parameters:
126: . nres - residuals (from preconditioned system) at each step.
127: If restarting, consider passing nres+it. If null,
128: ignored
129: . itcount - number of iterations used. nres[0] to nres[itcount]
130: are defined. If null, ignored.
132: Notes:
133: On entry, the value in vector VEC_VV(0) should be the initial residual
134: (this allows shortcuts where the initial preconditioned residual is 0).
135: */
136: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
137: {
138: KSP_DGMRES *dgmres = (KSP_DGMRES*)(ksp->data);
139: PetscReal res_norm,res,hapbnd,tt;
141: PetscInt it = 0;
142: PetscInt max_k = dgmres->max_k;
143: PetscBool hapend = PETSC_FALSE;
144: PetscReal res_old;
145: PetscInt test = 0;
148: VecNormalize(VEC_VV(0),&res_norm);
149: KSPCheckNorm(ksp,res_norm);
150: res = res_norm;
151: *GRS(0) = res_norm;
153: /* check for the convergence */
154: PetscObjectSAWsTakeAccess((PetscObject)ksp);
155: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
156: else ksp->rnorm = 0.0;
157: PetscObjectSAWsGrantAccess((PetscObject)ksp);
158: dgmres->it = (it - 1);
159: KSPLogResidualHistory(ksp,ksp->rnorm);
160: KSPMonitor(ksp,ksp->its,ksp->rnorm);
161: if (!res) {
162: if (itcount) *itcount = 0;
163: ksp->reason = KSP_CONVERGED_ATOL;
164: PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
165: return(0);
166: }
167: /* record the residual norm to test if deflation is needed */
168: res_old = res;
170: (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);
171: while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
172: if (it) {
173: KSPLogResidualHistory(ksp,ksp->rnorm);
174: KSPMonitor(ksp,ksp->its,ksp->rnorm);
175: }
176: dgmres->it = (it - 1);
177: if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
178: KSPDGMRESGetNewVectors(ksp,it+1);
179: }
180: if (dgmres->r > 0) {
181: if (ksp->pc_side == PC_LEFT) {
182: /* Apply the first preconditioner */
183: KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
184: /* Then apply Deflation as a preconditioner */
185: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
186: } else if (ksp->pc_side == PC_RIGHT) {
187: KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
188: KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
189: }
190: } else {
191: KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
192: }
193: dgmres->matvecs += 1;
194: /* update hessenberg matrix and do Gram-Schmidt */
195: (*dgmres->orthog)(ksp,it);
197: /* vv(i+1) . vv(i+1) */
198: VecNormalize(VEC_VV(it+1),&tt);
199: /* save the magnitude */
200: *HH(it+1,it) = tt;
201: *HES(it+1,it) = tt;
203: /* check for the happy breakdown */
204: hapbnd = PetscAbsScalar(tt / *GRS(it));
205: if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
206: if (tt < hapbnd) {
207: PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %g tt = %g\n",(double)hapbnd,(double)tt);
208: hapend = PETSC_TRUE;
209: }
210: KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);
212: it++;
213: dgmres->it = (it-1); /* For converged */
214: ksp->its++;
215: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
216: else ksp->rnorm = 0.0;
217: if (ksp->reason) break;
219: (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);
221: /* Catch error in happy breakdown and signal convergence and break from loop */
222: if (hapend) {
223: if (!ksp->reason) {
224: if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res);
225: else {
226: ksp->reason = KSP_DIVERGED_BREAKDOWN;
227: break;
228: }
229: }
230: }
231: }
233: /* Monitor if we know that we will not return for a restart */
234: if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
235: KSPLogResidualHistory(ksp,ksp->rnorm);
236: KSPMonitor(ksp,ksp->its,ksp->rnorm);
237: }
238: if (itcount) *itcount = it;
240: /*
241: Down here we have to solve for the "best" coefficients of the Krylov
242: columns, add the solution values together, and possibly unwind the
243: preconditioning from the solution
244: */
245: /* Form the solution (or the solution so far) */
246: KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);
248: /* Compute data for the deflation to be used during the next restart */
249: if (!ksp->reason && ksp->its < ksp->max_it) {
250: test = max_k *PetscLogReal(ksp->rtol/res) /PetscLogReal(res/res_old);
251: /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
252: if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
253: KSPDGMRESComputeDeflationData(ksp,NULL);
254: }
255: }
256: return(0);
257: }
259: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
260: {
262: PetscInt i,its,itcount;
263: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
264: PetscBool guess_zero = ksp->guess_zero;
267: if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
269: PetscObjectSAWsTakeAccess((PetscObject)ksp);
270: ksp->its = 0;
271: dgmres->matvecs = 0;
272: PetscObjectSAWsGrantAccess((PetscObject)ksp);
274: itcount = 0;
275: ksp->reason = KSP_CONVERGED_ITERATING;
276: while (!ksp->reason) {
277: KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
278: if (ksp->pc_side == PC_LEFT) {
279: dgmres->matvecs += 1;
280: if (dgmres->r > 0) {
281: KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
282: VecCopy(VEC_TEMP, VEC_VV(0));
283: }
284: }
286: KSPDGMRESCycle(&its,ksp);
287: itcount += its;
288: if (itcount >= ksp->max_it) {
289: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
290: break;
291: }
292: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
293: }
294: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
296: for (i = 0; i < dgmres->r; i++) {
297: VecViewFromOptions(UU[i],(PetscObject)ksp,"-ksp_dgmres_view_deflation_vecs");
298: }
299: return(0);
300: }
302: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
303: {
305: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
306: PetscInt neig1 = dgmres->neig+EIG_OFFSET;
307: PetscInt max_neig = dgmres->max_neig;
310: if (dgmres->r) {
311: VecDestroyVecs(max_neig, &UU);
312: VecDestroyVecs(max_neig, &MU);
313: if (XX) {
314: VecDestroyVecs(neig1, &XX);
315: VecDestroyVecs(neig1, &MX);
316: }
318: PetscFree(TT);
319: PetscFree(TTF);
320: PetscFree(INVP);
322: PetscFree(XMX);
323: PetscFree(UMX);
324: PetscFree(XMU);
325: PetscFree(X1);
326: PetscFree(X2);
327: PetscFree(dgmres->work);
328: PetscFree(dgmres->iwork);
329: PetscFree(dgmres->wr);
330: PetscFree(dgmres->wi);
331: PetscFree(dgmres->modul);
332: PetscFree(dgmres->Q);
333: PetscFree(ORTH);
334: PetscFree(AUAU);
335: PetscFree(AUU);
336: PetscFree(SR2);
337: }
338: PetscFree(SR);
339: KSPDestroy_GMRES(ksp);
340: return(0);
341: }
342: /*
343: KSPDGMRESBuildSoln - create the solution from the starting vector and the
344: current iterates.
346: Input parameters:
347: nrs - work area of size it + 1.
348: vs - index of initial guess
349: vdest - index of result. Note that vs may == vdest (replace
350: guess with the solution).
352: This is an internal routine that knows about the GMRES internals.
353: */
354: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
355: {
356: PetscScalar tt;
358: PetscInt ii,k,j;
359: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
361: /* Solve for solution vector that minimizes the residual */
364: /* If it is < 0, no gmres steps have been performed */
365: if (it < 0) {
366: VecCopy(vs,vdest); /* VecCopy() is smart, exists immediately if vguess == vdest */
367: return(0);
368: }
369: if (*HH(it,it) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %g",it,(double)PetscAbsScalar(*GRS(it)));
370: if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
371: else nrs[it] = 0.0;
373: for (ii=1; ii<=it; ii++) {
374: k = it - ii;
375: tt = *GRS(k);
376: for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
377: if (*HH(k,k) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
378: nrs[k] = tt / *HH(k,k);
379: }
381: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
382: VecSet(VEC_TEMP,0.0);
383: VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));
385: /* Apply deflation */
386: if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
387: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
388: VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
389: }
390: KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);
392: /* add solution to previous solution */
393: if (vdest != vs) {
394: VecCopy(vs,vdest);
395: }
396: VecAXPY(vdest,1.0,VEC_TEMP);
397: return(0);
398: }
399: /*
400: Do the scalar work for the orthogonalization. Return new residual norm.
401: */
402: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
403: {
404: PetscScalar *hh,*cc,*ss,tt;
405: PetscInt j;
406: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
409: hh = HH(0,it);
410: cc = CC(0);
411: ss = SS(0);
413: /* Apply all the previously computed plane rotations to the new column
414: of the Hessenberg matrix */
415: for (j=1; j<=it; j++) {
416: tt = *hh;
417: *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
418: hh++;
419: *hh = *cc++ * *hh -(*ss++ * tt);
420: }
422: /*
423: compute the new plane rotation, and apply it to:
424: 1) the right-hand-side of the Hessenberg system
425: 2) the new column of the Hessenberg matrix
426: thus obtaining the updated value of the residual
427: */
428: if (!hapend) {
429: tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
430: if (tt == 0.0) {
431: ksp->reason = KSP_DIVERGED_NULL;
432: return(0);
433: }
434: *cc = *hh / tt;
435: *ss = *(hh+1) / tt;
436: *GRS(it+1) = -(*ss * *GRS(it));
437: *GRS(it) = PetscConj(*cc) * *GRS(it);
438: *hh = PetscConj(*cc) * *hh + *ss * *(hh+1);
439: *res = PetscAbsScalar(*GRS(it+1));
440: } else {
441: /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
442: another rotation matrix (so RH doesn't change). The new residual is
443: always the new sine term times the residual from last time (GRS(it)),
444: but now the new sine rotation would be zero...so the residual should
445: be zero...so we will multiply "zero" by the last residual. This might
446: not be exactly what we want to do here -could just return "zero". */
448: *res = 0.0;
449: }
450: return(0);
451: }
452: /*
453: This routine allocates more work vectors, starting from VEC_VV(it).
454: */
455: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
456: {
457: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
459: PetscInt nwork = dgmres->nwork_alloc,k,nalloc;
462: nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
463: /* Adjust the number to allocate to make sure that we don't exceed the
464: number of available slots */
465: if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
466: nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
467: }
468: if (!nalloc) return(0);
470: dgmres->vv_allocated += nalloc;
472: KSPCreateVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
473: PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);
475: dgmres->mwork_alloc[nwork] = nalloc;
476: for (k=0; k<nalloc; k++) {
477: dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
478: }
479: dgmres->nwork_alloc++;
480: return(0);
481: }
483: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
484: {
485: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
489: if (!ptr) {
490: if (!dgmres->sol_temp) {
491: VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
492: PetscLogObjectParent((PetscObject)ksp,(PetscObject)dgmres->sol_temp);
493: }
494: ptr = dgmres->sol_temp;
495: }
496: if (!dgmres->nrs) {
497: /* allocate the work area */
498: PetscMalloc1(dgmres->max_k,&dgmres->nrs);
499: PetscLogObjectMemory((PetscObject)ksp,dgmres->max_k*sizeof(PetscScalar));
500: }
502: KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
503: if (result) *result = ptr;
504: return(0);
505: }
507: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
508: {
509: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
511: PetscBool iascii,isharmonic;
514: KSPView_GMRES(ksp,viewer);
515: PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
516: if (iascii) {
517: if (dgmres->force) PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: FALSE\n");
518: else PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: TRUE\n");
519: PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic);
520: if (isharmonic) {
521: PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
522: } else {
523: PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
524: }
525: PetscViewerASCIIPrintf(viewer, " Total number of extracted eigenvalues = %D\n", dgmres->r);
526: PetscViewerASCIIPrintf(viewer, " Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
527: PetscViewerASCIIPrintf(viewer, " relaxation parameter for the adaptive strategy(smv) = %g\n", dgmres->smv);
528: PetscViewerASCIIPrintf(viewer, " Number of matvecs : %D\n", dgmres->matvecs);
529: }
530: return(0);
531: }
533: /* New DGMRES functions */
535: PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
536: {
537: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
540: if (neig< 0 && neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
541: dgmres->neig=neig;
542: return(0);
543: }
545: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
546: {
547: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
550: if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
551: dgmres->max_neig=max_neig;
552: return(0);
553: }
555: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
556: {
557: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
560: if (ratio <= 0) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
561: dgmres->smv=ratio;
562: return(0);
563: }
565: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp,PetscBool force)
566: {
567: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
570: dgmres->force = force;
571: return(0);
572: }
574: PetscErrorCode KSPSetFromOptions_DGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
575: {
577: PetscInt neig;
578: PetscInt max_neig;
579: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
580: PetscBool flg;
583: KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);
584: PetscOptionsHead(PetscOptionsObject,"KSP DGMRES Options");
585: PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
586: if (flg) {
587: KSPDGMRESSetEigen(ksp, neig);
588: }
589: PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
590: if (flg) {
591: KSPDGMRESSetMaxEigen(ksp, max_neig);
592: }
593: PetscOptionsReal("-ksp_dgmres_ratio","Relaxation parameter for the smaller number of matrix-vectors product allowed","KSPDGMRESSetRatio",dgmres->smv,&dgmres->smv,NULL);
594: PetscOptionsBool("-ksp_dgmres_improve","Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)",NULL,dgmres->improve,&dgmres->improve,NULL);
595: PetscOptionsBool("-ksp_dgmres_force","Sets DGMRES always at restart active, i.e do not use the adaptive strategy","KSPDGMRESForce",dgmres->force,&dgmres->force,NULL);
596: PetscOptionsTail();
597: return(0);
598: }
600: PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
601: {
602: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
604: PetscInt i,j, k;
605: PetscBLASInt nr, bmax;
606: PetscInt r = dgmres->r;
607: PetscInt neig; /* number of eigenvalues to extract at each restart */
608: PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
609: PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
610: PetscInt N = dgmres->max_k+1;
611: PetscInt n = dgmres->it+1;
612: PetscReal alpha;
615: PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
616: if (dgmres->neig == 0 || (max_neig < (r+neig1) && !dgmres->improve)) {
617: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
618: return(0);
619: }
621: KSPDGMRESComputeSchurForm(ksp, &neig);
622: /* Form the extended Schur vectors X=VV*Sr */
623: if (!XX) {
624: VecDuplicateVecs(VEC_VV(0), neig1, &XX);
625: }
626: for (j = 0; j<neig; j++) {
627: VecZeroEntries(XX[j]);
628: VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
629: }
631: /* Orthogonalize X against U */
632: if (!ORTH) {
633: PetscMalloc1(max_neig, &ORTH);
634: }
635: if (r > 0) {
636: /* modified Gram-Schmidt */
637: for (j = 0; j<neig; j++) {
638: for (i=0; i<r; i++) {
639: /* First, compute U'*X[j] */
640: VecDot(XX[j], UU[i], &alpha);
641: /* Then, compute X(j)=X(j)-U*U'*X(j) */
642: VecAXPY(XX[j], -alpha, UU[i]);
643: }
644: }
645: }
646: /* Compute MX = M^{-1}*A*X */
647: if (!MX) {
648: VecDuplicateVecs(VEC_VV(0), neig1, &MX);
649: }
650: for (j = 0; j<neig; j++) {
651: KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
652: }
653: dgmres->matvecs += neig;
655: if ((r+neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
656: KSPDGMRESImproveEig(ksp, neig);
657: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
658: return(0); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
659: }
661: /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
662: if (!XMX) {
663: PetscMalloc1(neig1*neig1, &XMX);
664: }
665: for (j = 0; j < neig; j++) {
666: VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
667: }
669: if (r > 0) {
670: /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
671: if (!UMX) {
672: PetscMalloc1(max_neig*neig1, &UMX);
673: }
674: for (j = 0; j < neig; j++) {
675: VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
676: }
677: /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
678: if (!XMU) {
679: PetscMalloc1(max_neig*neig1, &XMU);
680: }
681: for (j = 0; j<r; j++) {
682: VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
683: }
684: }
686: /* Form the new matrix T = [T UMX; XMU XMX]; */
687: if (!TT) {
688: PetscMalloc1(max_neig*max_neig, &TT);
689: }
690: if (r > 0) {
691: /* Add XMU to T */
692: for (j = 0; j < r; j++) {
693: PetscArraycpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig);
694: }
695: /* Add [UMX; XMX] to T */
696: for (j = 0; j < neig; j++) {
697: k = r+j;
698: PetscArraycpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r);
699: PetscArraycpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig);
700: }
701: } else { /* Add XMX to T */
702: for (j = 0; j < neig; j++) {
703: PetscArraycpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig);
704: }
705: }
707: dgmres->r += neig;
708: r = dgmres->r;
709: PetscBLASIntCast(r,&nr);
710: /*LU Factorize T with Lapack xgetrf routine */
712: PetscBLASIntCast(max_neig,&bmax);
713: if (!TTF) {
714: PetscMalloc1(bmax*bmax, &TTF);
715: }
716: PetscArraycpy(TTF, TT, bmax*r);
717: if (!INVP) {
718: PetscMalloc1(bmax, &INVP);
719: }
720: {
721: PetscBLASInt info;
722: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
723: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
724: }
726: /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
727: if (!UU) {
728: VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
729: VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
730: }
731: for (j=0; j<neig; j++) {
732: VecCopy(XX[j], UU[r-neig+j]);
733: VecCopy(MX[j], MU[r-neig+j]);
734: }
735: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
736: return(0);
737: }
739: PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
740: {
741: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
743: PetscInt N = dgmres->max_k + 1, n=dgmres->it+1;
744: PetscBLASInt bn, bN;
745: PetscReal *A;
746: PetscBLASInt ihi;
747: PetscBLASInt ldA; /* leading dimension of A */
748: PetscBLASInt ldQ; /* leading dimension of Q */
749: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
750: PetscReal *work; /* working vector */
751: PetscBLASInt lwork; /* size of the working vector */
752: PetscInt *perm; /* Permutation vector to sort eigenvalues */
753: PetscInt i, j;
754: PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
755: PetscReal *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
756: PetscBLASInt *select;
757: PetscBLASInt *iwork;
758: PetscBLASInt liwork;
759: PetscScalar *Ht; /* Transpose of the Hessenberg matrix */
760: PetscScalar *t; /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
761: PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
762: PetscBool flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
765: PetscBLASIntCast(n,&bn);
766: PetscBLASIntCast(N,&bN);
767: ihi = ldQ = bn;
768: ldA = bN;
769: PetscBLASIntCast(5*N,&lwork);
771: #if defined(PETSC_USE_COMPLEX)
772: SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
773: #endif
775: PetscMalloc1(ldA*ldA, &A);
776: PetscMalloc1(ldQ*n, &Q);
777: PetscMalloc1(lwork, &work);
778: if (!dgmres->wr) {
779: PetscMalloc1(n, &dgmres->wr);
780: PetscMalloc1(n, &dgmres->wi);
781: }
782: wr = dgmres->wr;
783: wi = dgmres->wi;
784: PetscMalloc1(n,&modul);
785: PetscMalloc1(n,&perm);
786: /* copy the Hessenberg matrix to work space */
787: PetscArraycpy(A, dgmres->hes_origin, ldA*ldA);
788: PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag);
789: if (flag) {
790: /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
791: /* Transpose the Hessenberg matrix */
792: PetscMalloc1(bn*bn, &Ht);
793: for (i = 0; i < bn; i++) {
794: for (j = 0; j < bn; j++) {
795: Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
796: }
797: }
799: /* Solve the system H^T*t = h_{m+1,m}e_m */
800: PetscCalloc1(bn, &t);
801: t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
802: PetscMalloc1(bn, &ipiv);
803: /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
804: {
805: PetscBLASInt info;
806: PetscBLASInt nrhs = 1;
807: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
808: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
809: }
810: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
811: for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
812: PetscFree(t);
813: PetscFree(Ht);
814: }
815: /* Compute eigenvalues with the Schur form */
816: {
817: PetscBLASInt info=0;
818: PetscBLASInt ilo = 1;
819: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
820: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
821: }
822: PetscFree(work);
824: /* sort the eigenvalues */
825: for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
826: for (i=0; i<n; i++) perm[i] = i;
828: PetscSortRealWithPermutation(n, modul, perm);
829: /* save the complex modulus of the largest eigenvalue in magnitude */
830: if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
831: /* count the number of extracted eigenvalues (with complex conjugates) */
832: NbrEig = 0;
833: while (NbrEig < dgmres->neig) {
834: if (wi[perm[NbrEig]] != 0) NbrEig += 2;
835: else NbrEig += 1;
836: }
837: /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
839: PetscCalloc1(n, &select);
841: if (!dgmres->GreatestEig) {
842: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
843: } else {
844: for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
845: }
846: /* call Lapack dtrsen */
847: lwork = PetscMax(1, 4 * NbrEig *(bn-NbrEig));
848: liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
849: PetscMalloc1(lwork, &work);
850: PetscMalloc1(liwork, &iwork);
851: {
852: PetscBLASInt info=0;
853: PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
854: PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
855: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
856: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
857: }
858: PetscFree(select);
860: /* Extract the Schur vectors */
861: for (j = 0; j < NbrEig; j++) {
862: PetscArraycpy(&SR[j*N], &(Q[j*ldQ]), n);
863: }
864: *neig = NbrEig;
865: PetscFree(A);
866: PetscFree(work);
867: PetscFree(perm);
868: PetscFree(work);
869: PetscFree(iwork);
870: PetscFree(modul);
871: PetscFree(Q);
872: return(0);
873: }
875: PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
876: {
877: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
878: PetscInt i, r = dgmres->r;
880: PetscReal alpha = 1.0;
881: PetscInt max_neig = dgmres->max_neig;
882: PetscBLASInt br,bmax;
883: PetscReal lambda = dgmres->lambdaN;
886: PetscBLASIntCast(r,&br);
887: PetscBLASIntCast(max_neig,&bmax);
888: PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
889: if (!r) {
890: VecCopy(x,y);
891: return(0);
892: }
893: /* Compute U'*x */
894: if (!X1) {
895: PetscMalloc1(bmax, &X1);
896: PetscMalloc1(bmax, &X2);
897: }
898: VecMDot(x, r, UU, X1);
900: /* Solve T*X1=X2 for X1*/
901: PetscArraycpy(X2, X1, br);
902: {
903: PetscBLASInt info;
904: PetscBLASInt nrhs = 1;
905: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
906: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
907: }
908: /* Iterative refinement -- is it really necessary ?? */
909: if (!WORK) {
910: PetscMalloc1(3*bmax, &WORK);
911: PetscMalloc1(bmax, &IWORK);
912: }
913: {
914: PetscBLASInt info;
915: PetscReal berr, ferr;
916: PetscBLASInt nrhs = 1;
917: PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
918: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
919: }
921: for (i = 0; i < r; i++) X2[i] = X1[i]/lambda - X2[i];
923: /* Compute X2=U*X2 */
924: VecZeroEntries(y);
925: VecMAXPY(y, r, X2, UU);
926: VecAXPY(y, alpha, x);
928: PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
929: return(0);
930: }
932: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
933: {
934: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
935: PetscInt j,r_old, r = dgmres->r;
936: PetscBLASInt i = 0;
937: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
938: PetscInt bmax = dgmres->max_neig;
939: PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
940: PetscInt aug1 = bmax+neig1; /* maximum size of the augmented invariant basis */
941: PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
942: PetscBLASInt N; /* size of AUAU */
943: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
944: PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
945: PetscReal *work; /* working vector */
946: PetscBLASInt lwork; /* size of the working vector */
947: PetscInt *perm; /* Permutation vector to sort eigenvalues */
948: PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
949: PetscInt ierr;
950: PetscBLASInt NbrEig = 0,nr,bm;
951: PetscBLASInt *select;
952: PetscBLASInt liwork, *iwork;
955: /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
956: if (!AUU) {
957: PetscMalloc1(aug1*aug1, &AUU);
958: PetscMalloc1(aug1*aug1, &AUAU);
959: }
960: /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
961: * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
962: /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
963: for (j=0; j < r; j++) {
964: VecMDot(UU[j], r, MU, &AUU[j*aug1]);
965: }
966: /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
967: for (j = 0; j < neig; j++) {
968: VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
969: }
970: /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
971: for (j = 0; j < r; j++) {
972: VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
973: }
974: /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
975: for (j = 0; j < neig; j++) {
976: VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
977: }
979: /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
980: /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
981: for (j=0; j < r; j++) {
982: VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
983: }
984: /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
985: for (j = 0; j < neig; j++) {
986: VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
987: }
988: /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
989: for (j = 0; j < r; j++) {
990: VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
991: }
992: /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
993: for (j = 0; j < neig; j++) {
994: VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
995: }
997: /* Computation of the eigenvectors */
998: PetscBLASIntCast(aug1,&ldA);
999: PetscBLASIntCast(aug,&N);
1000: lwork = 8 * N + 20; /* sizeof the working space */
1001: PetscMalloc1(N, &wr);
1002: PetscMalloc1(N, &wi);
1003: PetscMalloc1(N, &beta);
1004: PetscMalloc1(N, &modul);
1005: PetscMalloc1(N, &perm);
1006: PetscMalloc1(N*N, &Q);
1007: PetscMalloc1(N*N, &Z);
1008: PetscMalloc1(lwork, &work);
1009: {
1010: PetscBLASInt info=0;
1011: PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
1012: if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1013: }
1014: for (i=0; i<N; i++) {
1015: if (beta[i] !=0.0) {
1016: wr[i] /=beta[i];
1017: wi[i] /=beta[i];
1018: }
1019: }
1020: /* sort the eigenvalues */
1021: for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
1022: for (i=0; i<N; i++) perm[i] = i;
1023: PetscSortRealWithPermutation(N, modul, perm);
1024: /* Save the norm of the largest eigenvalue */
1025: if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1026: /* Allocate space to extract the first r schur vectors */
1027: if (!SR2) {
1028: PetscMalloc1(aug1*bmax, &SR2);
1029: }
1030: /* count the number of extracted eigenvalues (complex conjugates count as 2) */
1031: while (NbrEig < bmax) {
1032: if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1033: else NbrEig += 2;
1034: }
1035: if (NbrEig > bmax) NbrEig = bmax - 1;
1036: r_old = r; /* previous size of r */
1037: dgmres->r = r = NbrEig;
1039: /* Select the eigenvalues to reorder */
1040: PetscCalloc1(N, &select);
1041: if (!dgmres->GreatestEig) {
1042: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
1043: } else {
1044: for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
1045: }
1046: /* Reorder and extract the new <r> schur vectors */
1047: lwork = PetscMax(4 * N + 16, 2 * NbrEig *(N - NbrEig));
1048: liwork = PetscMax(N + 6, 2 * NbrEig *(N - NbrEig));
1049: PetscFree(work);
1050: PetscMalloc1(lwork, &work);
1051: PetscMalloc1(liwork, &iwork);
1052: {
1053: PetscBLASInt info=0;
1054: PetscReal Dif[2];
1055: PetscBLASInt ijob = 2;
1056: PetscBLASInt wantQ = 1, wantZ = 1;
1057: PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
1058: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1059: }
1060: PetscFree(select);
1062: for (j=0; j<r; j++) {
1063: PetscArraycpy(&SR2[j*aug1], &(Z[j*N]), N);
1064: }
1066: /* Multiply the Schur vectors SR2 by U (and X) to get a new U
1067: -- save it temporarily in MU */
1068: for (j = 0; j < r; j++) {
1069: VecZeroEntries(MU[j]);
1070: VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1071: VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1072: }
1073: /* Form T = U'*MU*U */
1074: for (j = 0; j < r; j++) {
1075: VecCopy(MU[j], UU[j]);
1076: KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1077: }
1078: dgmres->matvecs += r;
1079: for (j = 0; j < r; j++) {
1080: VecMDot(MU[j], r, UU, &TT[j*bmax]);
1081: }
1082: /* Factorize T */
1083: PetscArraycpy(TTF, TT, bmax*r);
1084: PetscBLASIntCast(r,&nr);
1085: PetscBLASIntCast(bmax,&bm);
1086: {
1087: PetscBLASInt info;
1088: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1089: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
1090: }
1091: /* Free Memory */
1092: PetscFree(wr);
1093: PetscFree(wi);
1094: PetscFree(beta);
1095: PetscFree(modul);
1096: PetscFree(perm);
1097: PetscFree(Q);
1098: PetscFree(Z);
1099: PetscFree(work);
1100: PetscFree(iwork);
1101: return(0);
1102: }
1104: /* end new DGMRES functions */
1106: /*MC
1107: KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1108: In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1109: stagnation occurs.
1111: Options Database Keys:
1112: GMRES Options (inherited):
1113: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1114: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1115: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1116: vectors are allocated as needed)
1117: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1118: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1119: . -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
1120: stability of the classical Gram-Schmidt orthogonalization.
1121: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
1123: DGMRES Options Database Keys:
1124: + -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1125: . -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1126: process
1127: . -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1128: - -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1129: parsed by PetscOptionsGetViewer(). If neig > 1, viewerspec should
1130: end with ":append". No vectors will be viewed if the adaptive
1131: strategy chooses not to deflate, so -ksp_dgmres_force should also
1132: be given.
1133: The deflation vectors span a subspace that may be a good
1134: approximation of the subspace of smallest eigenvectors of the
1135: preconditioned operator, so this option can aid in understanding
1136: the performance of a preconditioner.
1138: Level: beginner
1140: Notes:
1141: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported
1143: References:
1144: + 1. - J. Erhel, K. Burrage and B. Pohl, Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996).
1145: - 2. - D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids,
1146: In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
1148: Contributed by: Desire NUENTSA WAKAM,INRIA
1150: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1151: KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1152: KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1153: KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()
1155: M*/
1157: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1158: {
1159: KSP_DGMRES *dgmres;
1163: PetscNewLog(ksp,&dgmres);
1164: ksp->data = (void*) dgmres;
1166: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
1167: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
1168: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);
1170: ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1171: ksp->ops->setup = KSPSetUp_DGMRES;
1172: ksp->ops->solve = KSPSolve_DGMRES;
1173: ksp->ops->destroy = KSPDestroy_DGMRES;
1174: ksp->ops->view = KSPView_DGMRES;
1175: ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1176: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1177: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1179: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1180: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1181: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1182: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1183: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1184: /* -- New functions defined in DGMRES -- */
1185: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1186: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1187: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1188: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1189: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1190: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1191: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1192: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);
1194: PetscLogEventRegister("DGMRESCompDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1195: PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);
1197: dgmres->haptol = 1.0e-30;
1198: dgmres->q_preallocate = 0;
1199: dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1200: dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1201: dgmres->nrs = NULL;
1202: dgmres->sol_temp = NULL;
1203: dgmres->max_k = GMRES_DEFAULT_MAXK;
1204: dgmres->Rsvd = NULL;
1205: dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1206: dgmres->orthogwork = NULL;
1208: /* Default values for the deflation */
1209: dgmres->r = 0;
1210: dgmres->neig = DGMRES_DEFAULT_EIG;
1211: dgmres->max_neig = DGMRES_DEFAULT_MAXEIG-1;
1212: dgmres->lambdaN = 0.0;
1213: dgmres->smv = SMV;
1214: dgmres->matvecs = 0;
1215: dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1216: dgmres->HasSchur = PETSC_FALSE;
1217: return(0);
1218: }