Actual source code: asa.c
1: #define PETSCKSP_DLL
3: /* --------------------------------------------------------------------
5: Contributed by Arvid Bessen, Columbia University, June 2007
6:
7: This file implements an ASA preconditioner in PETSc as part of PC.
9: The adaptive smoothed aggregation algorithm is described in the paper
10: "Adaptive Smoothed Aggregation (ASA)", M. Brezina, R. Falgout, S. MacLachlan,
11: T. Manteuffel, S. McCormick, and J. Ruge, SIAM Journal on Scientific Computing,
12: SISC Volume 25 Issue 6, Pages 1896-1920.
14: For an example usage of this preconditioner, see, e.g.
15: $PETSC_DIR/src/ksp/ksp/examples/tutorials/ex38.c ex39.c
16: and other files in that directory.
18: This code is still somewhat experimental. A number of improvements would be desirable:
19: - keep vectors allocated on each level, instead of destroying them
20: (see mainly PCApplyVcycleOnLevel_ASA)
21: - in PCCreateTransferOp_ASA we get all of the submatrices at once, this could
22: be optimized by differentiating between local and global matrices
23: - the code does not handle it gracefully if there is just one level
24: - if relaxation is sufficient, exit of PCInitializationStage_ASA is not
25: completely clean
26: - default values could be more reasonable, especially for parallel solves,
27: where we need a parallel LU or similar
28: - the richardson scaling parameter is somewhat special, should be treated in a
29: good default way
30: - a number of parameters for smoother (sor_omega, etc.) that we store explicitly
31: could be kept in the respective smoothers themselves
32: - some parameters have to be set via command line options, there are no direct
33: function calls available
34: - numerous other stuff
36: Example runs in parallel would be with parameters like
37: mpiexec ./program -pc_asa_coarse_mat_type aijmumps -pc_asa_direct_solver 200
38: -pc_asa_max_cand_vecs 4 -pc_asa_mu_initial 50 -pc_asa_richardson_scale 1.0
39: -pc_asa_rq_improve 0.9 -asa_smoother_pc_type asm -asa_smoother_sub_pc_type sor
41: -------------------------------------------------------------------- */
43: /*
44: This defines the data structures for the smoothed aggregation procedure
45: */
46: #include src/ksp/pc/impls/asa/asa.h
48: /*
49: We need the QR algorithm from LAPACK
50: */
51: #include petscblaslapack.h
53: /* -------------------------------------------------------------------------- */
55: /* Event logging */
57: PetscEvent PC_InitializationStage_ASA, PC_GeneralSetupStage_ASA;
58: PetscEvent PC_CreateTransferOp_ASA, PC_CreateVcycle_ASA;
59: PetscTruth asa_events_registered = PETSC_FALSE;
64: /*@C
65: PCASASetDM - Sets the coarse grid information for the grids
67: Collective on PC
69: Input Parameter:
70: + pc - the context
71: - dm - the DA or ADDA or VecPack object
73: Level: advanced
75: @*/
76: PetscErrorCode PCASASetDM(PC pc,DM dm)
77: {
78: PetscErrorCode ierr,(*f)(PC,DM);
82: PetscObjectQueryFunction((PetscObject)pc,"PCASASetDM_C",(void (**)(void))&f);
83: if (f) {
84: (*f)(pc,dm);
85: }
86: return(0);
87: }
91: PetscErrorCode PCASASetDM_ASA(PC pc, DM dm)
92: {
94: PC_ASA *asa = (PC_ASA *) pc->data;
97: PetscObjectReference((PetscObject)dm);
98: asa->dm = dm;
99: return(0);
100: }
104: /*@C
105: PCASASetTolerances - Sets the convergence thresholds for ASA algorithm
107: Collective on PC
109: Input Parameter:
110: + pc - the context
111: . rtol - the relative convergence tolerance
112: (relative decrease in the residual norm)
113: . abstol - the absolute convergence tolerance
114: (absolute size of the residual norm)
115: . dtol - the divergence tolerance
116: (amount residual can increase before KSPDefaultConverged()
117: concludes that the method is diverging)
118: - maxits - maximum number of iterations to use
120: Notes:
121: Use PETSC_DEFAULT to retain the default value of any of the tolerances.
123: Level: advanced
124: @*/
125: PetscErrorCode PCASASetTolerances(PC pc, PetscReal rtol, PetscReal abstol,PetscReal dtol, PetscInt maxits)
126: {
127: PetscErrorCode ierr,(*f)(PC,PetscReal,PetscReal,PetscReal,PetscInt);
131: PetscObjectQueryFunction((PetscObject)pc,"PCASASetTolerances_C",(void (**)(void))&f);
132: if (f) {
133: (*f)(pc,rtol,abstol,dtol,maxits);
134: }
135: return(0);
136: }
140: PetscErrorCode PCASASetTolerances_ASA(PC pc, PetscReal rtol, PetscReal abstol,PetscReal dtol, PetscInt maxits)
141: {
142: PC_ASA *asa = (PC_ASA *) pc->data;
146: if (rtol != PETSC_DEFAULT) asa->rtol = rtol;
147: if (abstol != PETSC_DEFAULT) asa->abstol = abstol;
148: if (dtol != PETSC_DEFAULT) asa->divtol = dtol;
149: if (maxits != PETSC_DEFAULT) asa->max_it = maxits;
150: return(0);
151: }
155: /*
156: PCCreateLevel_ASA - Creates one level for the ASA algorithm
158: Input Parameters:
159: + level - current level
160: . comm - MPI communicator object
161: . next - pointer to next level
162: . prev - pointer to previous level
163: . ksptype - the KSP type for the smoothers on this level
164: - pctype - the PC type for the smoothers on this level
166: Output Parameters:
167: . new_asa_lev - the newly created level
169: .keywords: ASA, create, levels, multigrid
170: */
171: PetscErrorCode PCCreateLevel_ASA(PC_ASA_level **new_asa_lev, int level,MPI_Comm comm, PC_ASA_level *prev,
172: PC_ASA_level *next,KSPType ksptype, PCType pctype)
173: {
175: PC_ASA_level *asa_lev;
176:
178: PetscMalloc(sizeof(PC_ASA_level), &asa_lev);
180: asa_lev->level = level;
181: asa_lev->size = 0;
183: asa_lev->A = 0;
184: asa_lev->B = 0;
185: asa_lev->x = 0;
186: asa_lev->b = 0;
187: asa_lev->r = 0;
188:
189: asa_lev->dm = 0;
190: asa_lev->aggnum = 0;
191: asa_lev->agg = 0;
192: asa_lev->loc_agg_dofs = 0;
193: asa_lev->agg_corr = 0;
194: asa_lev->bridge_corr = 0;
195:
196: asa_lev->P = 0;
197: asa_lev->Pt = 0;
198: asa_lev->smP = 0;
199: asa_lev->smPt = 0;
201: asa_lev->comm = comm;
203: asa_lev->smoothd = 0;
204: asa_lev->smoothu = 0;
206: asa_lev->prev = prev;
207: asa_lev->next = next;
208:
209: *new_asa_lev = asa_lev;
210: return(0);
211: }
215: PetscErrorCode SafeMatDestroy(Mat *m)
216: {
217: PetscErrorCode 0;
220: if (m && *m) {MatDestroy(*m); *m=0;}
221: PetscFunctionReturn(ierr);
222: }
226: PetscErrorCode SafeVecDestroy(Vec *v)
227: {
228: PetscErrorCode 0;
231: if (v && *v) {VecDestroy(*v); *v=0;}
232: PetscFunctionReturn(ierr);
233: }
237: PetscErrorCode PrintResNorm(Mat A, Vec x, Vec b, Vec r)
238: {
240: PetscTruth destroyr = PETSC_FALSE;
241: PetscReal resnorm;
242: MPI_Comm Acomm;
245: if (!r) {
246: MatGetVecs(A, PETSC_NULL, &r);
247: destroyr = PETSC_TRUE;
248: }
249: MatMult(A, x, r);
250: VecAYPX(r, -1.0, b);
251: VecNorm(r, NORM_2, &resnorm);
252: PetscObjectGetComm((PetscObject) A, &Acomm);
253: PetscPrintf(Acomm, "Residual norm is %f.\n", resnorm);
255: if (destroyr) {
256: VecDestroy(r);
257: }
258:
259: return(0);
260: }
264: PetscErrorCode PrintEnergyNormOfDiff(Mat A, Vec x, Vec y)
265: {
267: Vec vecdiff, Avecdiff;
268: PetscScalar dotprod;
269: PetscReal dotabs;
270: MPI_Comm Acomm;
272: VecDuplicate(x, &vecdiff);
273: VecWAXPY(vecdiff, -1.0, x, y);
274: MatGetVecs(A, PETSC_NULL, &Avecdiff);
275: MatMult(A, vecdiff, Avecdiff);
276: VecDot(vecdiff, Avecdiff, &dotprod);
277: dotabs = PetscAbsScalar(dotprod);
278: PetscObjectGetComm((PetscObject) A, &Acomm);
279: PetscPrintf(Acomm, "Energy norm %f.\n", dotabs);
280: VecDestroy(vecdiff);
281: VecDestroy(Avecdiff);
282: return(0);
283: }
285: /* -------------------------------------------------------------------------- */
286: /*
287: PCDestroyLevel_ASA - Destroys one level of the ASA preconditioner
289: Input Parameter:
290: . asa_lev - pointer to level that should be destroyed
292: */
295: PetscErrorCode PCDestroyLevel_ASA(PC_ASA_level *asa_lev)
296: {
300: SafeMatDestroy(&(asa_lev->A));
301: SafeMatDestroy(&(asa_lev->B));
302: SafeVecDestroy(&(asa_lev->x));
303: SafeVecDestroy(&(asa_lev->b));
304: SafeVecDestroy(&(asa_lev->r));
306: if (asa_lev->dm) {DMDestroy(asa_lev->dm);}
308: SafeMatDestroy(&(asa_lev->agg));
309: PetscFree(asa_lev->loc_agg_dofs);
310: SafeMatDestroy(&(asa_lev->agg_corr));
311: SafeMatDestroy(&(asa_lev->bridge_corr));
313: SafeMatDestroy(&(asa_lev->P));
314: SafeMatDestroy(&(asa_lev->Pt));
315: SafeMatDestroy(&(asa_lev->smP));
316: SafeMatDestroy(&(asa_lev->smPt));
318: if (asa_lev->smoothd != asa_lev->smoothu) {
319: if (asa_lev->smoothd) {KSPDestroy(asa_lev->smoothd);}
320: }
321: if (asa_lev->smoothu) {KSPDestroy(asa_lev->smoothu);}
323: PetscFree(asa_lev);
324: return(0);
325: }
327: /* -------------------------------------------------------------------------- */
328: /*
329: PCComputeSpectralRadius_ASA - Computes the spectral radius of asa_lev->A
330: and stores it it asa_lev->spec_rad
332: Input Parameters:
333: . asa_lev - the level we are treating
335: Compute spectral radius with sqrt(||A||_1 ||A||_inf) >= ||A||_2 >= rho(A)
337: */
340: PetscErrorCode PCComputeSpectralRadius_ASA(PC_ASA_level *asa_lev)
341: {
343: PetscReal norm_1, norm_inf;
346: MatNorm(asa_lev->A, NORM_1, &norm_1);
347: MatNorm(asa_lev->A, NORM_INFINITY, &norm_inf);
348: asa_lev->spec_rad = sqrt(norm_1*norm_inf);
349: return(0);
350: }
354: PetscErrorCode PCSetRichardsonScale_ASA(KSP ksp, PetscReal spec_rad, PetscReal richardson_scale) {
356: PC pc;
357: PetscTruth flg;
358: PetscReal spec_rad_inv;
361: KSPSetInitialGuessNonzero(ksp, PETSC_TRUE);
362: if (richardson_scale != PETSC_DECIDE) {
363: KSPRichardsonSetScale(ksp, richardson_scale);
364: } else {
365: KSPGetPC(ksp, &pc);
366: PetscTypeCompare((PetscObject)(pc), PCNONE, &flg);
367: if (flg) {
368: /* WORK: this is just an educated guess. Any number between 0 and 2/rho(A)
369: should do. asa_lev->spec_rad has to be an upper bound on rho(A). */
370: spec_rad_inv = 1.0/spec_rad;
371: KSPRichardsonSetScale(ksp, spec_rad_inv);
372: } else {
373: SETERRQ(PETSC_ERR_SUP, "Unknown PC type for smoother. Please specify scaling factor with -pc_asa_richardson_scale\n");
374: }
375: }
376: return(0);
377: }
381: PetscErrorCode PCSetSORomega_ASA(PC pc, PetscReal sor_omega)
382: {
386: PCSORSetSymmetric(pc, SOR_SYMMETRIC_SWEEP);
387: if (sor_omega != PETSC_DECIDE) {
388: PCSORSetOmega(pc, sor_omega);
389: }
390: return(0);
391: }
394: /* -------------------------------------------------------------------------- */
395: /*
396: PCSetupSmoothersOnLevel_ASA - Creates the smoothers of the level.
397: We assume that asa_lev->A and asa_lev->spec_rad are correctly computed
399: Input Parameters:
400: + asa - the data structure for the ASA preconditioner
401: . asa_lev - the level we are treating
402: - maxits - maximum number of iterations to use
403: */
406: PetscErrorCode PCSetupSmoothersOnLevel_ASA(PC_ASA *asa, PC_ASA_level *asa_lev, PetscInt maxits)
407: {
408: PetscErrorCode ierr;
409: PetscTruth flg;
410: PC pc;
413: /* destroy old smoothers */
414: if (asa_lev->smoothu && asa_lev->smoothu != asa_lev->smoothd) {
415: KSPDestroy(asa_lev->smoothu);
416: }
417: asa_lev->smoothu = 0;
418: if (asa_lev->smoothd) {
419: KSPDestroy(asa_lev->smoothd);
420: }
421: asa_lev->smoothd = 0;
422: /* create smoothers */
423: KSPCreate(asa_lev->comm,&asa_lev->smoothd);
424: KSPSetType(asa_lev->smoothd, asa->ksptype_smooth);
425: KSPGetPC(asa_lev->smoothd,&pc);
426: PCSetType(pc,asa->pctype_smooth);
428: /* set up problems for smoothers */
429: KSPSetOperators(asa_lev->smoothd, asa_lev->A, asa_lev->A, DIFFERENT_NONZERO_PATTERN);
430: KSPSetTolerances(asa_lev->smoothd, asa->smoother_rtol, asa->smoother_abstol, asa->smoother_dtol, maxits);
431: PetscTypeCompare((PetscObject)(asa_lev->smoothd), KSPRICHARDSON, &flg);
432: if (flg) {
433: /* special parameters for certain smoothers */
434: KSPSetInitialGuessNonzero(asa_lev->smoothd, PETSC_TRUE);
435: KSPGetPC(asa_lev->smoothd, &pc);
436: PetscTypeCompare((PetscObject)pc, PCSOR, &flg);
437: if (flg) {
438: PCSetSORomega_ASA(pc, asa->sor_omega);
439: } else {
440: /* just set asa->richardson_scale to get some very basic smoother */
441: PCSetRichardsonScale_ASA(asa_lev->smoothd, asa_lev->spec_rad, asa->richardson_scale);
442: }
443: /* this would be the place to add support for other preconditioners */
444: }
445: KSPSetOptionsPrefix(asa_lev->smoothd, "asa_smoother_");
446: KSPSetFromOptions(asa_lev->smoothd);
447: /* set smoothu equal to smoothd, this could change later */
448: asa_lev->smoothu = asa_lev->smoothd;
449: return(0);
450: }
452: /* -------------------------------------------------------------------------- */
453: /*
454: PCSetupDirectSolversOnLevel_ASA - Creates the direct solvers on the coarsest level.
455: We assume that asa_lev->A and asa_lev->spec_rad are correctly computed
457: Input Parameters:
458: + asa - the data structure for the ASA preconditioner
459: . asa_lev - the level we are treating
460: - maxits - maximum number of iterations to use
461: */
464: PetscErrorCode PCSetupDirectSolversOnLevel_ASA(PC_ASA *asa, PC_ASA_level *asa_lev, PetscInt maxits)
465: {
466: PetscErrorCode ierr;
467: PetscTruth flg;
468: PetscMPIInt comm_size;
469: PC pc;
472: if (asa_lev->smoothu && asa_lev->smoothu != asa_lev->smoothd) {
473: KSPDestroy(asa_lev->smoothu);
474: }
475: asa_lev->smoothu = 0;
476: if (asa_lev->smoothd) {
477: KSPDestroy(asa_lev->smoothd);
478: asa_lev->smoothd = 0;
479: }
480: PetscStrcmp(asa->ksptype_direct, KSPPREONLY, &flg);
481: if (flg) {
482: PetscStrcmp(asa->pctype_direct, PCLU, &flg);
483: if (flg) {
484: MPI_Comm_size(asa_lev->comm, &comm_size);
485: if (comm_size > 1) {
486: /* the LU PC will call MatSolve, we may have to set the correct type for the matrix
487: to have support for this in parallel */
488: MatConvert(asa_lev->A, asa->coarse_mat_type, MAT_REUSE_MATRIX, &(asa_lev->A));
489: }
490: }
491: }
492: /* create new solvers */
493: KSPCreate(asa_lev->comm,&asa_lev->smoothd);
494: KSPSetType(asa_lev->smoothd, asa->ksptype_direct);
495: KSPGetPC(asa_lev->smoothd,&pc);
496: PCSetType(pc,asa->pctype_direct);
497: /* set up problems for direct solvers */
498: KSPSetOperators(asa_lev->smoothd, asa_lev->A, asa_lev->A, DIFFERENT_NONZERO_PATTERN);
499: KSPSetTolerances(asa_lev->smoothd, asa->direct_rtol, asa->direct_abstol, asa->direct_dtol, maxits);
500: /* user can set any option by using -pc_asa_direct_xxx */
501: KSPSetOptionsPrefix(asa_lev->smoothd, "asa_coarse_");
502: KSPSetFromOptions(asa_lev->smoothd);
503: /* set smoothu equal to 0, not used */
504: asa_lev->smoothu = 0;
505: return(0);
506: }
508: /* -------------------------------------------------------------------------- */
509: /*
510: PCCreateAggregates_ASA - Creates the aggregates
512: Input Parameters:
513: . asa_lev - the level for which we should create the projection matrix
515: */
518: PetscErrorCode PCCreateAggregates_ASA(PC_ASA_level *asa_lev)
519: {
520: PetscInt m,n, m_loc,n_loc;
521: PetscInt m_loc_s, m_loc_e;
522: const PetscScalar one = 1.0;
523: PetscErrorCode ierr;
526: /* Create nodal aggregates A_i^l */
527: /* we use the DM grid information for that */
528: if (asa_lev->dm) {
529: /* coarsen DM and get the restriction matrix */
530: DMCoarsen(asa_lev->dm, PETSC_NULL, &(asa_lev->next->dm));
531: DMGetAggregates(asa_lev->next->dm, asa_lev->dm, &(asa_lev->agg));
532: MatGetSize(asa_lev->agg, &m, &n);
533: MatGetLocalSize(asa_lev->agg, &m_loc, &n_loc);
534: if (n!=asa_lev->size) SETERRQ(PETSC_ERR_ARG_SIZ,"DM interpolation matrix has incorrect size!\n");
535: asa_lev->next->size = m;
536: asa_lev->aggnum = m;
537: /* create the correlators, right now just identity matrices */
538: MatCreateMPIAIJ(asa_lev->comm, n_loc, n_loc, n, n, 1, PETSC_NULL, 1, PETSC_NULL,&(asa_lev->agg_corr));
539: MatGetOwnershipRange(asa_lev->agg_corr, &m_loc_s, &m_loc_e);
540: for (m=m_loc_s; m<m_loc_e; m++) {
541: MatSetValues(asa_lev->agg_corr, 1, &m, 1, &m, &one, INSERT_VALUES);
542: }
543: MatAssemblyBegin(asa_lev->agg_corr, MAT_FINAL_ASSEMBLY);
544: MatAssemblyEnd(asa_lev->agg_corr, MAT_FINAL_ASSEMBLY);
545: /* MatShift(asa_lev->agg_corr, 1.0); */
546: } else {
547: /* somehow define the aggregates without knowing the geometry */
548: /* future WORK */
549: SETERRQ(PETSC_ERR_SUP, "Currently pure algebraic coarsening is not supported!");
550: }
551: return(0);
552: }
554: /* -------------------------------------------------------------------------- */
555: /*
556: PCCreateTransferOp_ASA - Creates the transfer operator P_{l+1}^l for current level
558: Input Parameters:
559: + asa_lev - the level for which should create the transfer operator
560: - construct_bridge - true, if we should construct a bridge operator, false for normal prolongator
562: If we add a second, third, ... candidate vector (i.e. more than one column in B), we
563: have to relate the additional dimensions to the original aggregates. This is done through
564: the "aggregate correlators" agg_corr and bridge_corr.
565: The aggregate that is used in the construction is then given by
566: asa_lev->agg * asa_lev->agg_corr
567: for the regular prolongator construction and
568: asa_lev->agg * asa_lev->bridge_corr
569: for the bridging prolongator constructions.
570: */
573: PetscErrorCode PCCreateTransferOp_ASA(PC_ASA_level *asa_lev, PetscTruth construct_bridge)
574: {
577: const PetscReal Ca = 1e-3;
578: PetscReal cutoff;
579: PetscInt nodes_on_lev;
581: Mat logical_agg;
582: PetscInt mat_agg_loc_start, mat_agg_loc_end, mat_agg_loc_size;
583: PetscInt a;
584: const PetscInt *agg = 0;
585: PetscInt **agg_arr = 0;
587: IS *idxm_is_B_arr = 0;
588: PetscInt *idxn_B = 0;
589: IS idxn_is_B, *idxn_is_B_arr = 0;
591: Mat *b_submat_arr = 0;
593: PetscScalar *b_submat = 0, *b_submat_tp = 0;
594: PetscInt *idxm = 0, *idxn = 0;
595: PetscInt cand_vecs_num;
596: PetscInt *cand_vec_length = 0;
597: PetscInt max_cand_vec_length = 0;
598: PetscScalar **b_orth_arr = 0;
600: PetscInt i,j;
602: PetscScalar *tau = 0, *work = 0;
603: PetscBLASInt info,b1,b2;
605: PetscInt max_cand_vecs_to_add;
606: PetscInt *new_loc_agg_dofs = 0;
608: PetscInt total_loc_cols = 0;
609: PetscReal norm;
611: PetscInt a_loc_m, a_loc_n;
612: PetscInt mat_loc_col_start, mat_loc_col_end, mat_loc_col_size;
613: PetscInt loc_agg_dofs_sum;
614: PetscInt row, col;
615: PetscScalar val;
616: PetscMPIInt comm_size, comm_rank;
617: PetscInt *loc_cols = 0;
622: MatGetSize(asa_lev->B, &nodes_on_lev, PETSC_NULL);
624: /* If we add another candidate vector, we want to be able to judge, how much the new candidate
625: improves our current projection operators and whether it is worth adding it.
626: This is the precomputation necessary for implementing Notes (4.1) to (4.7).
627: We require that all candidate vectors x stored in B are normalized such that
628: <A x, x> = 1 and we thus do not have to compute this.
629: For each aggregate A we can now test condition (4.5) and (4.6) by computing
630: || quantity to check ||_{A}^2 <= cutoff * card(A)/N_l */
631: cutoff = Ca/(asa_lev->spec_rad);
633: /* compute logical aggregates by using the correlators */
634: if (construct_bridge) {
635: /* construct bridging operator */
636: MatMatMult(asa_lev->agg, asa_lev->bridge_corr, MAT_INITIAL_MATRIX, 1.0, &logical_agg);
637: } else {
638: /* construct "regular" prolongator */
639: MatMatMult(asa_lev->agg, asa_lev->agg_corr, MAT_INITIAL_MATRIX, 1.0, &logical_agg);
640: }
642: /* destroy correlator matrices for next level, these will be rebuilt in this routine */
643: if (asa_lev->next) {
644: SafeMatDestroy(&(asa_lev->next->agg_corr));
645: SafeMatDestroy(&(asa_lev->next->bridge_corr));
646: }
648: /* find out the correct local row indices */
649: MatGetOwnershipRange(logical_agg, &mat_agg_loc_start, &mat_agg_loc_end);
650: mat_agg_loc_size = mat_agg_loc_end-mat_agg_loc_start;
651:
652: cand_vecs_num = asa_lev->cand_vecs;
654: /* construct column indices idxn_B for reading from B */
655: PetscMalloc(sizeof(PetscInt)*(cand_vecs_num), &idxn_B);
656: for (i=0; i<cand_vecs_num; i++) {
657: idxn_B[i] = i;
658: }
659: ISCreateGeneral(asa_lev->comm, asa_lev->cand_vecs, idxn_B, &idxn_is_B);
660: PetscFree(idxn_B);
661: PetscMalloc(sizeof(IS)*mat_agg_loc_size, &idxn_is_B_arr);
662: for (a=0; a<mat_agg_loc_size; a++) {
663: idxn_is_B_arr[a] = idxn_is_B;
664: }
665: /* allocate storage for row indices idxm_B */
666: PetscMalloc(sizeof(IS)*mat_agg_loc_size, &idxm_is_B_arr);
668: /* Storage for the orthogonalized submatrices of B and their sizes */
669: PetscMalloc(sizeof(PetscInt)*mat_agg_loc_size, &cand_vec_length);
670: PetscMalloc(sizeof(PetscScalar*)*mat_agg_loc_size, &b_orth_arr);
671: /* Storage for the information about each aggregate */
672: PetscMalloc(sizeof(PetscInt*)*mat_agg_loc_size, &agg_arr);
673: /* Storage for the number of candidate vectors that are orthonormal and used in each submatrix */
674: PetscMalloc(sizeof(PetscInt)*mat_agg_loc_size, &new_loc_agg_dofs);
676: /* loop over local aggregates */
677: for (a=0; a<mat_agg_loc_size; a++) {
678: /* get info about current aggregate, this gives the rows we have to get from B */
679: MatGetRow(logical_agg, a+mat_agg_loc_start, &cand_vec_length[a], &agg, 0);
680: /* copy aggregate information */
681: PetscMalloc(sizeof(PetscInt)*cand_vec_length[a], &(agg_arr[a]));
682: PetscMemcpy(agg_arr[a], agg, sizeof(PetscInt)*cand_vec_length[a]);
683: /* restore row */
684: MatRestoreRow(logical_agg, a+mat_agg_loc_start, &cand_vec_length[a], &agg, 0);
685:
686: /* create index sets */
687: ISCreateGeneral(PETSC_COMM_SELF, cand_vec_length[a], agg_arr[a], &(idxm_is_B_arr[a]));
688: /* maximum candidate vector length */
689: if (cand_vec_length[a] > max_cand_vec_length) { max_cand_vec_length = cand_vec_length[a]; }
690: }
691: /* destroy logical_agg, no longer needed */
692: SafeMatDestroy(&logical_agg);
694: /* get the entries for aggregate from B */
695: MatGetSubMatrices(asa_lev->B, mat_agg_loc_size, idxm_is_B_arr, idxn_is_B_arr, MAT_INITIAL_MATRIX, &b_submat_arr);
696:
697: /* clean up all the index sets */
698: for (a=0; a<mat_agg_loc_size; a++) { ISDestroy(idxm_is_B_arr[a]); }
699: PetscFree(idxm_is_B_arr);
700: ISDestroy(idxn_is_B);
701: PetscFree(idxn_is_B_arr);
702:
703: /* storage for the values from each submatrix */
704: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length*cand_vecs_num, &b_submat);
705: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length*cand_vecs_num, &b_submat_tp);
706: PetscMalloc(sizeof(PetscInt)*max_cand_vec_length, &idxm);
707: for (i=0; i<max_cand_vec_length; i++) { idxm[i] = i; }
708: PetscMalloc(sizeof(PetscInt)*cand_vecs_num, &idxn);
709: for (i=0; i<cand_vecs_num; i++) { idxn[i] = i; }
710: /* work storage for QR algorithm */
711: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length, &tau);
712: PetscMalloc(sizeof(PetscScalar)*cand_vecs_num, &work);
714: /* orthogonalize all submatrices and store them in b_orth_arr */
715: for (a=0; a<mat_agg_loc_size; a++) {
716: /* Get the entries for aggregate from B. This is row ordered (although internally
717: it is column ordered and we will waste some energy transposing it).
718: WORK: use something like MatGetArray(b_submat_arr[a], &b_submat) but be really
719: careful about all the different matrix types */
720: MatGetValues(b_submat_arr[a], cand_vec_length[a], idxm, cand_vecs_num, idxn, b_submat);
722: if (construct_bridge) {
723: /* if we are constructing a bridging restriction/interpolation operator, we have
724: to use the same number of dofs as in our previous construction */
725: max_cand_vecs_to_add = asa_lev->loc_agg_dofs[a];
726: } else {
727: /* for a normal restriction/interpolation operator, we should make sure that we
728: do not create linear dependence by accident */
729: max_cand_vecs_to_add = PetscMin(cand_vec_length[a], cand_vecs_num);
730: }
732: /* We use LAPACK to compute the QR decomposition of b_submat. For LAPACK we have to
733: transpose the matrix. We might throw out some column vectors during this process.
734: We are keeping count of the number of column vectors that we use (and therefore the
735: number of dofs on the lower level) in new_loc_agg_dofs[a]. */
736: new_loc_agg_dofs[a] = 0;
737: for (j=0; j<max_cand_vecs_to_add; j++) {
738: /* check for condition (4.5) */
739: norm = 0.0;
740: for (i=0; i<cand_vec_length[a]; i++) {
741: norm += PetscRealPart(b_submat[i*cand_vecs_num+j])*PetscRealPart(b_submat[i*cand_vecs_num+j])
742: + PetscImaginaryPart(b_submat[i*cand_vecs_num+j])*PetscImaginaryPart(b_submat[i*cand_vecs_num+j]);
743: }
744: /* only add candidate vector if bigger than cutoff or first candidate */
745: if ((!j) || (norm > cutoff*((PetscReal) cand_vec_length[a])/((PetscReal) nodes_on_lev))) {
746: /* passed criterion (4.5), we have not implemented criterion (4.6) yet */
747: for (i=0; i<cand_vec_length[a]; i++) {
748: b_submat_tp[new_loc_agg_dofs[a]*cand_vec_length[a]+i] = b_submat[i*cand_vecs_num+j];
749: }
750: new_loc_agg_dofs[a]++;
751: }
752: /* #ifdef PCASA_VERBOSE */
753: else {
754: PetscPrintf(asa_lev->comm, "Cutoff criteria invoked\n");
755: }
756: /* #endif */
757: }
759: CHKMEMQ;
760: /* orthogonalize b_submat_tp using the QR algorithm from LAPACK */
761: b1 = (PetscBLASInt) *(cand_vec_length+a);
762: b2 = (PetscBLASInt) *(new_loc_agg_dofs+a);
763: LAPACKgeqrf_(&b1, &b2, b_submat_tp, &b1, tau, work, &b2, &info);
764: if (info) SETERRQ(PETSC_ERR_LIB, "LAPACKgeqrf_ LAPACK routine failed");
765: #if !defined(PETSC_MISSING_LAPACK_ORGQR)
766: LAPACKungqr_(&b1, &b2, &b2, b_submat_tp, &b1, tau, work, &b2, &info);
767: #else
768: SETERRQ(PETSC_ERR_SUP,"ORGQR - Lapack routine is unavailable\nIf linking with ESSL you MUST also link with full LAPACK, for example\nuse config/configure.py with --with-blas-lib=libessl.a --with-lapack-lib=/usr/local/lib/liblapack.a'");
769: #endif
770: if (info) SETERRQ(PETSC_ERR_LIB, "LAPACKungqr_ LAPACK routine failed");
772: /* Transpose b_submat_tp and store it in b_orth_arr[a]. If we are constructing a
773: bridging restriction/interpolation operator, we could end up with less dofs than
774: we previously had. We fill those up with zeros. */
775: if (!construct_bridge) {
776: PetscMalloc(sizeof(PetscScalar)*cand_vec_length[a]*new_loc_agg_dofs[a], b_orth_arr+a);
777: for (j=0; j<new_loc_agg_dofs[a]; j++) {
778: for (i=0; i<cand_vec_length[a]; i++) {
779: b_orth_arr[a][i*new_loc_agg_dofs[a]+j] = b_submat_tp[j*cand_vec_length[a]+i];
780: }
781: }
782: } else {
783: /* bridge, might have to fill up */
784: PetscMalloc(sizeof(PetscScalar)*cand_vec_length[a]*max_cand_vecs_to_add, b_orth_arr+a);
785: for (j=0; j<new_loc_agg_dofs[a]; j++) {
786: for (i=0; i<cand_vec_length[a]; i++) {
787: b_orth_arr[a][i*max_cand_vecs_to_add+j] = b_submat_tp[j*cand_vec_length[a]+i];
788: }
789: }
790: for (j=new_loc_agg_dofs[a]; j<max_cand_vecs_to_add; j++) {
791: for (i=0; i<cand_vec_length[a]; i++) {
792: b_orth_arr[a][i*max_cand_vecs_to_add+j] = 0.0;
793: }
794: }
795: new_loc_agg_dofs[a] = max_cand_vecs_to_add;
796: }
797: /* the number of columns in asa_lev->P that are local to this process */
798: total_loc_cols += new_loc_agg_dofs[a];
799: } /* end of loop over local aggregates */
801: /* destroy the submatrices, also frees all allocated space */
802: MatDestroyMatrices(mat_agg_loc_size, &b_submat_arr);
803: /* destroy all other workspace */
804: PetscFree(b_submat);
805: PetscFree(b_submat_tp);
806: PetscFree(idxm);
807: PetscFree(idxn);
808: PetscFree(tau);
809: PetscFree(work);
811: /* destroy old matrix P, Pt */
812: SafeMatDestroy(&(asa_lev->P));
813: SafeMatDestroy(&(asa_lev->Pt));
815: MatGetLocalSize(asa_lev->A, &a_loc_m, &a_loc_n);
817: /* determine local range */
818: MPI_Comm_size(asa_lev->comm, &comm_size);
819: MPI_Comm_rank(asa_lev->comm, &comm_rank);
820: PetscMalloc(comm_size*sizeof(PetscInt), &loc_cols);
821: MPI_Allgather(&total_loc_cols, 1, MPI_INT, loc_cols, 1, MPI_INT, asa_lev->comm);
822: mat_loc_col_start = 0;
823: for (i=0;i<comm_rank;i++) {
824: mat_loc_col_start += loc_cols[i];
825: }
826: mat_loc_col_end = mat_loc_col_start + loc_cols[i];
827: mat_loc_col_size = mat_loc_col_end-mat_loc_col_start;
828: if (mat_loc_col_size != total_loc_cols) SETERRQ(PETSC_ERR_COR, "Local size does not match matrix size");
829: PetscFree(loc_cols);
831: /* we now have enough information to create asa_lev->P */
832: MatCreateMPIAIJ(asa_lev->comm, a_loc_n, total_loc_cols, asa_lev->size, PETSC_DETERMINE,
833: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->P));
834: /* create asa_lev->Pt */
835: MatCreateMPIAIJ(asa_lev->comm, total_loc_cols, a_loc_n, PETSC_DETERMINE, asa_lev->size,
836: max_cand_vec_length, PETSC_NULL, max_cand_vec_length, PETSC_NULL, &(asa_lev->Pt));
837: if (asa_lev->next) {
838: /* create correlator for aggregates of next level */
839: MatCreateMPIAIJ(asa_lev->comm, mat_agg_loc_size, total_loc_cols, PETSC_DETERMINE, PETSC_DETERMINE,
840: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->next->agg_corr));
841: /* create asa_lev->next->bridge_corr matrix */
842: MatCreateMPIAIJ(asa_lev->comm, mat_agg_loc_size, total_loc_cols, PETSC_DETERMINE, PETSC_DETERMINE,
843: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->next->bridge_corr));
844: }
846: /* this is my own hack, but it should give the columns that we should write to */
847: MatGetOwnershipRangeColumn(asa_lev->P, &mat_loc_col_start, &mat_loc_col_end);
848: mat_loc_col_size = mat_loc_col_end-mat_loc_col_start;
849: if (mat_loc_col_size != total_loc_cols) SETERRQ(PETSC_ERR_ARG_SIZ, "The number of local columns in asa_lev->P assigned to this processor does not match the local vector size");
851: loc_agg_dofs_sum = 0;
852: /* construct P, Pt, agg_corr, bridge_corr */
853: for (a=0; a<mat_agg_loc_size; a++) {
854: /* store b_orth_arr[a] in P */
855: for (i=0; i<cand_vec_length[a]; i++) {
856: row = agg_arr[a][i];
857: for (j=0; j<new_loc_agg_dofs[a]; j++) {
858: col = mat_loc_col_start + loc_agg_dofs_sum + j;
859: val = b_orth_arr[a][i*new_loc_agg_dofs[a] + j];
860: MatSetValues(asa_lev->P, 1, &row, 1, &col, &val, INSERT_VALUES);
861: val = PetscConj(val);
862: MatSetValues(asa_lev->Pt, 1, &col, 1, &row, &val, INSERT_VALUES);
863: }
864: }
866: /* compute aggregate correlation matrices */
867: if (asa_lev->next) {
868: row = a+mat_agg_loc_start;
869: for (i=0; i<new_loc_agg_dofs[a]; i++) {
870: col = mat_loc_col_start + loc_agg_dofs_sum + i;
871: val = 1.0;
872: MatSetValues(asa_lev->next->agg_corr, 1, &row, 1, &col, &val, INSERT_VALUES);
873: /* for the bridge operator we leave out the newest candidates, i.e.
874: we set bridge_corr to 1.0 for all columns up to asa_lev->loc_agg_dofs[a] and to
875: 0.0 between asa_lev->loc_agg_dofs[a] and new_loc_agg_dofs[a] */
876: if (!(asa_lev->loc_agg_dofs && (i >= asa_lev->loc_agg_dofs[a]))) {
877: MatSetValues(asa_lev->next->bridge_corr, 1, &row, 1, &col, &val, INSERT_VALUES);
878: }
879: }
880: }
882: /* move to next entry point col */
883: loc_agg_dofs_sum += new_loc_agg_dofs[a];
884: } /* end of loop over local aggregates */
886: MatAssemblyBegin(asa_lev->P,MAT_FINAL_ASSEMBLY);
887: MatAssemblyEnd(asa_lev->P,MAT_FINAL_ASSEMBLY);
888: MatAssemblyBegin(asa_lev->Pt,MAT_FINAL_ASSEMBLY);
889: MatAssemblyEnd(asa_lev->Pt,MAT_FINAL_ASSEMBLY);
890: if (asa_lev->next) {
891: MatAssemblyBegin(asa_lev->next->agg_corr,MAT_FINAL_ASSEMBLY);
892: MatAssemblyEnd(asa_lev->next->agg_corr,MAT_FINAL_ASSEMBLY);
893: MatAssemblyBegin(asa_lev->next->bridge_corr,MAT_FINAL_ASSEMBLY);
894: MatAssemblyEnd(asa_lev->next->bridge_corr,MAT_FINAL_ASSEMBLY);
895: }
897: /* if we are not constructing a bridging operator, switch asa_lev->loc_agg_dofs
898: and new_loc_agg_dofs */
899: if (construct_bridge) {
900: PetscFree(new_loc_agg_dofs);
901: } else {
902: if (asa_lev->loc_agg_dofs) {
903: PetscFree(asa_lev->loc_agg_dofs);
904: }
905: asa_lev->loc_agg_dofs = new_loc_agg_dofs;
906: }
908: /* clean up */
909: for (a=0; a<mat_agg_loc_size; a++) {
910: PetscFree(b_orth_arr[a]);
911: PetscFree(agg_arr[a]);
912: }
913: PetscFree(cand_vec_length);
914: PetscFree(b_orth_arr);
915: PetscFree(agg_arr);
918: return(0);
919: }
921: /* -------------------------------------------------------------------------- */
922: /*
923: PCSmoothProlongator_ASA - Computes the smoothed prolongators I and It on the level
925: Input Parameters:
926: . asa_lev - the level for which the smoothed prolongator is constructed
927: */
930: PetscErrorCode PCSmoothProlongator_ASA(PC_ASA_level *asa_lev)
931: {
935: SafeMatDestroy(&(asa_lev->smP));
936: SafeMatDestroy(&(asa_lev->smPt));
937: /* compute prolongator I_{l+1}^l = S_l P_{l+1}^l */
938: /* step 1: compute I_{l+1}^l = A_l P_{l+1}^l */
939: MatMatMult(asa_lev->A, asa_lev->P, MAT_INITIAL_MATRIX, 1, &(asa_lev->smP));
940: MatMatMult(asa_lev->Pt, asa_lev->A, MAT_INITIAL_MATRIX, 1, &(asa_lev->smPt));
941: /* step 2: shift and scale to get I_{l+1}^l = P_{l+1}^l - 4/(3/rho) A_l P_{l+1}^l */
942: MatAYPX(asa_lev->smP, -4./(3.*(asa_lev->spec_rad)), asa_lev->P, SUBSET_NONZERO_PATTERN);
943: MatAYPX(asa_lev->smPt, -4./(3.*(asa_lev->spec_rad)), asa_lev->Pt, SUBSET_NONZERO_PATTERN);
945: return(0);
946: }
949: /* -------------------------------------------------------------------------- */
950: /*
951: PCCreateVcycle_ASA - Creates the V-cycle, when aggregates are already defined
953: Input Parameters:
954: . asa - the preconditioner context
955: */
958: PetscErrorCode PCCreateVcycle_ASA(PC_ASA *asa)
959: {
961: PC_ASA_level *asa_lev, *asa_next_lev;
962: Mat AI;
967: if (!asa) SETERRQ(PETSC_ERR_ARG_NULL, "asa pointer is NULL");
968: if (!(asa->levellist)) SETERRQ(PETSC_ERR_ARG_NULL, "no levels found");
969: asa_lev = asa->levellist;
970: PCComputeSpectralRadius_ASA(asa_lev);
971: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->nu);
973: while(asa_lev->next) {
974: asa_next_lev = asa_lev->next;
975: /* (a) aggregates are already constructed */
977: /* (b) construct B_{l+1} and P_{l+1}^l using (2.11) */
978: /* construct P_{l+1}^l */
979: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
981: /* construct B_{l+1} */
982: SafeMatDestroy(&(asa_next_lev->B));
983: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->B));
984: asa_next_lev->cand_vecs = asa_lev->cand_vecs;
986: /* (c) construct smoothed prolongator */
987: PCSmoothProlongator_ASA(asa_lev);
988:
989: /* (d) construct coarse matrix */
990: /* Define coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
991: SafeMatDestroy(&(asa_next_lev->A));
992: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
993: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
994: SafeMatDestroy(&AI);
995: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
996: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
997: PCComputeSpectralRadius_ASA(asa_next_lev);
998: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->nu);
999: /* create corresponding vectors x_{l+1}, b_{l+1}, r_{l+1} */
1000: SafeVecDestroy(&(asa_next_lev->x));
1001: SafeVecDestroy(&(asa_next_lev->b));
1002: SafeVecDestroy(&(asa_next_lev->r));
1003: MatGetVecs(asa_next_lev->A, &(asa_next_lev->x), &(asa_next_lev->b));
1004: MatGetVecs(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->r));
1006: /* go to next level */
1007: asa_lev = asa_lev->next;
1008: } /* end of while loop over the levels */
1009: /* asa_lev now points to the coarsest level, set up direct solver there */
1010: PCComputeSpectralRadius_ASA(asa_lev);
1011: PCSetupDirectSolversOnLevel_ASA(asa, asa_lev, asa->nu);
1014: return(0);
1015: }
1017: /* -------------------------------------------------------------------------- */
1018: /*
1019: PCAddCandidateToB_ASA - Inserts a candidate vector in B
1021: Input Parameters:
1022: + B - the matrix to insert into
1023: . col_idx - the column we should insert to
1024: . x - the vector to insert
1025: - A - system matrix
1027: Function will insert normalized x into B, such that <A x, x> = 1
1028: (x itself is not changed). If B is projected down then this property
1029: is kept. If <A_l x_l, x_l> = 1 and the next level is defined by
1030: x_{l+1} = Pt x_l and A_{l+1} = Pt A_l P then
1031: <A_{l+1} x_{l+1}, x_l> = <Pt A_l P Pt x_l, Pt x_l>
1032: = <A_l P Pt x_l, P Pt x_l> = <A_l x_l, x_l> = 1
1033: because of the definition of P in (2.11).
1034: */
1037: PetscErrorCode PCAddCandidateToB_ASA(Mat B, PetscInt col_idx, Vec x, Mat A)
1038: {
1040: Vec Ax;
1041: PetscScalar dotprod;
1042: PetscReal norm;
1043: PetscInt i, loc_start, loc_end;
1044: PetscScalar val, *vecarray;
1047: MatGetVecs(A, PETSC_NULL, &Ax);
1048: MatMult(A, x, Ax);
1049: VecDot(Ax, x, &dotprod);
1050: norm = PetscAbsScalar(PetscSqrtScalar(PetscAbsScalar(dotprod))); /* there has to be a better way */
1051: VecGetOwnershipRange(x, &loc_start, &loc_end);
1052: VecGetArray(x, &vecarray);
1053: for (i=loc_start; i<loc_end; i++) {
1054: val = vecarray[i-loc_start]/norm;
1055: MatSetValues(B, 1, &i, 1, &col_idx, &val, INSERT_VALUES);
1056: }
1057: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1058: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1059: VecRestoreArray(x, &vecarray);
1060: VecDestroy(Ax);
1061: return(0);
1062: }
1064: /* -------------------------------------------------------------------------- */
1065: /*
1066: - x - a starting guess for a hard to approximate vector, if PETSC_NULL, will be generated
1067: */
1070: PetscErrorCode PCInitializationStage_ASA(PC_ASA *asa, Vec x)
1071: {
1073: PetscInt l;
1074: PC_ASA_level *asa_lev, *asa_next_lev;
1075: PetscRandom rctx; /* random number generator context */
1077: Vec ax;
1078: PetscScalar tmp;
1079: PetscReal prevnorm, norm;
1081: PetscTruth skip_steps_f_i = PETSC_FALSE;
1082: PetscTruth sufficiently_coarsened = PETSC_FALSE;
1084: PetscInt vec_size, vec_loc_size;
1085: PetscInt loc_vec_low, loc_vec_high;
1086: PetscInt i,j;
1088: /* Vec xhat = 0; */
1090: Mat AI;
1092: Vec cand_vec, cand_vec_new;
1093: PetscTruth isrichardson;
1094: PC coarse_pc;
1098: l=1;
1099: /* create first level */
1100: PCCreateLevel_ASA(&(asa->levellist), l, asa->comm, 0, 0, asa->ksptype_smooth, asa->pctype_smooth);
1101: asa_lev = asa->levellist;
1103: /* Set matrix */
1104: asa_lev->A = asa->A;
1105: MatGetSize(asa_lev->A, &i, &j);
1106: asa_lev->size = i;
1107: PCComputeSpectralRadius_ASA(asa_lev);
1108: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->mu_initial);
1110: /* Set DM */
1111: asa_lev->dm = asa->dm;
1112: PetscObjectReference((PetscObject)asa->dm);
1114: PetscPrintf(asa_lev->comm, "Initialization stage\n");
1116: if (x) {
1117: /* use starting guess */
1118: SafeVecDestroy(&(asa_lev->x));
1119: VecDuplicate(x, &(asa_lev->x));
1120: VecCopy(x, asa_lev->x);
1121: } else {
1122: /* select random starting vector */
1123: SafeVecDestroy(&(asa_lev->x));
1124: MatGetVecs(asa_lev->A, &(asa_lev->x), 0);
1125: PetscRandomCreate(asa_lev->comm,&rctx);
1126: PetscRandomSetFromOptions(rctx);
1127: VecSetRandom(asa_lev->x, rctx);
1128: PetscRandomDestroy(rctx);
1129: }
1131: /* create right hand side */
1132: SafeVecDestroy(&(asa_lev->b));
1133: MatGetVecs(asa_lev->A, &(asa_lev->b), 0);
1134: VecSet(asa_lev->b, 0.0);
1136: /* relax and check whether that's enough already */
1137: /* compute old norm */
1138: MatGetVecs(asa_lev->A, 0, &ax);
1139: MatMult(asa_lev->A, asa_lev->x, ax);
1140: VecDot(asa_lev->x, ax, &tmp);
1141: prevnorm = PetscAbsScalar(tmp);
1142: PetscPrintf(asa_lev->comm, "Residual norm of starting guess: %f\n", prevnorm);
1144: /* apply mu_initial relaxations */
1145: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1146: /* compute new norm */
1147: MatMult(asa_lev->A, asa_lev->x, ax);
1148: VecDot(asa_lev->x, ax, &tmp);
1149: norm = PetscAbsScalar(tmp);
1150: SafeVecDestroy(&(ax));
1151: PetscPrintf(asa_lev->comm, "Residual norm of relaxation after %g %d relaxations: %g %g\n", asa->epsilon,asa->mu_initial, norm,prevnorm);
1153: /* Check if it already converges by itself */
1154: if (norm/prevnorm <= PetscAbsScalar(PetscPowScalar(asa->epsilon, asa->mu_initial))) {
1155: /* converges by relaxation alone */
1156: SETERRQ(PETSC_ERR_SUP, "Relaxation should be sufficient to treat this problem. "
1157: "Use relaxation or decrease epsilon with -pc_asa_epsilon");
1158: } else {
1159: /* set the number of relaxations to asa->mu from asa->mu_initial */
1160: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->mu);
1162: /* Let's do some multigrid ! */
1163: sufficiently_coarsened = PETSC_FALSE;
1165: /* do the whole initialization stage loop */
1166: while (!sufficiently_coarsened) {
1167: PetscPrintf(asa_lev->comm, "Initialization stage: creating level %d\n", asa_lev->level+1);
1169: /* (a) Set candidate matrix B_l = x_l */
1170: /* get the correct vector sizes and data */
1171: VecGetSize(asa_lev->x, &vec_size);
1172: VecGetOwnershipRange(asa_lev->x, &loc_vec_low, &loc_vec_high);
1173: vec_loc_size = loc_vec_high - loc_vec_low;
1175: /* create matrix for candidates */
1176: MatCreateMPIDense(asa_lev->comm, vec_loc_size, PETSC_DECIDE, vec_size, asa->max_cand_vecs, PETSC_NULL, &(asa_lev->B));
1177: /* set the first column */
1178: PCAddCandidateToB_ASA(asa_lev->B, 0, asa_lev->x, asa_lev->A);
1179: asa_lev->cand_vecs = 1;
1181: /* create next level */
1182: PCCreateLevel_ASA(&(asa_lev->next), asa_lev->level+1, asa_lev->comm, asa_lev, PETSC_NULL, asa->ksptype_smooth, asa->pctype_smooth);
1183: asa_next_lev = asa_lev->next;
1185: /* (b) Create nodal aggregates A_i^l */
1186: PCCreateAggregates_ASA(asa_lev);
1187:
1188: /* (c) Define tentatative prolongator P_{l+1}^l and candidate matrix B_{l+1}
1189: using P_{l+1}^l B_{l+1} = B_l and (P_{l+1}^l)^T P_{l+1}^l = I */
1190: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
1192: /* future WORK: set correct fill ratios for all the operations below */
1193: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->B));
1194: asa_next_lev->cand_vecs = asa_lev->cand_vecs;
1196: /* (d) Define prolongator I_{l+1}^l = S_l P_{l+1}^l */
1197: PCSmoothProlongator_ASA(asa_lev);
1199: /* (e) Define coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
1200: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
1201: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
1202: SafeMatDestroy(&AI);
1203: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
1204: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
1205: PCComputeSpectralRadius_ASA(asa_next_lev);
1206: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->mu);
1208: /* coarse enough for direct solver? */
1209: MatGetSize(asa_next_lev->A, &i, &j);
1210: if (PetscMax(i,j) <= asa->direct_solver) {
1211: PetscPrintf(asa_lev->comm, "Level %d can be treated directly.\n"
1212: "Algorithm will use %d levels.\n", asa_next_lev->level,
1213: asa_next_lev->level);
1214: break; /* go to step 5 */
1215: }
1217: if (skip_steps_f_i == PETSC_FALSE) {
1218: /* (f) Set x_{l+1} = B_{l+1}, we just compute it again */
1219: SafeVecDestroy(&(asa_next_lev->x));
1220: MatGetVecs(asa_lev->P, &(asa_next_lev->x), 0);
1221: MatMult(asa_lev->Pt, asa_lev->x, asa_next_lev->x);
1223: /* /\* (g) Make copy \hat{x}_{l+1} = x_{l+1} *\/ */
1224: /* VecDuplicate(asa_next_lev->x, &xhat); */
1225: /* VecCopy(asa_next_lev->x, xhat); */
1226:
1227: /* Create b_{l+1} */
1228: SafeVecDestroy(&(asa_next_lev->b));
1229: MatGetVecs(asa_next_lev->A, &(asa_next_lev->b), 0);
1230: VecSet(asa_next_lev->b, 0.0);
1232: /* (h) Relax mu times on A_{l+1} x = 0 */
1233: /* compute old norm */
1234: MatGetVecs(asa_next_lev->A, 0, &ax);
1235: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1236: VecDot(asa_next_lev->x, ax, &tmp);
1237: prevnorm = PetscAbsScalar(tmp);
1238: PetscPrintf(asa_next_lev->comm, "Residual norm of starting guess on level %d: %f\n", asa_next_lev->level, prevnorm);
1239: /* apply mu relaxations: WORK, make sure that mu is set correctly */
1240: KSPSolve(asa_next_lev->smoothd, asa_next_lev->b, asa_next_lev->x);
1241: /* compute new norm */
1242: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1243: VecDot(asa_next_lev->x, ax, &tmp);
1244: norm = PetscAbsScalar(tmp);
1245: SafeVecDestroy(&(ax));
1246: PetscPrintf(asa_next_lev->comm, "Residual norm after Richardson iteration on level %d: %f\n", asa_next_lev->level, norm);
1247: /* (i) Check if it already converges by itself */
1248: if (norm/prevnorm <= PetscAbsScalar(PetscPowScalar(asa->epsilon, asa->mu))) {
1249: /* relaxation reduces error sufficiently */
1250: skip_steps_f_i = PETSC_TRUE;
1251: }
1252: }
1253: /* (j) go to next coarser level */
1254: l++;
1255: asa_lev = asa_next_lev;
1256: }
1257: /* Step 5. */
1258: asa->levels = asa_next_lev->level; /* WORK: correct? */
1260: /* Set up direct solvers on coarsest level */
1261: if (asa_next_lev->smoothd != asa_next_lev->smoothu) {
1262: if (asa_next_lev->smoothu) { KSPDestroy(asa_next_lev->smoothu); }
1263: }
1264: KSPSetType(asa_next_lev->smoothd, asa->ksptype_direct);
1265: PetscTypeCompare((PetscObject)(asa_next_lev->smoothd), KSPRICHARDSON, &isrichardson);
1266: if (isrichardson) {
1267: KSPSetInitialGuessNonzero(asa_next_lev->smoothd, PETSC_TRUE);
1268: } else {
1269: KSPSetInitialGuessNonzero(asa_next_lev->smoothd, PETSC_FALSE);
1270: }
1271: KSPGetPC(asa_next_lev->smoothd, &coarse_pc);
1272: PCSetType(coarse_pc, asa->pctype_direct);
1273: asa_next_lev->smoothu = asa_next_lev->smoothd;
1274: PCSetupDirectSolversOnLevel_ASA(asa, asa_next_lev, asa->nu);
1276: /* update finest-level candidate matrix B_1 = I_2^1 I_3^2 ... I_{L-1}^{L-2} x_{L-1} */
1277: if (!asa_lev->prev) {
1278: /* just one relaxation level */
1279: VecDuplicate(asa_lev->x, &cand_vec);
1280: VecCopy(asa_lev->x, cand_vec);
1281: } else {
1282: /* interpolate up the chain */
1283: cand_vec = asa_lev->x;
1284: asa_lev->x = 0;
1285: while(asa_lev->prev) {
1286: /* interpolate to higher level */
1287: MatGetVecs(asa_lev->prev->smP, 0, &cand_vec_new);
1288: MatMult(asa_lev->prev->smP, cand_vec, cand_vec_new);
1289: SafeVecDestroy(&(cand_vec));
1290: cand_vec = cand_vec_new;
1291:
1292: /* destroy all working vectors on the way */
1293: SafeVecDestroy(&(asa_lev->x));
1294: SafeVecDestroy(&(asa_lev->b));
1296: /* move to next higher level */
1297: asa_lev = asa_lev->prev;
1298: }
1299: }
1300: /* set the first column of B1 */
1301: PCAddCandidateToB_ASA(asa_lev->B, 0, cand_vec, asa_lev->A);
1302: SafeVecDestroy(&(cand_vec));
1304: /* Step 6. Create V-cycle */
1305: PCCreateVcycle_ASA(asa);
1306: }
1308: return(0);
1309: }
1311: /* -------------------------------------------------------------------------- */
1312: /*
1313: PCApplyVcycleOnLevel_ASA - Applies current V-cycle
1315: Input Parameters:
1316: + asa_lev - the current level we should recurse on
1317: - gamma - the number of recursive cycles we should run
1319: */
1322: PetscErrorCode PCApplyVcycleOnLevel_ASA(PC_ASA_level *asa_lev, PetscInt gamma)
1323: {
1325: PC_ASA_level *asa_next_lev;
1326: PetscInt g;
1329: if (!asa_lev) SETERRQ(PETSC_ERR_ARG_NULL, "Level is empty in PCApplyVcycleOnLevel_ASA");
1330: asa_next_lev = asa_lev->next;
1332: if (asa_next_lev) {
1333: /* 1. Presmoothing */
1334: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1335: /* 2. Coarse grid corrections */
1336: /* MatGetVecs(asa_lev->A, 0, &tmp); */
1337: /* MatGetVecs(asa_lev->smP, &(asa_next_lev->b), 0); */
1338: /* MatGetVecs(asa_next_lev->A, &(asa_next_lev->x), 0); */
1339: for (g=0; g<gamma; g++) {
1340: /* (a) get coarsened b_{l+1} = (I_{l+1}^l)^T (b_l - A_l x_l) */
1341: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1342: VecAYPX(asa_lev->r, -1.0, asa_lev->b);
1343: MatMult(asa_lev->smPt, asa_lev->r, asa_next_lev->b);
1345: /* (b) Set x_{l+1} = 0 and recurse */
1346: VecSet(asa_next_lev->x, 0.0);
1347: PCApplyVcycleOnLevel_ASA(asa_next_lev, gamma);
1349: /* (c) correct solution x_l = x_l + I_{l+1}^l x_{l+1} */
1350: MatMultAdd(asa_lev->smP, asa_next_lev->x, asa_lev->x, asa_lev->x);
1351: }
1352: /* SafeVecDestroy(&(asa_lev->r)); */
1353: /* /\* discard x_{l+1}, b_{l+1} *\/ */
1354: /* SafeVecDestroy(&(asa_next_lev->x)); */
1355: /* SafeVecDestroy(&(asa_next_lev->b)); */
1356:
1357: /* 3. Postsmoothing */
1358: KSPSolve(asa_lev->smoothu, asa_lev->b, asa_lev->x);
1359: } else {
1360: /* Base case: solve directly */
1361: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1362: }
1363: return(0);
1364: }
1367: /* -------------------------------------------------------------------------- */
1368: /*
1369: PCGeneralSetupStage_ASA - Applies the ASA preconditioner to a vector. Algorithm
1370: 4 from the ASA paper
1372: Input Parameters:
1373: + asa - the data structure for the ASA algorithm
1374: - cand - a possible candidate vector, if PETSC_NULL, will be constructed randomly
1376: Output Parameters:
1377: . cand_added - PETSC_TRUE, if new candidate vector added, PETSC_FALSE otherwise
1378: */
1381: PetscErrorCode PCGeneralSetupStage_ASA(PC_ASA *asa, Vec cand, PetscTruth *cand_added)
1382: {
1384: PC_ASA_level *asa_lev, *asa_next_lev;
1386: PetscRandom rctx; /* random number generator context */
1387: PetscReal r;
1388: PetscScalar rs;
1389: PetscTruth nd_fast;
1391: Vec ax;
1392: PetscScalar tmp;
1393: PetscReal norm, prevnorm = 0.0;
1394: PetscInt c;
1396: PetscInt loc_vec_low, loc_vec_high;
1397: PetscInt i;
1399: PetscTruth skip_steps_d_j = PETSC_FALSE;
1401: PetscInt *idxm, *idxn;
1402: PetscScalar *v;
1404: Mat AI;
1406: Vec cand_vec, cand_vec_new;
1409: *cand_added = PETSC_FALSE;
1410:
1411: asa_lev = asa->levellist;
1412: if (asa_lev == 0) SETERRQ(PETSC_ERR_ARG_NULL, "No levels found in PCGeneralSetupStage_ASA");
1413: asa_next_lev = asa_lev->next;
1414: if (asa_next_lev == 0) SETERRQ(PETSC_ERR_ARG_NULL, "Just one level, not implemented yet");
1415:
1416: PetscPrintf(asa_lev->comm, "General setup stage\n");
1420: /* 1. If max. dof per node on level 2 equals K, stop */
1421: if (asa_next_lev->cand_vecs >= asa->max_dof_lev_2) {
1422: PetscPrintf(PETSC_COMM_WORLD,
1423: "Maximum dof on level 2 reached: %d\n"
1424: "Consider increasing this limit by setting it with -pc_asa_max_dof_lev_2\n",
1425: asa->max_dof_lev_2);
1426: return(0);
1427: }
1429: /* 2. Create copy of B_1 (skipped, we just replace the last column in step 8.) */
1430:
1431: if (!cand) {
1432: /* 3. Select a random x_1 */
1433: SafeVecDestroy(&(asa_lev->x));
1434: MatGetVecs(asa_lev->A, &(asa_lev->x), 0);
1435: PetscRandomCreate(asa_lev->comm,&rctx);
1436: PetscRandomSetFromOptions(rctx);
1437: VecGetOwnershipRange(asa_lev->x, &loc_vec_low, &loc_vec_high);
1438: for (i=loc_vec_low; i<loc_vec_high; i++) {
1439: PetscRandomGetValueReal(rctx, &r);
1440: rs = r;
1441: VecSetValues(asa_lev->x, 1, &i, &rs, INSERT_VALUES);
1442: }
1443: VecAssemblyBegin(asa_lev->x);
1444: VecAssemblyEnd(asa_lev->x);
1445: PetscRandomDestroy(rctx);
1446: } else {
1447: SafeVecDestroy(&(asa_lev->x));
1448: VecDuplicate(cand, &(asa_lev->x));
1449: VecCopy(cand, asa_lev->x);
1450: }
1452: /* create right hand side */
1453: SafeVecDestroy(&(asa_lev->b));
1454: MatGetVecs(asa_lev->A, &(asa_lev->b), 0);
1455: VecSet(asa_lev->b, 0.0);
1456:
1457: /* Apply mu iterations of current V-cycle */
1458: nd_fast = PETSC_FALSE;
1459: MatGetVecs(asa_lev->A, 0, &ax);
1460: for (c=0; c<asa->mu; c++) {
1461: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1462:
1463: MatMult(asa_lev->A, asa_lev->x, ax);
1464: VecDot(asa_lev->x, ax, &tmp);
1465: norm = PetscAbsScalar(tmp);
1466: if (c>0) {
1467: if (norm/prevnorm < asa->epsilon) {
1468: nd_fast = PETSC_TRUE;
1469: break;
1470: }
1471: }
1472: prevnorm = norm;
1473: }
1474: SafeVecDestroy(&(ax));
1476: /* 4. If energy norm decreases sufficiently fast, then stop */
1477: if (nd_fast) {
1478: PetscPrintf(asa_lev->comm, "nd_fast is true\n");
1479: return(0);
1480: }
1482: /* 5. Update B_1, by adding new column x_1 */
1483: if (asa_lev->cand_vecs >= asa->max_cand_vecs) {
1484: SETERRQ(PETSC_ERR_MEM, "Number of candidate vectors will exceed allocated storage space");
1485: } else {
1486: PetscPrintf(asa_lev->comm, "Adding candidate vector %d\n", asa_lev->cand_vecs+1);
1487: }
1488: PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs, asa_lev->x, asa_lev->A);
1489: *cand_added = PETSC_TRUE;
1490: asa_lev->cand_vecs++;
1492: /* 6. loop over levels */
1493: while(asa_next_lev && asa_next_lev->next) {
1494: PetscPrintf(asa_lev->comm, "General setup stage: processing level %d\n", asa_next_lev->level);
1495: /* (a) define B_{l+1} and P_{l+1}^L */
1496: /* construct P_{l+1}^l */
1497: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
1499: /* construct B_{l+1} */
1500: SafeMatDestroy(&(asa_next_lev->B));
1501: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->B));
1502: /* do not increase asa_next_lev->cand_vecs until step (j) */
1503:
1504: /* (b) construct prolongator I_{l+1}^l = S_l P_{l+1}^l */
1505: PCSmoothProlongator_ASA(asa_lev);
1506:
1507: /* (c) construct coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
1508: SafeMatDestroy(&(asa_next_lev->A));
1509: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
1510: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
1511: SafeMatDestroy(&AI);
1512: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
1513: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
1514: PCComputeSpectralRadius_ASA(asa_next_lev);
1515: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->mu);
1517: if (! skip_steps_d_j) {
1518: /* (d) get vector x_{l+1} from last column in B_{l+1} */
1519: SafeVecDestroy(&(asa_next_lev->x));
1520: MatGetVecs(asa_next_lev->B, 0, &(asa_next_lev->x));
1522: VecGetOwnershipRange(asa_next_lev->x, &loc_vec_low, &loc_vec_high);
1523: PetscMalloc(sizeof(PetscInt)*(loc_vec_high-loc_vec_low), &idxm);
1524: for (i=loc_vec_low; i<loc_vec_high; i++)
1525: idxm[i-loc_vec_low] = i;
1526: PetscMalloc(sizeof(PetscInt)*1, &idxn);
1527: idxn[0] = asa_next_lev->cand_vecs;
1529: PetscMalloc(sizeof(PetscScalar)*(loc_vec_high-loc_vec_low), &v);
1530: MatGetValues(asa_next_lev->B, loc_vec_high-loc_vec_low, idxm, 1, idxn, v);
1532: VecSetValues(asa_next_lev->x, loc_vec_high-loc_vec_low, idxm, v, INSERT_VALUES);
1533: VecAssemblyBegin(asa_next_lev->x);
1534: VecAssemblyEnd(asa_next_lev->x);
1536: PetscFree(v);
1537: PetscFree(idxm);
1538: PetscFree(idxn);
1539:
1540: /* (e) create bridge transfer operator P_{l+2}^{l+1}, by using the previously
1541: computed candidates */
1542: PCCreateTransferOp_ASA(asa_next_lev, PETSC_TRUE);
1544: /* (f) construct bridging prolongator I_{l+2}^{l+1} = S_{l+1} P_{l+2}^{l+1} */
1545: PCSmoothProlongator_ASA(asa_next_lev);
1547: /* (g) compute <A_{l+1} x_{l+1}, x_{l+1}> and save it */
1548: MatGetVecs(asa_next_lev->A, 0, &ax);
1549: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1550: VecDot(asa_next_lev->x, ax, &tmp);
1551: prevnorm = PetscAbsScalar(tmp);
1552: SafeVecDestroy(&(ax));
1554: /* (h) apply mu iterations of current V-cycle */
1555: /* set asa_next_lev->b */
1556: SafeVecDestroy(&(asa_next_lev->b));
1557: SafeVecDestroy(&(asa_next_lev->r));
1558: MatGetVecs(asa_next_lev->A, &(asa_next_lev->b), &(asa_next_lev->r));
1559: VecSet(asa_next_lev->b, 0.0);
1560: /* apply V-cycle */
1561: for (c=0; c<asa->mu; c++) {
1562: PCApplyVcycleOnLevel_ASA(asa_next_lev, asa->gamma);
1563: }
1565: /* (i) check convergence */
1566: /* compute <A_{l+1} x_{l+1}, x_{l+1}> and save it */
1567: MatGetVecs(asa_next_lev->A, 0, &ax);
1568: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1569: VecDot(asa_next_lev->x, ax, &tmp);
1570: norm = PetscAbsScalar(tmp);
1571: SafeVecDestroy(&(ax));
1573: if (norm/prevnorm <= PetscAbsScalar(PetscPowScalar(asa->epsilon, asa->mu))) skip_steps_d_j = PETSC_TRUE;
1574:
1575: /* (j) update candidate B_{l+1} */
1576: PCAddCandidateToB_ASA(asa_next_lev->B, asa_next_lev->cand_vecs, asa_next_lev->x, asa_next_lev->A);
1577: asa_next_lev->cand_vecs++;
1578: }
1579: /* go to next level */
1580: asa_lev = asa_lev->next;
1581: asa_next_lev = asa_next_lev->next;
1582: }
1584: /* 7. update the fine-level candidate */
1585: if (! asa_lev->prev) {
1586: /* just one coarsening level */
1587: VecDuplicate(asa_lev->x, &cand_vec);
1588: VecCopy(asa_lev->x, cand_vec);
1589: } else {
1590: cand_vec = asa_lev->x;
1591: asa_lev->x = 0;
1592: while(asa_lev->prev) {
1593: /* interpolate to higher level */
1594: MatGetVecs(asa_lev->prev->smP, 0, &cand_vec_new);
1595: MatMult(asa_lev->prev->smP, cand_vec, cand_vec_new);
1596: SafeVecDestroy(&(cand_vec));
1597: cand_vec = cand_vec_new;
1599: /* destroy all working vectors on the way */
1600: SafeVecDestroy(&(asa_lev->x));
1601: SafeVecDestroy(&(asa_lev->b));
1603: /* move to next higher level */
1604: asa_lev = asa_lev->prev;
1605: }
1606: }
1607: /* 8. update B_1 by setting the last column of B_1 */
1608: PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs-1, cand_vec, asa_lev->A);
1609: SafeVecDestroy(&(cand_vec));
1611: /* 9. create V-cycle */
1612: PCCreateVcycle_ASA(asa);
1613:
1615: return(0);
1616: }
1618: /* -------------------------------------------------------------------------- */
1619: /*
1620: PCConstructMultigrid_ASA - creates the multigrid preconditionier, this is a fairly
1621: involved process, which runs extensive testing to compute good candidate vectors
1623: Input Parameters:
1624: . pc - the preconditioner context
1626: */
1629: PetscErrorCode PCConstructMultigrid_ASA(PC pc)
1630: {
1632: PC_ASA *asa = (PC_ASA*)pc->data;
1633: PC_ASA_level *asa_lev;
1634: PetscInt i, ls, le;
1635: PetscScalar *d;
1636: PetscTruth zeroflag = PETSC_FALSE;
1637: PetscReal rnorm, rnorm_start;
1638: PetscReal rq, rq_prev;
1639: PetscScalar rq_nom, rq_denom;
1640: PetscTruth cand_added;
1641: PetscRandom rctx;
1645: /* check if we should scale with diagonal */
1646: if (asa->scale_diag) {
1647: /* Get diagonal scaling factors */
1648: MatGetVecs(pc->pmat,&(asa->invsqrtdiag),0);
1649: MatGetDiagonal(pc->pmat,asa->invsqrtdiag);
1650: /* compute (inverse) sqrt of diagonal */
1651: VecGetOwnershipRange(asa->invsqrtdiag, &ls, &le);
1652: VecGetArray(asa->invsqrtdiag, &d);
1653: for (i=0; i<le-ls; i++) {
1654: if (d[i] == 0.0) {
1655: d[i] = 1.0;
1656: zeroflag = PETSC_TRUE;
1657: } else {
1658: d[i] = 1./sqrt(PetscAbsScalar(d[i]));
1659: }
1660: }
1661: VecRestoreArray(asa->invsqrtdiag,&d);
1662: VecAssemblyBegin(asa->invsqrtdiag);
1663: VecAssemblyEnd(asa->invsqrtdiag);
1664: if (zeroflag) {
1665: PetscInfo(pc,"Zero detected in diagonal of matrix, using 1 at those locations\n");
1666: }
1667:
1668: /* scale the matrix and store it: D^{-1/2} A D^{-1/2} */
1669: MatDuplicate(pc->pmat, MAT_COPY_VALUES, &(asa->A)); /* probably inefficient */
1670: MatDiagonalScale(asa->A, asa->invsqrtdiag, asa->invsqrtdiag);
1671: } else {
1672: /* don't scale */
1673: asa->A = pc->pmat;
1674: }
1675: /* Initialization stage */
1676: PCInitializationStage_ASA(asa, PETSC_NULL);
1677:
1678: /* get first level */
1679: asa_lev = asa->levellist;
1681: PetscRandomCreate(asa->comm,&rctx);
1682: PetscRandomSetFromOptions(rctx);
1683: VecSetRandom(asa_lev->x,rctx);
1685: /* compute starting residual */
1686: SafeVecDestroy(&(asa_lev->r));
1687: MatGetVecs(asa_lev->A, PETSC_NULL, &(asa_lev->r));
1688: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1689: /* starting residual norm */
1690: VecNorm(asa_lev->r, NORM_2, &rnorm_start);
1691: /* compute Rayleigh quotients */
1692: VecDot(asa_lev->x, asa_lev->r, &rq_nom);
1693: VecDot(asa_lev->x, asa_lev->x, &rq_denom);
1694: rq_prev = PetscAbsScalar(rq_nom / rq_denom);
1696: /* check if we have to add more candidates */
1697: for (i=0; i<asa->max_it; i++) {
1698: if (asa_lev->cand_vecs >= asa->max_cand_vecs) {
1699: /* reached limit for candidate vectors */
1700: break;
1701: }
1702: /* apply V-cycle */
1703: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1704: /* check convergence */
1705: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1706: VecNorm(asa_lev->r, NORM_2, &rnorm);
1707: PetscPrintf(asa->comm, "After %d iterations residual norm is %f\n", i+1, rnorm);
1708: if (rnorm < rnorm_start*(asa->rtol) || rnorm < asa->abstol) {
1709: /* convergence */
1710: break;
1711: }
1712: /* compute new Rayleigh quotient */
1713: VecDot(asa_lev->x, asa_lev->r, &rq_nom);
1714: VecDot(asa_lev->x, asa_lev->x, &rq_denom);
1715: rq = PetscAbsScalar(rq_nom / rq_denom);
1716: PetscPrintf(asa->comm, "After %d iterations Rayleigh quotient of residual is %f\n", i+1, rq);
1717: /* test Rayleigh quotient decrease and add more candidate vectors if necessary */
1718: if (i && (rq > asa->rq_improve*rq_prev)) {
1719: /* improve interpolation by adding another candidate vector */
1720: PCGeneralSetupStage_ASA(asa, asa_lev->r, &cand_added);
1721: if (!cand_added) {
1722: /* either too many candidates for storage or cycle is already effective */
1723: PetscPrintf(asa->comm, "either too many candidates for storage or cycle is already effective\n");
1724: break;
1725: }
1726: VecSetRandom(asa_lev->x, rctx);
1727: rq_prev = rq*10000.; /* give the new V-cycle some grace period */
1728: } else {
1729: rq_prev = rq;
1730: }
1731: }
1733: SafeVecDestroy(&(asa_lev->x));
1734: SafeVecDestroy(&(asa_lev->b));
1735: PetscRandomDestroy(rctx);
1736: asa->multigrid_constructed = PETSC_TRUE;
1737: return(0);
1738: }
1740: /* -------------------------------------------------------------------------- */
1741: /*
1742: PCApply_ASA - Applies the ASA preconditioner to a vector.
1744: Input Parameters:
1745: . pc - the preconditioner context
1746: . x - input vector
1748: Output Parameter:
1749: . y - output vector
1751: Application Interface Routine: PCApply()
1752: */
1755: PetscErrorCode PCApply_ASA(PC pc,Vec x,Vec y)
1756: {
1757: PC_ASA *asa = (PC_ASA*)pc->data;
1758: PC_ASA_level *asa_lev;
1763: if (!asa->multigrid_constructed) {
1764: PCConstructMultigrid_ASA(pc);
1765: }
1767: /* get first level */
1768: asa_lev = asa->levellist;
1770: /* set the right hand side */
1771: VecDuplicate(x, &(asa->b));
1772: VecCopy(x, asa->b);
1773: /* set starting vector */
1774: SafeVecDestroy(&(asa->x));
1775: MatGetVecs(asa->A, &(asa->x), PETSC_NULL);
1776: VecSet(asa->x, 0.0);
1777:
1778: /* set vectors */
1779: asa_lev->x = asa->x;
1780: asa_lev->b = asa->b;
1782: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1783:
1784: /* Return solution */
1785: VecCopy(asa->x, y);
1787: /* delete working vectors */
1788: SafeVecDestroy(&(asa->x));
1789: SafeVecDestroy(&(asa->b));
1790: asa_lev->x = PETSC_NULL;
1791: asa_lev->b = PETSC_NULL;
1793: return(0);
1794: }
1796: /* -------------------------------------------------------------------------- */
1797: /*
1798: PCApplyRichardson_ASA - Applies the ASA iteration to solve a linear system
1800: Input Parameters:
1801: . pc - the preconditioner context
1802: . b - the right hand side
1804: Output Parameter:
1805: . x - output vector
1807: DOES NOT WORK!!!!!
1809: */
1812: PetscErrorCode PCApplyRichardson_ASA(PC pc,Vec b,Vec x,Vec w,PetscReal rtol,PetscReal abstol, PetscReal dtol,PetscInt its)
1813: {
1814: PC_ASA *asa = (PC_ASA*)pc->data;
1815: PC_ASA_level *asa_lev;
1816: PetscInt i;
1817: PetscReal rnorm, rnorm_start;
1822: if (! asa->multigrid_constructed) {
1823: PCConstructMultigrid_ASA(pc);
1824: }
1826: /* get first level */
1827: asa_lev = asa->levellist;
1829: /* set the right hand side */
1830: VecDuplicate(b, &(asa->b));
1831: if (asa->scale_diag) {
1832: VecPointwiseMult(asa->b, asa->invsqrtdiag, b);
1833: } else {
1834: VecCopy(b, asa->b);
1835: }
1836: /* set starting vector */
1837: VecDuplicate(x, &(asa->x));
1838: VecCopy(x, asa->x);
1839:
1840: /* compute starting residual */
1841: SafeVecDestroy(&(asa->r));
1842: MatGetVecs(asa->A, &(asa->r), PETSC_NULL);
1843: MatMult(asa->A, asa->x, asa->r);
1844: VecAYPX(asa->r, -1.0, asa->b);
1845: /* starting residual norm */
1846: VecNorm(asa->r, NORM_2, &rnorm_start);
1848: /* set vectors */
1849: asa_lev->x = asa->x;
1850: asa_lev->b = asa->b;
1852: /* **************** Full algorithm loop *********************************** */
1853: for (i=0; i<its; i++) {
1854: /* apply V-cycle */
1855: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1856: /* check convergence */
1857: MatMult(asa->A, asa->x, asa->r);
1858: VecAYPX(asa->r, -1.0, asa->b);
1859: VecNorm(asa->r, NORM_2, &rnorm);
1860: PetscPrintf(asa->comm, "After %d iterations residual norm is %f\n", i+1, rnorm);
1861: if (rnorm < rnorm_start*(rtol) || rnorm < asa->abstol) {
1862: /* convergence */
1863: break;
1864: }
1865: if (rnorm > rnorm_start*(dtol)) {
1866: /* divergence */
1867: break;
1868: }
1869: }
1870:
1871: /* Return solution */
1872: if (asa->scale_diag) {
1873: VecPointwiseMult(x, asa->x, asa->invsqrtdiag);
1874: } else {
1875: VecCopy(x, asa->x);
1876: }
1878: /* delete working vectors */
1879: SafeVecDestroy(&(asa->x));
1880: SafeVecDestroy(&(asa->b));
1881: SafeVecDestroy(&(asa->r));
1882: asa_lev->x = PETSC_NULL;
1883: asa_lev->b = PETSC_NULL;
1884: return(0);
1885: }
1887: /* -------------------------------------------------------------------------- */
1888: /*
1889: PCDestroy_ASA - Destroys the private context for the ASA preconditioner
1890: that was created with PCCreate_ASA().
1892: Input Parameter:
1893: . pc - the preconditioner context
1895: Application Interface Routine: PCDestroy()
1896: */
1899: static PetscErrorCode PCDestroy_ASA(PC pc)
1900: {
1901: PC_ASA *asa;
1902: PC_ASA_level *asa_lev;
1903: PC_ASA_level *asa_next_level;
1908: asa = (PC_ASA*)pc->data;
1909: asa_lev = asa->levellist;
1911: /* Delete top level data */
1912: PetscFree(asa->ksptype_smooth);
1913: PetscFree(asa->pctype_smooth);
1914: PetscFree(asa->ksptype_direct);
1915: PetscFree(asa->pctype_direct);
1916: PetscFree(asa->coarse_mat_type);
1918: /* this is destroyed by the levels below */
1919: /* SafeMatDestroy(&(asa->A)); */
1920: SafeVecDestroy(&(asa->invsqrtdiag));
1921: SafeVecDestroy(&(asa->b));
1922: SafeVecDestroy(&(asa->x));
1923: SafeVecDestroy(&(asa->r));
1925: if (asa->dm) {DMDestroy(asa->dm);}
1927: /* Destroy each of the levels */
1928: while(asa_lev) {
1929: asa_next_level = asa_lev->next;
1930: PCDestroyLevel_ASA(asa_lev);
1931: asa_lev = asa_next_level;
1932: }
1934: PetscFree(asa);
1935: return(0);
1936: }
1940: static PetscErrorCode PCSetFromOptions_ASA(PC pc)
1941: {
1942: PC_ASA *asa = (PC_ASA*)pc->data;
1943: PetscTruth flg;
1945: char type[20];
1950: PetscOptionsHead("ASA options");
1951: /* convergence parameters */
1952: PetscOptionsInt("-pc_asa_nu","Number of cycles to run smoother","No manual page yet",asa->nu,&(asa->nu),&flg);
1953: PetscOptionsInt("-pc_asa_gamma","Number of cycles to run coarse grid correction","No manual page yet",asa->gamma,&(asa->gamma),&flg);
1954: PetscOptionsReal("-pc_asa_epsilon","Tolerance for the relaxation method","No manual page yet",asa->epsilon,&(asa->epsilon),&flg);
1955: PetscOptionsInt("-pc_asa_mu","Number of cycles to relax in setup stages","No manual page yet",asa->mu,&(asa->mu),&flg);
1956: PetscOptionsInt("-pc_asa_mu_initial","Number of cycles to relax for generating first candidate vector","No manual page yet",asa->mu_initial,&(asa->mu_initial),&flg);
1957: PetscOptionsInt("-pc_asa_direct_solver","For which matrix size should we use the direct solver?","No manual page yet",asa->direct_solver,&(asa->direct_solver),&flg);
1958: PetscOptionsTruth("-pc_asa_scale_diag","Should we scale the matrix with the inverse of its diagonal?","No manual page yet",asa->scale_diag,&(asa->scale_diag),&flg);
1959: /* type of smoother used */
1960: PetscOptionsList("-pc_asa_smoother_ksp_type","The type of KSP to be used in the smoothers","No manual page yet",KSPList,asa->ksptype_smooth,type,20,&flg);
1961: if (flg) {
1962: PetscFree(asa->ksptype_smooth);
1963: PetscStrallocpy(type,&(asa->ksptype_smooth));
1964: }
1965: PetscOptionsList("-pc_asa_smoother_pc_type","The type of PC to be used in the smoothers","No manual page yet",PCList,asa->pctype_smooth,type,20,&flg);
1966: if (flg) {
1967: PetscFree(asa->pctype_smooth);
1968: PetscStrallocpy(type,&(asa->pctype_smooth));
1969: }
1970: PetscOptionsList("-pc_asa_direct_ksp_type","The type of KSP to be used in the direct solver","No manual page yet",KSPList,asa->ksptype_direct,type,20,&flg);
1971: if (flg) {
1972: PetscFree(asa->ksptype_direct);
1973: PetscStrallocpy(type,&(asa->ksptype_direct));
1974: }
1975: PetscOptionsList("-pc_asa_direct_pc_type","The type of PC to be used in the direct solver","No manual page yet",PCList,asa->pctype_direct,type,20,&flg);
1976: if (flg) {
1977: PetscFree(asa->pctype_direct);
1978: PetscStrallocpy(type,&(asa->pctype_direct));
1979: }
1980: /* options specific for certain smoothers */
1981: PetscOptionsReal("-pc_asa_richardson_scale","Scaling parameter for preconditioning in relaxation, if smoothing KSP is Richardson","No manual page yet",asa->richardson_scale,&(asa->richardson_scale),&flg);
1982: PetscOptionsReal("-pc_asa_sor_omega","Scaling parameter for preconditioning in relaxation, if smoothing KSP is Richardson","No manual page yet",asa->sor_omega,&(asa->sor_omega),&flg);
1983: /* options for direct solver */
1984: PetscOptionsString("-pc_asa_coarse_mat_type","The coarse level matrix type (e.g. SuperLU, MUMPS, ...)","No manual page yet",asa->coarse_mat_type, type,20,&flg);
1985: if (flg) {
1986: PetscFree(asa->coarse_mat_type);
1987: PetscStrallocpy(type,&(asa->coarse_mat_type));
1988: }
1989: /* storage allocation parameters */
1990: PetscOptionsInt("-pc_asa_max_cand_vecs","Maximum number of candidate vectors","No manual page yet",asa->max_cand_vecs,&(asa->max_cand_vecs),&flg);
1991: PetscOptionsInt("-pc_asa_max_dof_lev_2","The maximum number of degrees of freedom per node on level 2 (K in paper)","No manual page yet",asa->max_dof_lev_2,&(asa->max_dof_lev_2),&flg);
1992: /* construction parameters */
1993: PetscOptionsReal("-pc_asa_rq_improve","Threshold in RQ improvement for adding another candidate","No manual page yet",asa->rq_improve,&(asa->rq_improve),&flg);
1994: PetscOptionsTail();
1995: return(0);
1996: }
2000: static PetscErrorCode PCView_ASA(PC pc,PetscViewer viewer)
2001: {
2002: PC_ASA *asa = (PC_ASA*)pc->data;
2004: PetscTruth iascii;
2005: PC_ASA_level *asa_lev = asa->levellist;
2008: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
2009: if (iascii) {
2010: PetscViewerASCIIPrintf(viewer," ASA:\n");
2011: asa_lev = asa->levellist;
2012: while (asa_lev) {
2013: if (!asa_lev->next) {
2014: PetscViewerASCIIPrintf(viewer,"Coarse gride solver -- level %D -------------------------------\n",0);
2015: } else {
2016: PetscViewerASCIIPrintf(viewer,"Down solver (pre-smoother) on level ? -------------------------------\n");
2017: }
2018: PetscViewerASCIIPushTab(viewer);
2019: KSPView(asa_lev->smoothd,viewer);
2020: PetscViewerASCIIPopTab(viewer);
2021: if (asa_lev->next && asa_lev->smoothd == asa_lev->smoothu) {
2022: PetscViewerASCIIPrintf(viewer,"Up solver (post-smoother) same as down solver (pre-smoother)\n");
2023: } else if (asa_lev->next){
2024: PetscViewerASCIIPrintf(viewer,"Up solver (post-smoother) on level ? -------------------------------\n");
2025: PetscViewerASCIIPushTab(viewer);
2026: KSPView(asa_lev->smoothu,viewer);
2027: PetscViewerASCIIPopTab(viewer);
2028: }
2029: asa_lev = asa_lev->next;
2030: }
2031: } else {
2032: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASA",((PetscObject)viewer)->type_name);
2033: }
2034: return(0);
2035: }
2037: /* -------------------------------------------------------------------------- */
2038: /*
2039: PCCreate_ASA - Creates a ASA preconditioner context, PC_ASA,
2040: and sets this as the private data within the generic preconditioning
2041: context, PC, that was created within PCCreate().
2043: Input Parameter:
2044: . pc - the preconditioner context
2046: Application Interface Routine: PCCreate()
2047: */
2051: PetscErrorCode PCCreate_ASA(PC pc)
2052: {
2054: PC_ASA *asa;
2059: /*
2060: Set the pointers for the functions that are provided above.
2061: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
2062: are called, they will automatically call these functions. Note we
2063: choose not to provide a couple of these functions since they are
2064: not needed.
2065: */
2066: pc->ops->apply = PCApply_ASA;
2067: /* pc->ops->applytranspose = PCApply_ASA;*/
2068: pc->ops->applyrichardson = PCApplyRichardson_ASA;
2069: pc->ops->setup = 0;
2070: pc->ops->destroy = PCDestroy_ASA;
2071: pc->ops->setfromoptions = PCSetFromOptions_ASA;
2072: pc->ops->view = PCView_ASA;
2074: /* Set the data to pointer to 0 */
2075: pc->data = (void*)0;
2077: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASASetDM_C","PCASASetDM_ASA",PCASASetDM_ASA);
2078: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASASetTolerances_C","PCASASetTolerances_ASA",PCASASetTolerances_ASA);
2080: /* register events */
2081: if (! asa_events_registered) {
2086: asa_events_registered = PETSC_TRUE;
2087: }
2089: /* Create new PC_ASA object */
2090: PetscNewLog(pc,PC_ASA,&asa);
2091: pc->data = (void*)asa;
2093: /* WORK: find some better initial values */
2094: asa->nu = 3;
2095: asa->gamma = 1;
2096: asa->epsilon = 1e-4;
2097: asa->mu = 3;
2098: asa->mu_initial = 20;
2099: asa->direct_solver = 100;
2100: asa->scale_diag = PETSC_TRUE;
2101: PetscStrallocpy(KSPRICHARDSON, (char **) &(asa->ksptype_smooth));
2102: PetscStrallocpy(PCSOR, (char **) &(asa->pctype_smooth));
2103: asa->smoother_rtol = 1e-10;
2104: asa->smoother_abstol = 1e-20;
2105: asa->smoother_dtol = PETSC_DEFAULT;
2106: PetscStrallocpy(KSPPREONLY, (char **) &(asa->ksptype_direct));
2107: PetscStrallocpy(PCREDUNDANT, (char **) &(asa->pctype_direct));
2108: asa->direct_rtol = 1e-10;
2109: asa->direct_abstol = 1e-20;
2110: asa->direct_dtol = PETSC_DEFAULT;
2111: asa->richardson_scale = PETSC_DECIDE;
2112: asa->sor_omega = PETSC_DECIDE;
2113: PetscStrallocpy(MATSAME, (char **) &(asa->coarse_mat_type));
2115: asa->max_cand_vecs = 4;
2116: asa->max_dof_lev_2 = 640; /* I don't think this parameter really matters, 640 should be enough for everyone! */
2118: asa->multigrid_constructed = PETSC_FALSE;
2120: asa->rtol = 1e-10;
2121: asa->abstol = 1e-15;
2122: asa->divtol = 1e5;
2123: asa->max_it = 10000;
2124: asa->rq_improve = 0.9;
2125:
2126: asa->A = 0;
2127: asa->invsqrtdiag = 0;
2128: asa->b = 0;
2129: asa->x = 0;
2130: asa->r = 0;
2132: asa->dm = 0;
2133:
2134: asa->levels = 0;
2135: asa->levellist = 0;
2137: asa->comm = ((PetscObject)pc)->comm;
2138: return(0);
2139: }