Actual source code: ex52.c
static char help[] = "Solves a linear system in parallel with KSP. Modified from ex2.c \n\
                      Illustrates how to use the external packages MUMPS, SUPERLU and STRUMPACK \n\
Input parameters include:\n\
  -random_exact_sol : use a random exact solution vector\n\
  -view_exact_sol   : write exact solution vector to stdout\n\
  -m <mesh_x>       : number of mesh points in x-direction\n\
  -n <mesh_y>       : number of mesh points in y-direction\n\n";
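
/*
  Illustrative command lines (a sketch only; the MPI launcher and executable
  path depend on your installation, and each solver option requires the
  corresponding package in your PETSc build):

    ./ex52 -m 16 -n 16 -use_petsc_lu
    mpiexec -n 3 ./ex52 -use_mumps_lu -print_mumps_memory
    mpiexec -n 2 ./ex52 -use_superlu_lu -random_exact_sol
*/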

#include <petscksp.h>

#if defined(PETSC_HAVE_MUMPS)
/* Subroutine contributed by Varun Hiremath */
PetscErrorCode printMumpsMemoryInfo(Mat F)
{
  PetscInt maxMem, sumMem;

  PetscFunctionBeginUser;
  PetscCall(MatMumpsGetInfog(F, 16, &maxMem));
  PetscCall(MatMumpsGetInfog(F, 17, &sumMem));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(16) :: Max memory in MB = %" PetscInt_FMT, maxMem));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(17) :: Sum memory in MB = %" PetscInt_FMT "\n", sumMem));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

int main(int argc, char **args)
{
  Vec         x, b, u; /* approx solution, RHS, exact solution */
  Mat         A, F;
  KSP         ksp; /* linear solver context */
  PC          pc;
  PetscRandom rctx; /* random number generator context */
  PetscReal   norm; /* norm of solution error */
  PetscInt    i, j, Ii, J, Istart, Iend, m = 8, n = 7, its;
  PetscBool   flg = PETSC_FALSE, flg_ilu = PETSC_FALSE, flg_ch = PETSC_FALSE;
#if defined(PETSC_HAVE_MUMPS)
  PetscBool flg_mumps = PETSC_FALSE, flg_mumps_ch = PETSC_FALSE;
#endif
#if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
  PetscBool flg_superlu = PETSC_FALSE;
#endif
#if defined(PETSC_HAVE_STRUMPACK)
  PetscBool flg_strumpack = PETSC_FALSE;
#endif
  PetscScalar v;
  PetscMPIInt rank, size;
#if defined(PETSC_USE_LOG)
  PetscLogStage stage;
#endif

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
  PetscCall(MatSetUp(A));

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors. Determine which
     rows of the matrix are locally owned.
  */
  PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));

  /*
     Set matrix elements for the 2-D, five-point stencil in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.

     Note: this uses the less common natural ordering that orders first
     all the unknowns for x = h then for x = 2h etc; Hence you see J = Ii +- n
     instead of J = Ii +- m as you might expect. The more standard ordering
     would first do all variables for y = h, then y = 2h etc.

  */
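
  /*
     As a concrete illustration of this ordering (added here for clarity, using
     the default m = 8, n = 7): global row Ii = 10 corresponds to i = Ii / n = 1
     and j = Ii - i * n = 3, so its stencil neighbours are rows Ii - n = 3 and
     Ii + n = 17 (in x) and Ii - 1 = 9 and Ii + 1 = 11 (in y).
  */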
  PetscCall(PetscLogStageRegister("Assembly", &stage));
  PetscCall(PetscLogStagePush(stage));
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    if (i < m - 1) {
      J = Ii + n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    if (j > 0) {
      J = Ii - 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    if (j < n - 1) {
      J = Ii + 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    v = 4.0;
    PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transition
     by placing code between these two statements.
  */
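  /*
     For instance (purely illustrative; this example does not do it), local work
     that does not need the assembled matrix, such as setting up right-hand-side
     data, could be placed between MatAssemblyBegin() and MatAssemblyEnd() to
     overlap computation with the communication of off-process entries.
  */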
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogStagePop());

  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));

  /* Create parallel vectors */
  PetscCall(MatCreateVecs(A, &u, &b));
  PetscCall(VecDuplicate(u, &x));

  /*
     Set exact solution; then compute right-hand-side vector.
     By default we use an exact solution vector with all
     elements equal to 1.0; alternatively, the runtime option
     -random_exact_sol forms a solution vector with random components.
  */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-random_exact_sol", &flg, NULL));
  if (flg) {
    PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
    PetscCall(PetscRandomSetFromOptions(rctx));
    PetscCall(VecSetRandom(u, rctx));
    PetscCall(PetscRandomDestroy(&rctx));
  } else {
    PetscCall(VecSet(u, 1.0));
  }
  PetscCall(MatMult(A, u, b));

  /*
     View the exact solution vector if desired
  */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL));
  if (flg) PetscCall(VecView(u, PETSC_VIEWER_STDOUT_WORLD));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Create linear solver context
  */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, A, A));

  /*
    Example of how to use the external package MUMPS
    Note: runtime options
          '-ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 2 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1.e-6 -mat_mumps_icntl_33 1'
          are equivalent to these procedural calls
  */
#if defined(PETSC_HAVE_MUMPS)
  flg_mumps    = PETSC_FALSE;
  flg_mumps_ch = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_mumps_lu", &flg_mumps, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_mumps_ch", &flg_mumps_ch, NULL));
  if (flg_mumps || flg_mumps_ch) {
    PetscCall(KSPSetType(ksp, KSPPREONLY));
    PetscInt  ival, icntl;
    PetscReal val;
    PetscCall(KSPGetPC(ksp, &pc));
    if (flg_mumps) {
      PetscCall(PCSetType(pc, PCLU));
    } else if (flg_mumps_ch) {
      PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE)); /* set MUMPS id%SYM=1 */
      PetscCall(PCSetType(pc, PCCHOLESKY));
    }
    PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
    PetscCall(PCFactorSetUpMatSolverType(pc)); /* call MatGetFactor() to create F */
    PetscCall(PCFactorGetMatrix(pc, &F));

    if (flg_mumps) {
      /* Get memory estimates from MUMPS' MatLUFactorSymbolic(), e.g. INFOG(16), INFOG(17).
         KSPSetUp() below will do nothing inside MatLUFactorSymbolic() */
      MatFactorInfo info;
      PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &info));
      flg = PETSC_FALSE;
      PetscCall(PetscOptionsGetBool(NULL, NULL, "-print_mumps_memory", &flg, NULL));
      if (flg) PetscCall(printMumpsMemoryInfo(F));
    }

    /* sequential ordering */
    icntl = 7;
    ival  = 2;
    PetscCall(MatMumpsSetIcntl(F, icntl, ival));

    /* threshold for row pivot detection */
    PetscCall(MatMumpsSetIcntl(F, 24, 1));
    icntl = 3;
    val   = 1.e-6;
    PetscCall(MatMumpsSetCntl(F, icntl, val));

    /* compute determinant of A */
    PetscCall(MatMumpsSetIcntl(F, 33, 1));
  }
#endif

  /*
    Example of how to use the external package SuperLU
    Note: runtime options
          '-ksp_type preonly -pc_type ilu -pc_factor_mat_solver_type superlu -mat_superlu_ilu_droptol 1.e-8'
          are equivalent to these procedural calls
  */
#if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
  flg_ilu     = PETSC_FALSE;
  flg_superlu = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_superlu_lu", &flg_superlu, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_superlu_ilu", &flg_ilu, NULL));
  if (flg_superlu || flg_ilu) {
    PetscCall(KSPSetType(ksp, KSPPREONLY));
    PetscCall(KSPGetPC(ksp, &pc));
    if (flg_superlu) PetscCall(PCSetType(pc, PCLU));
    else if (flg_ilu) PetscCall(PCSetType(pc, PCILU));
    if (size == 1) {
#if !defined(PETSC_HAVE_SUPERLU)
      SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This test requires SUPERLU");
#else
      PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU));
#endif
    } else {
#if !defined(PETSC_HAVE_SUPERLU_DIST)
      SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This test requires SUPERLU_DIST");
#else
      PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST));
#endif
    }
    PetscCall(PCFactorSetUpMatSolverType(pc)); /* call MatGetFactor() to create F */
    PetscCall(PCFactorGetMatrix(pc, &F));
#if defined(PETSC_HAVE_SUPERLU)
    if (size == 1) PetscCall(MatSuperluSetILUDropTol(F, 1.e-8));
#endif
  }
#endif

  /*
    Example of how to use the external package STRUMPACK
    Note: runtime options
          '-pc_type lu/ilu \
           -pc_factor_mat_solver_type strumpack \
           -mat_strumpack_reordering METIS \
           -mat_strumpack_colperm 0 \
           -mat_strumpack_hss_rel_tol 1.e-3 \
           -mat_strumpack_hss_min_sep_size 50 \
           -mat_strumpack_max_rank 100 \
           -mat_strumpack_leaf_size 4'
          are equivalent to these procedural calls

    We refer to the STRUMPACK-sparse manual, section 5, for more info on
    how to tune the preconditioner.
  */
#if defined(PETSC_HAVE_STRUMPACK)
  flg_ilu       = PETSC_FALSE;
  flg_strumpack = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_strumpack_lu", &flg_strumpack, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_strumpack_ilu", &flg_ilu, NULL));
  if (flg_strumpack || flg_ilu) {
    PetscCall(KSPSetType(ksp, KSPPREONLY));
    PetscCall(KSPGetPC(ksp, &pc));
    if (flg_strumpack) PetscCall(PCSetType(pc, PCLU));
    else if (flg_ilu) PetscCall(PCSetType(pc, PCILU));
#if !defined(PETSC_HAVE_STRUMPACK)
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This test requires STRUMPACK");
#endif
    PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERSTRUMPACK));
    PetscCall(PCFactorSetUpMatSolverType(pc)); /* call MatGetFactor() to create F */
    PetscCall(PCFactorGetMatrix(pc, &F));
#if defined(PETSC_HAVE_STRUMPACK)
    /* Set the fill-reducing reordering. */
    PetscCall(MatSTRUMPACKSetReordering(F, MAT_STRUMPACK_METIS));
    /* Since this is a simple discretization, the diagonal is always
       nonzero, and there is no need for the extra MC64 permutation. */
    PetscCall(MatSTRUMPACKSetColPerm(F, PETSC_FALSE));
    /* The compression tolerance used when doing low-rank compression
       in the preconditioner. This is problem specific! */
    PetscCall(MatSTRUMPACKSetHSSRelTol(F, 1.e-3));
    /* Set minimum matrix size for HSS compression to 15 in order to
       demonstrate the preconditioner on small problems. For performance
       a value of, say, 500 is better. */
    PetscCall(MatSTRUMPACKSetHSSMinSepSize(F, 15));
    /* You can further limit the fill in the preconditioner by
       setting a maximum rank. */
    PetscCall(MatSTRUMPACKSetHSSMaxRank(F, 100));
    /* Set the size of the diagonal blocks (the leaves) in the HSS
       approximation. The default value should be better for real
       problems. This is mostly for illustration on a small problem. */
    PetscCall(MatSTRUMPACKSetHSSLeafSize(F, 4));
#endif
  }
#endif

  /*
    Example of how to use procedural calls that are equivalent to
          '-ksp_type preonly -pc_type lu/ilu -pc_factor_mat_solver_type petsc'
  */
  flg     = PETSC_FALSE;
  flg_ilu = PETSC_FALSE;
  flg_ch  = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_petsc_lu", &flg, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_petsc_ilu", &flg_ilu, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_petsc_ch", &flg_ch, NULL));
  if (flg || flg_ilu || flg_ch) {
    Vec diag;

    PetscCall(KSPSetType(ksp, KSPPREONLY));
    PetscCall(KSPGetPC(ksp, &pc));
    if (flg) PetscCall(PCSetType(pc, PCLU));
    else if (flg_ilu) PetscCall(PCSetType(pc, PCILU));
    else if (flg_ch) PetscCall(PCSetType(pc, PCCHOLESKY));
    PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERPETSC));
    PetscCall(PCFactorSetUpMatSolverType(pc)); /* call MatGetFactor() to create F */
    PetscCall(PCFactorGetMatrix(pc, &F));

    /* Test MatGetDiagonal() */
    PetscCall(KSPSetUp(ksp));
    PetscCall(VecDuplicate(x, &diag));
    PetscCall(MatGetDiagonal(F, diag));
    /* PetscCall(VecView(diag,PETSC_VIEWER_STDOUT_WORLD)); */
    PetscCall(VecDestroy(&diag));
  }

  PetscCall(KSPSetFromOptions(ksp));

  /* Get info from matrix factors */
  PetscCall(KSPSetUp(ksp));

#if defined(PETSC_HAVE_MUMPS)
  if (flg_mumps || flg_mumps_ch) {
    PetscInt  icntl, infog34;
    PetscReal cntl, rinfo12, rinfo13;
    icntl = 3;
    PetscCall(MatMumpsGetCntl(F, icntl, &cntl));

    /* compute determinant */
    if (rank == 0) {
      PetscCall(MatMumpsGetInfog(F, 34, &infog34));
      PetscCall(MatMumpsGetRinfog(F, 12, &rinfo12));
      PetscCall(MatMumpsGetRinfog(F, 13, &rinfo13));
      PetscCall(PetscPrintf(PETSC_COMM_SELF, " Mumps row pivot threshold = %g\n", cntl));
      PetscCall(PetscPrintf(PETSC_COMM_SELF, " Mumps determinant = (%g, %g) * 2^%" PetscInt_FMT " \n", (double)rinfo12, (double)rinfo13, infog34));
    }
  }
#endif

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPSolve(ksp, b, x));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(VecAXPY(x, -1.0, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(KSPGetIterationNumber(ksp, &its));

  /*
     Print convergence information. PetscPrintf() produces a single
     print statement from all processes that share a communicator.
     An alternative is PetscFPrintf(), which prints to a file.
  */
  if (norm < 1.e-12) {
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error < 1.e-12 iterations %" PetscInt_FMT "\n", its));
  } else {
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its));
  }

  /*
     Free work space. All PETSc objects should be destroyed when they
     are no longer needed.
  */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&A));

  /*
     Always call PetscFinalize() before exiting a program. This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_view).
  */
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      args: -use_petsc_lu
      output_file: output/ex52_2.out

   test:
      suffix: mumps
      nsize: 3
      requires: mumps
      args: -use_mumps_lu
      output_file: output/ex52_1.out

   test:
      suffix: mumps_2
      nsize: 3
      requires: mumps
      args: -use_mumps_ch
      output_file: output/ex52_1.out

   test:
      suffix: mumps_3
      nsize: 3
      requires: mumps
      args: -use_mumps_ch -mat_type sbaij
      output_file: output/ex52_1.out

   test:
      suffix: mumps_4
      nsize: 3
      requires: mumps !complex !single
      args: -use_mumps_lu -m 50 -n 50 -use_mumps_lu -print_mumps_memory
      output_file: output/ex52_4.out

   test:
      suffix: mumps_omp_2
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      args: -use_mumps_lu -mat_mumps_use_omp_threads 2
      output_file: output/ex52_1.out

   test:
      suffix: mumps_omp_3
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      args: -use_mumps_ch -mat_mumps_use_omp_threads 3
      # Ignore the warning since we are intentionally testing the imbalanced case
      filter: grep -v "Warning: number of OpenMP threads"
      output_file: output/ex52_1.out

   test:
      suffix: mumps_omp_4
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      # let petsc guess a proper number for threads
      args: -use_mumps_ch -mat_type sbaij -mat_mumps_use_omp_threads
      output_file: output/ex52_1.out

   testset:
      suffix: strumpack_2
      nsize: {{1 2}}
      requires: strumpack
      args: -use_strumpack_lu
      output_file: output/ex52_3.out

      test:
         suffix: aij
         args: -mat_type aij

      test:
         requires: kokkos_kernels
         suffix: kok
         args: -mat_type aijkokkos

      test:
         requires: cuda
         suffix: cuda
         args: -mat_type aijcusparse

      test:
         requires: hip
         suffix: hip
         args: -mat_type aijhipsparse

   test:
      suffix: strumpack_ilu
      nsize: {{1 2}}
      requires: strumpack
      args: -use_strumpack_ilu
      output_file: output/ex52_3.out

   testset:
      suffix: superlu_dist
      nsize: {{1 2}}
      requires: superlu superlu_dist
      args: -use_superlu_lu
      output_file: output/ex52_2.out

      test:
         suffix: aij
         args: -mat_type aij

      test:
         requires: kokkos_kernels
         suffix: kok
         args: -mat_type aijkokkos

      test:
         requires: cuda
         suffix: cuda
         args: -mat_type aijcusparse

      test:
         requires: hip
         suffix: hip
         args: -mat_type aijhipsparse

   test:
      suffix: superlu_ilu
      requires: superlu superlu_dist
      args: -use_superlu_ilu
      output_file: output/ex52_2.out

TEST*/