Actual source code: pcmpi.c
1: /*
2: This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3: It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
5: That program may use OpenMP to compute the right hand side and matrix for the linear system.
7: The code uses MPI_COMM_WORLD below, but maybe it should be PETSC_COMM_WORLD.
9: The resulting KSP and PC can only be controlled via the options database, though some common commands
10: could be passed through the server.
12: */
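/*
   A minimal sketch of the intended usage (illustrative only; the assembly calls, the choice of
   an outer KSPPREONLY, and the solver options are assumptions, not code from this file). The
   sequential user code runs on MPI rank 0, builds a SeqAIJ system, and solves it through a KSP
   whose PC type is PCMPI, while the other ranks, started with -mpi_linear_solver_server, sit in
   PCMPIServerBegin() and perform the solve in parallel:

      KSP ksp;
      PC  pc;
      Mat A;    // sequential AIJ matrix assembled on rank 0 (possibly with OpenMP)
      Vec b, x; // sequential right hand side and solution

      PetscInitialize(&argc, &argv, NULL, NULL); // starts the server on the other ranks when
                                                 // -mpi_linear_solver_server is given
      // ... create A, b, x on PETSC_COMM_SELF (or PETSC_COMM_WORLD, which the server resets to
      //     PETSC_COMM_SELF on rank 0) and fill them ...
      KSPCreate(PETSC_COMM_SELF, &ksp);
      KSPSetType(ksp, KSPPREONLY); // the parallel solve happens inside the PC apply
      KSPSetOperators(ksp, A, A);
      KSPGetPC(ksp, &pc);
      PCSetType(pc, PCMPI);
      KSPSetFromOptions(ksp);      // the parallel solver itself is controlled with the -mpi_ prefix
      KSPSolve(ksp, b, x);
      PetscFinalize();             // shuts the server down via PCMPIServerEnd()

   A full command line is sketched after the PCMPI manual page near the end of this file.
*/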
13: #include <petsc/private/pcimpl.h>
14: #include <petsc/private/kspimpl.h>
15: #include <petsc.h>
17: #define PC_MPI_MAX_RANKS 256
18: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
20: typedef struct {
21: KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22: PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23: PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
24: PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25: PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
26: } PC_MPI;
28: typedef enum {
29: PCMPI_EXIT, /* exit the PC server loop, meaning the controlling sequential program is done */
30: PCMPI_CREATE,
31: PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */
32: PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33: PCMPI_SOLVE,
34: PCMPI_VIEW,
35: PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36: } PCMPICommand;
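/*
   Each of these commands follows the same pattern (see PCSetUp_MPI() and the dispatch loop in
   PCMPIServerBegin() below): rank 0 broadcasts one PCMPICommand on PC_MPI_COMM_WORLD and then
   calls the matching PCMPIXxx() routine with its PC, while the server ranks receive the command
   in their loop and call the same routine with a NULL PC. For example, on rank 0:

      PCMPICommand request = PCMPI_SET_MAT;
      MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD);
      PCMPISetMat(pc); // the server ranks concurrently execute PCMPISetMat(NULL)
*/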
38: static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS];
39: static PetscBool PCMPICommSet = PETSC_FALSE;
40: static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
42: static PetscErrorCode PCMPICommsCreate(void)
43: {
44: MPI_Comm comm = PC_MPI_COMM_WORLD;
45: PetscMPIInt size, rank, i;
47: MPI_Comm_size(comm, &size);
49: MPI_Comm_rank(comm, &rank);
50: /* comm for size 1 is useful only for debugging */
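/* PCMPIComms[i] contains MPI ranks 0 through i; ranks above i get MPI_COMM_NULL from the split below */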
51: for (i = 0; i < size; i++) {
52: PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
53: MPI_Comm_split(comm, color, 0, &PCMPIComms[i]);
54: PCMPISolveCounts[i] = 0;
55: PCMPIKSPCounts[i] = 0;
56: }
57: PCMPICommSet = PETSC_TRUE;
58: return 0;
59: }
61: PetscErrorCode PCMPICommsDestroy(void)
62: {
63: MPI_Comm comm = PC_MPI_COMM_WORLD;
64: PetscMPIInt size, rank, i;
66: if (!PCMPICommSet) return 0;
67: MPI_Comm_size(comm, &size);
68: MPI_Comm_rank(comm, &rank);
69: for (i = 0; i < size; i++) {
70: if (PCMPIComms[i] != MPI_COMM_NULL) MPI_Comm_free(&PCMPIComms[i]);
71: }
72: PCMPICommSet = PETSC_FALSE;
73: return 0;
74: }
76: static PetscErrorCode PCMPICreate(PC pc)
77: {
78: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
79: MPI_Comm comm = PC_MPI_COMM_WORLD;
80: KSP ksp;
81: PetscInt N[2], mincntperrank = 0;
82: PetscMPIInt size;
83: Mat sA;
84: char *prefix;
85: PetscMPIInt len = 0;
87: if (!PCMPICommSet) PCMPICommsCreate();
88: MPI_Comm_size(comm, &size);
89: if (pc) {
90: if (size == 1) PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run; this will be less efficient than not using this type\n");
91: PCGetOperators(pc, &sA, &sA);
92: MatGetSize(sA, &N[0], &N[1]);
93: }
94: MPI_Bcast(N, 2, MPIU_INT, 0, comm);
96: /* choose a suitable sized MPI_Comm for the problem to be solved on */
97: if (km) mincntperrank = km->mincntperrank;
98: MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm);
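/* the subcommunicator with min(size, max(1, N/mincntperrank)) ranks is selected, so each active rank gets a sufficiently large share of the linear system */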
99: comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
100: if (comm == MPI_COMM_NULL) {
101: ksp = NULL;
102: return 0;
103: }
104: KSPCreate(comm, &ksp);
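/* rank 0 gathers the address of every rank's new KSP into km->ksps[] so later requests can scatter each KSP back to its rank */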
105: MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm);
106: if (pc) {
107: size_t slen;
109: MPI_Comm_size(comm, &size);
110: PCMPIKSPCounts[size - 1]++;
111: PCGetOptionsPrefix(pc, (const char **)&prefix);
112: PetscStrlen(prefix, &slen);
113: len = (PetscMPIInt)slen;
114: }
115: MPI_Bcast(&len, 1, MPI_INT, 0, comm);
116: if (len) {
117: if (!pc) PetscMalloc1(len + 1, &prefix);
118: MPI_Bcast(prefix, len + 1, MPI_CHAR, 0, comm);
119: KSPSetOptionsPrefix(ksp, prefix);
120: }
121: KSPAppendOptionsPrefix(ksp, "mpi_");
122: return 0;
123: }
125: static PetscErrorCode PCMPISetMat(PC pc)
126: {
127: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
128: Mat A;
129: PetscInt N[2], n, *ia, *ja, j, bs;
130: Mat sA;
131: MPI_Comm comm = PC_MPI_COMM_WORLD;
132: KSP ksp;
133: PetscLayout layout;
134: const PetscInt *IA = NULL, *JA = NULL;
135: const PetscInt *range;
136: PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
137: PetscScalar *a;
138: const PetscScalar *sa = NULL;
140: MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
141: if (!ksp) return 0;
142: PetscObjectGetComm((PetscObject)ksp, &comm);
143: if (pc) {
144: MPI_Comm_size(comm, &size);
145: PCMPIMatCounts[size - 1]++;
146: PCGetOperators(pc, &sA, &sA);
147: MatGetSize(sA, &N[0], &N[1]);
148: MatGetBlockSize(sA, &bs);
149: /* need to broadcast symmetry flags etc if set */
150: }
151: MPI_Bcast(N, 2, MPIU_INT, 0, comm);
152: MPI_Bcast(&bs, 1, MPIU_INT, 0, comm);
154: /* determine ownership ranges of matrix */
155: PetscLayoutCreate(comm, &layout);
156: PetscLayoutSetBlockSize(layout, bs);
157: PetscLayoutSetSize(layout, N[0]);
158: PetscLayoutSetUp(layout);
159: PetscLayoutGetLocalSize(layout, &n);
161: /* copy over the matrix nonzero structure and values */
162: if (pc) {
163: NZ = km->NZ;
164: NZdispl = km->NZdispl;
165: PetscLayoutGetRanges(layout, &range);
166: MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL);
167: for (i = 0; i < size; i++) {
168: sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
169: NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
170: }
171: displi[0] = 0;
172: NZdispl[0] = 0;
173: for (j = 1; j < size; j++) {
174: displi[j] = displi[j - 1] + sendcounti[j - 1] - 1;
175: NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
176: }
177: MatSeqAIJGetArrayRead(sA, &sa);
178: }
179: PetscLayoutDestroy(&layout);
180: MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm);
182: PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a);
183: MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm);
184: MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm);
185: MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm);
187: if (pc) {
188: MatSeqAIJRestoreArrayRead(sA, &sa);
189: MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL);
190: }
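/* the scattered row offsets are still global to the original matrix; shift them so each rank's CSR rows start at zero */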
192: for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
193: ia[0] = 0;
194: MatCreateMPIAIJWithArrays(comm, n, n, N[0], N[0], ia, ja, a, &A);
195: MatSetBlockSize(A, bs);
196: MatSetOptionsPrefix(A, "mpi_");
198: PetscFree3(ia, ja, a);
199: KSPSetOperators(ksp, A, A);
200: if (!ksp->vec_sol) MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs);
201: if (pc) { /* needed for scatterv/gatherv of rhs and solution */
202: const PetscInt *range;
204: VecGetOwnershipRanges(ksp->vec_sol, &range);
205: for (i = 0; i < size; i++) {
206: km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
207: km->displ[i] = (PetscMPIInt)range[i];
208: }
209: }
210: MatDestroy(&A);
211: KSPSetFromOptions(ksp);
212: return 0;
213: }
215: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
216: {
217: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
218: KSP ksp;
219: Mat sA, A;
220: MPI_Comm comm = PC_MPI_COMM_WORLD;
221: PetscScalar *a;
222: PetscCount nz;
223: const PetscScalar *sa = NULL;
224: PetscMPIInt size;
226: if (pc) {
227: PCGetOperators(pc, &sA, &sA);
228: MatSeqAIJGetArrayRead(sA, &sa);
229: }
230: MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
231: if (!ksp) return 0;
232: PetscObjectGetComm((PetscObject)ksp, &comm);
233: MPI_Comm_size(comm, &size);
234: PCMPIMatCounts[size - 1]++;
235: KSPGetOperators(ksp, NULL, &A);
236: MatMPIAIJGetNumberNonzeros(A, &nz);
237: PetscMalloc1(nz, &a);
238: MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm);
239: if (pc) MatSeqAIJRestoreArrayRead(sA, &sa);
240: MatUpdateMPIAIJWithArray(A, a);
241: PetscFree(a);
242: return 0;
243: }
245: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
246: {
247: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
248: KSP ksp;
249: MPI_Comm comm = PC_MPI_COMM_WORLD;
250: const PetscScalar *sb = NULL, *x;
251: PetscScalar *b, *sx = NULL;
252: PetscInt n;
254: MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
255: if (!ksp) return 0;
256: PetscObjectGetComm((PetscObject)ksp, &comm);
258: /* TODO: optimize code to not require building counts/displ every time */
260: /* scatterv rhs */
261: if (pc) {
262: PetscMPIInt size;
264: MPI_Comm_size(comm, &size);
265: PCMPISolveCounts[size - 1]++;
266: VecGetArrayRead(B, &sb);
267: }
268: VecGetLocalSize(ksp->vec_rhs, &n);
269: VecGetArray(ksp->vec_rhs, &b);
270: MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm);
271: VecRestoreArray(ksp->vec_rhs, &b);
272: if (pc) VecRestoreArrayRead(B, &sb);
274: KSPSolve(ksp, NULL, NULL);
276: /* gather solution */
277: VecGetArrayRead(ksp->vec_sol, &x);
278: if (pc) VecGetArray(X, &sx);
279: MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm);
280: if (pc) VecRestoreArray(X, &sx);
281: VecRestoreArrayRead(ksp->vec_sol, &x);
282: return 0;
283: }
285: static PetscErrorCode PCMPIDestroy(PC pc)
286: {
287: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
288: KSP ksp;
289: MPI_Comm comm = PC_MPI_COMM_WORLD;
291: MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm);
292: if (!ksp) return 0;
293: KSPDestroy(&ksp);
294: return 0;
295: }
297: /*@C
298: PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for
299: parallel `KSP` solves and management of parallel `KSP` objects.
301: Logically collective on all MPI ranks except 0
303: Options Database Keys:
304: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
305: - -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
307: Note:
308: This is normally started automatically in `PetscInitialize()` when the option -mpi_linear_solver_server is provided
310: Developer Notes:
311: When called on rank zero this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
312: written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the other ranks
313: (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
315: Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
317: Level: developer
319: .seealso: `PCMPIServerEnd()`, `PCMPI`
320: @*/
321: PetscErrorCode PCMPIServerBegin(void)
322: {
323: PetscMPIInt rank;
325: PetscInfo(NULL, "Starting MPI Linear Solver Server");
326: if (PetscDefined(USE_SINGLE_LIBRARY)) {
327: VecInitializePackage();
328: MatInitializePackage();
329: DMInitializePackage();
330: PCInitializePackage();
331: KSPInitializePackage();
332: SNESInitializePackage();
333: TSInitializePackage();
334: TaoInitializePackage();
335: }
337: MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank);
338: if (rank == 0) {
339: PETSC_COMM_WORLD = PETSC_COMM_SELF;
340: return 0;
341: }
343: while (PETSC_TRUE) {
344: PCMPICommand request = PCMPI_CREATE;
345: MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD);
346: switch (request) {
347: case PCMPI_CREATE:
348: PCMPICreate(NULL);
349: break;
350: case PCMPI_SET_MAT:
351: PCMPISetMat(NULL);
352: break;
353: case PCMPI_UPDATE_MAT_VALUES:
354: PCMPIUpdateMatValues(NULL);
355: break;
356: case PCMPI_VIEW:
357: // PCMPIView(NULL);
358: break;
359: case PCMPI_SOLVE:
360: PCMPISolve(NULL, NULL, NULL);
361: break;
362: case PCMPI_DESTROY:
363: PCMPIDestroy(NULL);
364: break;
365: case PCMPI_EXIT:
366: PetscFinalize();
367: exit(0); /* not sure if this is a good idea, but cannot return because it will run the user's main program */
368: break;
369: default:
370: break;
371: }
372: }
373: return 0;
374: }
376: /*@C
377: PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for
378: parallel `KSP` solves and management of parallel `KSP` objects.
380: Logically collective on all MPI ranks except 0
382: Note:
383: This is normally ended automatically in `PetscFinalize()` when the option -mpi_linear_solver_server is provided
385: Level: developer
387: .seealso: `PCMPIServerBegin()`, `PCMPI`
388: @*/
389: PetscErrorCode PCMPIServerEnd(void)
390: {
391: PCMPICommand request = PCMPI_EXIT;
393: if (PetscGlobalRank == 0) {
394: PetscViewer viewer = NULL;
395: PetscViewerFormat format;
397: MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD);
398: PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
399: PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
400: PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL);
401: PetscOptionsEnd();
402: if (viewer) {
403: PetscBool isascii;
405: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
406: if (isascii) {
407: PetscMPIInt size;
408: PetscInt i, ksp = 0, mat = 0, solve = 0;
410: MPI_Comm_size(PETSC_COMM_WORLD, &size);
411: for (i = 0; i < size; i++) {
412: ksp += PCMPIKSPCounts[i];
413: mat += PCMPIMatCounts[i];
414: solve += PCMPISolveCounts[i];
415: }
416: PetscViewerASCIIPrintf(viewer, "MPI linear solver server:\n");
417: PetscViewerASCIIPrintf(viewer, " Parallel KSPs %" PetscInt_FMT " Mats %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", ksp, mat, solve);
418: PetscViewerASCIIPrintf(viewer, " Sequential KSPs %" PetscInt_FMT " Solves %" PetscInt_FMT "\n", PCMPIKSPCountsSeq, PCMPISolveCountsSeq);
419: }
420: PetscViewerDestroy(&viewer);
421: }
422: }
423: PCMPICommsDestroy();
424: return 0;
425: }
427: /*
428: This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
429: because, for example, the problem is small. This version is more efficient because it does not require copying any data.
430: */
431: static PetscErrorCode PCSetUp_Seq(PC pc)
432: {
433: PC_MPI *km = (PC_MPI *)pc->data;
434: Mat sA;
435: const char *prefix;
437: PCGetOperators(pc, NULL, &sA);
438: PCGetOptionsPrefix(pc, &prefix);
439: KSPCreate(PETSC_COMM_SELF, &km->ksps[0]);
440: KSPSetOptionsPrefix(km->ksps[0], prefix);
441: KSPAppendOptionsPrefix(km->ksps[0], "mpi_");
442: KSPSetOperators(km->ksps[0], sA, sA);
443: KSPSetFromOptions(km->ksps[0]);
444: KSPSetUp(km->ksps[0]);
445: PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n");
446: PCMPIKSPCountsSeq++;
447: return 0;
448: }
450: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
451: {
452: PC_MPI *km = (PC_MPI *)pc->data;
454: KSPSolve(km->ksps[0], b, x);
455: PCMPISolveCountsSeq++;
456: return 0;
457: }
459: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
460: {
461: PC_MPI *km = (PC_MPI *)pc->data;
462: const char *prefix;
464: PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to the small size of the linear system\n");
465: PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank);
466: PCGetOptionsPrefix(pc, &prefix);
467: if (prefix) PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix);
468: else PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n");
469: return 0;
470: }
472: static PetscErrorCode PCDestroy_Seq(PC pc)
473: {
474: PC_MPI *km = (PC_MPI *)pc->data;
476: KSPDestroy(&km->ksps[0]);
477: PetscFree(pc->data);
478: return 0;
479: }
481: /*
482: PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
483: right hand side to the parallel PC
484: */
485: static PetscErrorCode PCSetUp_MPI(PC pc)
486: {
487: PC_MPI *km = (PC_MPI *)pc->data;
488: PetscMPIInt rank, size;
489: PCMPICommand request;
490: PetscBool newmatrix = PETSC_FALSE;
492: MPI_Comm_rank(MPI_COMM_WORLD, &rank);
494: MPI_Comm_size(MPI_COMM_WORLD, &size);
496: if (!pc->setupcalled) {
497: if (!km->alwaysuseserver) {
498: PetscInt n;
499: Mat sA;
500: /* short circuit for small systems */
501: PCGetOperators(pc, &sA, &sA);
502: MatGetSize(sA, &n, NULL);
503: if (n < 2 * km->mincntperrank - 1 || size == 1) {
504: pc->ops->setup = NULL;
505: pc->ops->apply = PCApply_Seq;
506: pc->ops->destroy = PCDestroy_Seq;
507: pc->ops->view = PCView_Seq;
508: PCSetUp_Seq(pc);
509: return 0;
510: }
511: }
513: request = PCMPI_CREATE;
514: MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
515: PCMPICreate(pc);
516: newmatrix = PETSC_TRUE;
517: }
518: if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
520: if (newmatrix) {
521: PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n");
522: request = PCMPI_SET_MAT;
523: MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
524: PCMPISetMat(pc);
525: } else {
526: PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n");
527: request = PCMPI_UPDATE_MAT_VALUES;
528: MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
529: PCMPIUpdateMatValues(pc);
530: }
531: return 0;
532: }
534: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
535: {
536: PCMPICommand request = PCMPI_SOLVE;
538: MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
539: PCMPISolve(pc, b, x);
540: return 0;
541: }
543: PetscErrorCode PCDestroy_MPI(PC pc)
544: {
545: PCMPICommand request = PCMPI_DESTROY;
547: MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD);
548: PCMPIDestroy(pc);
549: PetscFree(pc->data);
550: return 0;
551: }
553: /*
554: PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
555: */
556: PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
557: {
558: PC_MPI *km = (PC_MPI *)pc->data;
559: MPI_Comm comm;
560: PetscMPIInt size;
561: const char *prefix;
563: PetscObjectGetComm((PetscObject)km->ksps[0], &comm);
564: MPI_Comm_size(comm, &size);
565: PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size);
566: PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank);
567: PCGetOptionsPrefix(pc, &prefix);
568: if (prefix) PetscViewerASCIIPrintf(viewer, "*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n", prefix);
569: else PetscViewerASCIIPrintf(viewer, "*** Use -mpi_ksp_view to see the MPI KSP parameters***\n");
570: return 0;
571: }
573: PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
574: {
575: PC_MPI *km = (PC_MPI *)pc->data;
577: PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
578: PetscOptionsInt("-pc_mpi_minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL);
579: PetscOptionsBool("-pc_mpi_always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL);
580: PetscOptionsHeadEnd();
581: return 0;
582: }
584: /*MC
585: PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
587: Level: beginner
589: Options Database Keys:
590: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
591: . -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server
592: . -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
593: - -pc_mpi_always_use_server - use the server solver code even if the particular system is solved on only one process; this option is only for debugging and testing purposes
595: Notes:
596: The options database prefix for the MPI parallel `KSP` and `PC` is -mpi_
598: The MPI linear solver server is not intended to scale user code to extremely large numbers of MPI ranks, but it should give reasonable speedup for
599: roughly 4 to 8 MPI ranks, depending on the linear system being solved, the solver algorithm, and the hardware.
601: It can be particularly useful for user OpenMP code or potentially user GPU code.
603: When the program is run with a single MPI rank, this directly uses the provided matrix and right hand side (still in a `KSP` with the options prefix -mpi_)
604: and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the `KSP` directly.
606: .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`
607: M*/
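/*
   A sketch of a typical command line exercising the options listed above (the inner solver
   choices under the -mpi_ prefix are illustrative; "-pc_type mpi" assumes the usual PETSc
   convention that PCCreate_MPI() below registers the type name "mpi"):

      mpiexec -n 4 ./app -mpi_linear_solver_server -ksp_type preonly -pc_type mpi \
              -mpi_ksp_type cg -mpi_pc_type bjacobi -mpi_ksp_monitor -mpi_linear_solver_server_view

   Only rank 0 runs the user code in ./app, while all four ranks may take part in the parallel
   KSP solve, which is configured entirely through the -mpi_ options.
*/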
608: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
609: {
610: PC_MPI *km;
612: PetscNew(&km);
613: pc->data = (void *)km;
615: km->mincntperrank = 10000;
617: pc->ops->setup = PCSetUp_MPI;
618: pc->ops->apply = PCApply_MPI;
619: pc->ops->destroy = PCDestroy_MPI;
620: pc->ops->view = PCView_MPI;
621: pc->ops->setfromoptions = PCSetFromOptions_MPI;
622: return 0;
623: }