Actual source code: redistribute.c
/*
  This file defines a preconditioner that redistributes the rows of the problem for load balance, drops the rows that
  contain only a diagonal entry, and solves the resulting smaller system with an inner KSP.
*/
#include <petsc/private/pcimpl.h>
#include <petscksp.h>

typedef struct {
  KSP          ksp;          /* inner KSP that solves the reduced (redistributed) system */
  Vec          x, b;         /* solution and right-hand side vectors of the reduced system */
  VecScatter   scatter;      /* scatter between vectors in the original space and the reduced space */
  IS           is;           /* global rows of the original matrix that are kept in the reduced system */
  PetscInt     dcnt, *drows; /* these are the local rows that have only a diagonal entry */
  PetscScalar *diag;         /* inverses of the diagonal entries of the eliminated rows */
  Vec          work;         /* work vector in the original (non-reduced) space */
} PC_Redistribute;

static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  PetscBool        iascii, isstring;
  PetscInt         ncnt, N;

  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring);
  if (iascii) {
    MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
    MatGetSize(pc->pmat, &N, NULL);
    PetscViewerASCIIPrintf(viewer, " Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N)));
    PetscViewerASCIIPrintf(viewer, " Redistribute preconditioner: \n");
    KSPView(red->ksp, viewer);
  } else if (isstring) {
    PetscViewerStringSPrintf(viewer, " Redistribute preconditioner");
    KSPView(red->ksp, viewer);
  }
  return 0;
}
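
/*
  Added commentary (not part of the original source): on the first call, PCSetUp_Redistribute() below (1) counts and
  records which local rows contain only a diagonal entry, (2) builds a load-balanced PetscLayout for the remaining
  rows and exchanges their global indices with MPI so each process learns which rows it will own, and (3) creates the
  index set, scatter, work vectors, and submatrix of the reduced system. On every call it refreshes the stored
  inverses of the eliminated diagonal entries and, once set up, re-extracts the submatrix in place with
  MAT_REUSE_MATRIX.
*/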

static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;
  MPI_Comm         comm;
  PetscInt         rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows;
  PetscLayout      map, nmap;
  PetscMPIInt      size, tag, n;
  PETSC_UNUSED PetscMPIInt imdex;
  PetscInt          *source = NULL;
  PetscMPIInt       *sizes = NULL, nrecvs;
  PetscInt           j, nsends;
  PetscInt          *owner = NULL, *starts = NULL, count, slen;
  PetscInt          *rvalues, *svalues, recvtotal;
  PetscMPIInt       *onodes1, *olengths1;
  MPI_Request       *send_waits = NULL, *recv_waits = NULL;
  MPI_Status         recv_status, *send_status;
  Vec                tvec, diag;
  Mat                tmat;
  const PetscScalar *d, *values;
  const PetscInt    *cols;

  if (pc->setupcalled) {
    KSPGetOperators(red->ksp, NULL, &tmat);
    MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat);
    KSPSetOperators(red->ksp, tmat, tmat);
  } else {
    PetscInt NN;

    PetscObjectGetComm((PetscObject)pc, &comm);
    MPI_Comm_size(comm, &size);
    PetscObjectGetNewTag((PetscObject)pc, &tag);

    /* count non-diagonal rows on process */
    MatGetOwnershipRange(pc->mat, &rstart, &rend);
    cnt = 0;
    for (i = rstart; i < rend; i++) {
      MatGetRow(pc->mat, i, &nz, &cols, &values);
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          cnt++;
          break;
        }
      }
      MatRestoreRow(pc->mat, i, &nz, &cols, &values);
    }
    PetscMalloc1(cnt, &rows);
    PetscMalloc1(rend - rstart - cnt, &drows);

    /* list non-diagonal rows on process */
    cnt  = 0;
    dcnt = 0;
    for (i = rstart; i < rend; i++) {
      PetscBool diagonly = PETSC_TRUE;
      MatGetRow(pc->mat, i, &nz, &cols, &values);
      for (PetscInt j = 0; j < nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          diagonly = PETSC_FALSE;
          break;
        }
      }
      if (!diagonly) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      MatRestoreRow(pc->mat, i, &nz, &cols, &values);
    }

    /* create PetscLayout for non-diagonal rows on each process */
    PetscLayoutCreate(comm, &map);
    PetscLayoutSetLocalSize(map, cnt);
    PetscLayoutSetBlockSize(map, 1);
    PetscLayoutSetUp(map);
    rstart = map->rstart;
    rend   = map->rend;

    /* create PetscLayout for load-balanced non-diagonal rows on each process */
    PetscLayoutCreate(comm, &nmap);
    MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm);
    PetscLayoutSetSize(nmap, ncnt);
    PetscLayoutSetBlockSize(nmap, 1);
    PetscLayoutSetUp(nmap);

    MatGetSize(pc->pmat, &NN, NULL);
    PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(((PetscReal)(NN - ncnt)) / ((PetscReal)(NN))));

    if (size > 1) {
      /* the following block of code assumes MPI can send messages to self, which is not supported for MPI-uni hence we need to handle the size 1 case as a special case */
      /*
        this code is taken from VecScatterCreate_PtoS()
        Determines what rows need to be moved where to
        load balance the non-diagonal rows
      */
      /* count number of contributors to each processor */
      PetscMalloc2(size, &sizes, cnt, &owner);
      PetscArrayzero(sizes, size);
      j      = 0;
      nsends = 0;
      for (i = rstart; i < rend; i++) {
        if (i < nmap->range[j]) j = 0;
        for (; j < size; j++) {
          if (i < nmap->range[j + 1]) {
            if (!sizes[j]++) nsends++;
            owner[i - rstart] = j;
            break;
          }
        }
      }
      /* inform other processors of number of messages and max length */
      PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs);
      PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1);
      PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1);
      recvtotal = 0;
      for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i];

      /* post receives: rvalues - rows I will own; count - nu */
      PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits);
      count = 0;
      for (i = 0; i < nrecvs; i++) {
        MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i);
        count += olengths1[i];
      }

      /* do sends:
         1) starts[i] gives the starting index in svalues for stuff going to
            the ith processor
      */
      PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts);
      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i];
      for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart;
      red->drows = drows;
      red->dcnt  = dcnt;
      PetscFree(rows);

      starts[0] = 0;
      for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
      count = 0;
      for (i = 0; i < size; i++) {
        if (sizes[i]) MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++);
      }

      /* wait on receives */
      count = nrecvs;
      slen  = 0;
      while (count) {
        MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status);
        /* unpack receives into our local space */
        MPI_Get_count(&recv_status, MPIU_INT, &n);
        slen += n;
        count--;
      }

      ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is);

      /* free all work space */
      PetscFree(olengths1);
      PetscFree(onodes1);
      PetscFree3(rvalues, source, recv_waits);
      PetscFree2(sizes, owner);
      if (nsends) { /* wait on sends */
        PetscMalloc1(nsends, &send_status);
        MPI_Waitall(nsends, send_waits, send_status);
        PetscFree(send_status);
      }
      PetscFree3(svalues, send_waits, starts);
    } else {
      ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is);
      red->drows = drows;
      red->dcnt  = dcnt;
      slen       = cnt;
    }
    PetscLayoutDestroy(&map);
    PetscLayoutDestroy(&nmap);

    VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b);
    VecDuplicate(red->b, &red->x);
    MatCreateVecs(pc->pmat, &tvec, NULL);
    VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter);
    VecDestroy(&tvec);
    MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat);
    KSPSetOperators(red->ksp, tmat, tmat);
    MatDestroy(&tmat);
  }

  /* get diagonal portion of matrix */
  PetscFree(red->diag);
  PetscMalloc1(red->dcnt, &red->diag);
  MatCreateVecs(pc->pmat, &diag, NULL);
  MatGetDiagonal(pc->pmat, diag);
  VecGetArrayRead(diag, &d);
  for (i = 0; i < red->dcnt; i++) red->diag[i] = 1.0 / d[red->drows[i]];
  VecRestoreArrayRead(diag, &d);
  VecDestroy(&diag);
  KSPSetUp(red->ksp);
  return 0;
}
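
/*
  Added commentary (not part of the original source): writing D for the local rows that contain only a diagonal entry
  and R for the remaining rows, those rows satisfy A_DD x_D = b_D with A_DD diagonal and A_DR = 0, so
  PCApply_Redistribute() below computes

      x_D = A_DD^{-1} b_D
      A_RR x_R = b_R - A_RD x_D

  using the stored inverse diagonal for the first step and the inner KSP on the redistributed submatrix for the
  second, with the right-hand side of the reduced system formed as b - A x.
*/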

static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x)
{
  PC_Redistribute   *red   = (PC_Redistribute *)pc->data;
  PetscInt           dcnt  = red->dcnt, i;
  const PetscInt    *drows = red->drows;
  PetscScalar       *xwork;
  const PetscScalar *bwork, *diag = red->diag;

  if (!red->work) VecDuplicate(b, &red->work);
  /* compute the rows of solution that have diagonal entries only */
  VecSet(x, 0.0); /* x = diag(A)^{-1} b */
  VecGetArray(x, &xwork);
  VecGetArrayRead(b, &bwork);
  for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]];
  PetscLogFlops(dcnt);
  VecRestoreArray(x, &xwork); /* restore the array obtained from x above */
  VecRestoreArrayRead(b, &bwork);
  /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
  MatMult(pc->pmat, x, red->work);
  VecAYPX(red->work, -1.0, b); /* red->work = b - A x */

  VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD);
  KSPSolve(red->ksp, red->b, red->x);
  KSPCheckSolve(red->ksp, pc, red->x);
  VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE);
  VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE);
  return 0;
}

static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  VecScatterDestroy(&red->scatter);
  ISDestroy(&red->is);
  VecDestroy(&red->b);
  VecDestroy(&red->x);
  KSPDestroy(&red->ksp);
  VecDestroy(&red->work);
  PetscFree(red->drows);
  PetscFree(red->diag);
  PetscFree(pc->data);
  return 0;
}

static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  KSPSetFromOptions(red->ksp);
  return 0;
}

/*@
  PCRedistributeGetKSP - Gets the `KSP` created by the `PCREDISTRIBUTE` preconditioner

  Not Collective

  Input Parameter:
. pc - the preconditioner context

  Output Parameter:
. innerksp - the inner `KSP`

  Level: advanced

.seealso: `KSP`, `PCREDISTRIBUTE`
@*/
PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute *)pc->data;

  *innerksp = red->ksp;
  return 0;
}
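
/*
  Example (added sketch, not part of the original source): given a PC already set to PCREDISTRIBUTE (assumed to come
  from the user's outer KSP), the inner solver can be configured directly instead of through the -redistribute_
  options prefix.

    KSP innerksp;
    PC  innerpc;

    PCRedistributeGetKSP(pc, &innerksp);
    KSPSetType(innerksp, KSPCG);                // inner Krylov method for the reduced system
    KSPGetPC(innerksp, &innerpc);
    PCSetType(innerpc, PCBJACOBI);              // inner preconditioner
    KSPSetTolerances(innerksp, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
*/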

/*MC
   PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that only have a diagonal entry, and then
   applies a `KSP` to the resulting smaller matrix

   Notes:
   Options for the redistributed `KSP` and `PC` use the options database prefix -redistribute_

   Usually run this with -ksp_type preonly

   If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
   -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.

   This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and the redistributed matrix is the same.

   Developer Note:
   Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

   Level: intermediate

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
M*/
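
/*
  Usage sketch (added, not part of the original source): a minimal outer setup that selects this preconditioner from
  code; A, b, x are user-provided PETSc objects (an assumption). Running the program with
  -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc then configures the inner
  solve as in the symmetric example above.

    KSP ksp;
    PC  pc;

    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, A, A);
    KSPSetType(ksp, KSPPREONLY);
    KSPGetPC(ksp, &pc);
    PCSetType(pc, PCREDISTRIBUTE);
    KSPSetFromOptions(ksp);   // picks up any -redistribute_ options
    KSPSolve(ksp, b, x);
*/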

PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
{
  PC_Redistribute *red;
  const char      *prefix;

  PetscNew(&red);
  pc->data = (void *)red;

  pc->ops->apply          = PCApply_Redistribute;
  pc->ops->applytranspose = NULL;
  pc->ops->setup          = PCSetUp_Redistribute;
  pc->ops->destroy        = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view           = PCView_Redistribute;

  KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp);
  KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure);
  PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1);
  PCGetOptionsPrefix(pc, &prefix);
  KSPSetOptionsPrefix(red->ksp, prefix);
  KSPAppendOptionsPrefix(red->ksp, "redistribute_");
  return 0;
}