Actual source code: redistribute.c


  2: /*
  3:   This file defines a preconditioner that removes the rows (and corresponding columns) containing only a diagonal entry, redistributes the remaining rows across the processes for load balance, and solves that smaller system with an inner KSP.
  4: */
  5: #include <petsc/private/pcimpl.h>
  6: #include <petscksp.h>

  8: typedef struct {
  9:   KSP          ksp;          /* inner solver used on the redistributed (reduced) system */
 10:   Vec          x, b;         /* solution and right-hand side in the redistributed layout */
 11:   VecScatter   scatter;      /* moves entries between the original layout and the redistributed layout */
 12:   IS           is;           /* global rows (and columns) of the reduced system owned by this process */
 13:   PetscInt     dcnt, *drows; /* the local rows that have only a diagonal entry */
 14:   PetscScalar *diag;         /* inverses of the diagonal entries of the diagonal-only rows */
 15:   Vec          work;         /* work vector with the layout of the original right-hand side */
 16: } PC_Redistribute;

 18: static PetscErrorCode PCView_Redistribute(PC pc, PetscViewer viewer)
 19: {
 20:   PC_Redistribute *red = (PC_Redistribute *)pc->data;
 21:   PetscBool        iascii, isstring;
 22:   PetscInt         ncnt, N;

 24:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
 25:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring);
 26:   if (iascii) {
 27:     MPIU_Allreduce(&red->dcnt, &ncnt, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc));
 28:     MatGetSize(pc->pmat, &N, NULL);
 29:     PetscViewerASCIIPrintf(viewer, "    Number rows eliminated %" PetscInt_FMT " Percentage rows eliminated %g\n", ncnt, (double)(100.0 * ((PetscReal)ncnt) / ((PetscReal)N)));
 30:     PetscViewerASCIIPrintf(viewer, "  Redistribute preconditioner: \n");
 31:     KSPView(red->ksp, viewer);
 32:   } else if (isstring) {
 33:     PetscViewerStringSPrintf(viewer, " Redistribute preconditioner");
 34:     KSPView(red->ksp, viewer);
 35:   }
 36:   return 0;
 37: }

 39: static PetscErrorCode PCSetUp_Redistribute(PC pc)
 40: {
 41:   PC_Redistribute         *red = (PC_Redistribute *)pc->data;
 42:   MPI_Comm                 comm;
 43:   PetscInt                 rstart, rend, i, nz, cnt, *rows, ncnt, dcnt, *drows;
 44:   PetscLayout              map, nmap;
 45:   PetscMPIInt              size, tag, n;
 46:   PETSC_UNUSED PetscMPIInt imdex;
 47:   PetscInt                *source = NULL;
 48:   PetscMPIInt             *sizes  = NULL, nrecvs;
 49:   PetscInt                 j, nsends;
 50:   PetscInt                *owner = NULL, *starts = NULL, count, slen;
 51:   PetscInt                *rvalues, *svalues, recvtotal;
 52:   PetscMPIInt             *onodes1, *olengths1;
 53:   MPI_Request             *send_waits = NULL, *recv_waits = NULL;
 54:   MPI_Status               recv_status, *send_status;
 55:   Vec                      tvec, diag;
 56:   Mat                      tmat;
 57:   const PetscScalar       *d, *values;
 58:   const PetscInt          *cols;

 60:   if (pc->setupcalled) {
 61:     KSPGetOperators(red->ksp, NULL, &tmat);
 62:     MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_REUSE_MATRIX, &tmat);
 63:     KSPSetOperators(red->ksp, tmat, tmat);
 64:   } else {
 65:     PetscInt NN;

 67:     PetscObjectGetComm((PetscObject)pc, &comm);
 68:     MPI_Comm_size(comm, &size);
 69:     PetscObjectGetNewTag((PetscObject)pc, &tag);

 71:     /* count non-diagonal rows on process */
 72:     MatGetOwnershipRange(pc->mat, &rstart, &rend);
 73:     cnt = 0;
 74:     for (i = rstart; i < rend; i++) {
 75:       MatGetRow(pc->mat, i, &nz, &cols, &values);
 76:       for (PetscInt j = 0; j < nz; j++) {
 77:         if (values[j] != 0 && cols[j] != i) {
 78:           cnt++;
 79:           break;
 80:         }
 81:       }
 82:       MatRestoreRow(pc->mat, i, &nz, &cols, &values);
 83:     }
 84:     PetscMalloc1(cnt, &rows);
 85:     PetscMalloc1(rend - rstart - cnt, &drows);

 87:     /* list non-diagonal rows on process */
 88:     cnt  = 0;
 89:     dcnt = 0;
 90:     for (i = rstart; i < rend; i++) {
 91:       PetscBool diagonly = PETSC_TRUE;
 92:       MatGetRow(pc->mat, i, &nz, &cols, &values);
 93:       for (PetscInt j = 0; j < nz; j++) {
 94:         if (values[j] != 0 && cols[j] != i) {
 95:           diagonly = PETSC_FALSE;
 96:           break;
 97:         }
 98:       }
 99:       if (!diagonly) rows[cnt++] = i;
100:       else drows[dcnt++] = i - rstart;
101:       MatRestoreRow(pc->mat, i, &nz, &cols, &values);
102:     }

104:     /* create PetscLayout for non-diagonal rows on each process */
105:     PetscLayoutCreate(comm, &map);
106:     PetscLayoutSetLocalSize(map, cnt);
107:     PetscLayoutSetBlockSize(map, 1);
108:     PetscLayoutSetUp(map);
109:     rstart = map->rstart;
110:     rend   = map->rend;

112:     /* create PetscLayout for load-balanced non-diagonal rows on each process */
113:     PetscLayoutCreate(comm, &nmap);
114:     MPIU_Allreduce(&cnt, &ncnt, 1, MPIU_INT, MPI_SUM, comm);
115:     PetscLayoutSetSize(nmap, ncnt);
116:     PetscLayoutSetBlockSize(nmap, 1);
117:     PetscLayoutSetUp(nmap);

119:     MatGetSize(pc->pmat, &NN, NULL);
120:     PetscInfo(pc, "Number of diagonal rows eliminated %" PetscInt_FMT ", percentage eliminated %g\n", NN - ncnt, (double)(100.0 * ((PetscReal)(NN - ncnt)) / ((PetscReal)NN)));

122:     if (size > 1) {
123:       /* the following block of code assumes MPI can send messages to self, which is not supported by MPI-uni, hence the size 1 case is handled separately in the else branch below */
124:       /*
125:        this code is taken from VecScatterCreate_PtoS();
126:        it determines which rows need to be moved to which process to
127:        load balance the non-diagonal rows
128:        */
129:       /*  count number of contributors to each processor */
130:       PetscMalloc2(size, &sizes, cnt, &owner);
131:       PetscArrayzero(sizes, size);
132:       j      = 0;
133:       nsends = 0;
134:       for (i = rstart; i < rend; i++) {
135:         if (i < nmap->range[j]) j = 0;
136:         for (; j < size; j++) {
137:           if (i < nmap->range[j + 1]) {
138:             if (!sizes[j]++) nsends++;
139:             owner[i - rstart] = j;
140:             break;
141:           }
142:         }
143:       }
144:       /* inform other processors of the number of messages and the maximum length */
145:       PetscGatherNumberOfMessages(comm, NULL, sizes, &nrecvs);
146:       PetscGatherMessageLengths(comm, nsends, nrecvs, sizes, &onodes1, &olengths1);
147:       PetscSortMPIIntWithArray(nrecvs, onodes1, olengths1);
148:       recvtotal = 0;
149:       for (i = 0; i < nrecvs; i++) recvtotal += olengths1[i];

151:       /* post receives: rvalues will hold the rows this process will own after redistribution */
152:       PetscMalloc3(recvtotal, &rvalues, nrecvs, &source, nrecvs, &recv_waits);
153:       count = 0;
154:       for (i = 0; i < nrecvs; i++) {
155:         MPI_Irecv((rvalues + count), olengths1[i], MPIU_INT, onodes1[i], tag, comm, recv_waits + i);
156:         count += olengths1[i];
157:       }

159:       /* do sends:
160:          starts[i] gives the starting index in svalues for the rows
161:          being sent to the ith processor
162:        */
163:       PetscMalloc3(cnt, &svalues, nsends, &send_waits, size, &starts);
164:       starts[0] = 0;
165:       for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
166:       for (i = 0; i < cnt; i++) svalues[starts[owner[i]]++] = rows[i];
167:       for (i = 0; i < cnt; i++) rows[i] = rows[i] - rstart;
168:       red->drows = drows;
169:       red->dcnt  = dcnt;
170:       PetscFree(rows);

172:       starts[0] = 0;
173:       for (i = 1; i < size; i++) starts[i] = starts[i - 1] + sizes[i - 1];
174:       count = 0;
175:       for (i = 0; i < size; i++) {
176:         if (sizes[i]) MPI_Isend(svalues + starts[i], sizes[i], MPIU_INT, i, tag, comm, send_waits + count++);
177:       }

179:       /*  wait on receives */
180:       count = nrecvs;
181:       slen  = 0;
182:       while (count) {
183:         MPI_Waitany(nrecvs, recv_waits, &imdex, &recv_status);
184:         /* unpack receives into our local space */
185:         MPI_Get_count(&recv_status, MPIU_INT, &n);
186:         slen += n;
187:         count--;
188:       }
190:       ISCreateGeneral(comm, slen, rvalues, PETSC_COPY_VALUES, &red->is);

192:       /* free all work space */
193:       PetscFree(olengths1);
194:       PetscFree(onodes1);
195:       PetscFree3(rvalues, source, recv_waits);
196:       PetscFree2(sizes, owner);
197:       if (nsends) { /* wait on sends */
198:         PetscMalloc1(nsends, &send_status);
199:         MPI_Waitall(nsends, send_waits, send_status);
200:         PetscFree(send_status);
201:       }
202:       PetscFree3(svalues, send_waits, starts);
203:     } else {
204:       ISCreateGeneral(comm, cnt, rows, PETSC_OWN_POINTER, &red->is);
205:       red->drows = drows;
206:       red->dcnt  = dcnt;
207:       slen       = cnt;
208:     }
209:     PetscLayoutDestroy(&map);
210:     PetscLayoutDestroy(&nmap);

212:     VecCreateMPI(comm, slen, PETSC_DETERMINE, &red->b);
213:     VecDuplicate(red->b, &red->x);
214:     MatCreateVecs(pc->pmat, &tvec, NULL);
215:     VecScatterCreate(tvec, red->is, red->b, NULL, &red->scatter);
216:     VecDestroy(&tvec);
217:     MatCreateSubMatrix(pc->pmat, red->is, red->is, MAT_INITIAL_MATRIX, &tmat);
218:     KSPSetOperators(red->ksp, tmat, tmat);
219:     MatDestroy(&tmat);
220:   }

222:   /* get diagonal portion of matrix */
223:   PetscFree(red->diag);
224:   PetscMalloc1(red->dcnt, &red->diag);
225:   MatCreateVecs(pc->pmat, &diag, NULL);
226:   MatGetDiagonal(pc->pmat, diag);
227:   VecGetArrayRead(diag, &d);
228:   for (i = 0; i < red->dcnt; i++) red->diag[i] = 1.0 / d[red->drows[i]];
229:   VecRestoreArrayRead(diag, &d);
230:   VecDestroy(&diag);
231:   KSPSetUp(red->ksp);
232:   return 0;
233: }
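
As a concrete (hypothetical) illustration of the bookkeeping in PCSetUp_Redistribute() above, consider a run on a single process with the 4x4 matrix

    [ 2  1  0  0 ]
    [ 0  5  0  0 ]
    [ 1  0  3  0 ]
    [ 0  0  0  7 ]

Rows 1 and 3 contain only their diagonal entry, so dcnt = 2 with drows = {1, 3}, while cnt = 2 with rows = {0, 2}. Thus red->is selects global rows (and columns) {0, 2}, the inner KSP is given the 2x2 submatrix built from them, and red->diag holds {1/5, 1/7} for the eliminated rows.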

235: static PetscErrorCode PCApply_Redistribute(PC pc, Vec b, Vec x)
236: {
237:   PC_Redistribute   *red   = (PC_Redistribute *)pc->data;
238:   PetscInt           dcnt  = red->dcnt, i;
239:   const PetscInt    *drows = red->drows;
240:   PetscScalar       *xwork;
241:   const PetscScalar *bwork, *diag = red->diag;

243:   if (!red->work) VecDuplicate(b, &red->work);
244:   /* compute the entries of the solution for the rows that have only a diagonal entry */
245:   VecSet(x, 0.0); /* on those rows x = diag(A)^{-1} b; all other entries of x are set to zero */
246:   VecGetArray(x, &xwork);
247:   VecGetArrayRead(b, &bwork);
248:   for (i = 0; i < dcnt; i++) xwork[drows[i]] = diag[i] * bwork[drows[i]];
249:   PetscLogFlops(dcnt);
250:   VecRestoreArray(x, &xwork); /* pairs with the VecGetArray(x, &xwork) above */
251:   VecRestoreArrayRead(b, &bwork);
252:   /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
253:   MatMult(pc->pmat, x, red->work);
254:   VecAYPX(red->work, -1.0, b); /* red->work = b - A x */

256:   VecScatterBegin(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD);
257:   VecScatterEnd(red->scatter, red->work, red->b, INSERT_VALUES, SCATTER_FORWARD);
258:   KSPSolve(red->ksp, red->b, red->x);
259:   KSPCheckSolve(red->ksp, pc, red->x);
260:   VecScatterBegin(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE);
261:   VecScatterEnd(red->scatter, red->x, x, INSERT_VALUES, SCATTER_REVERSE);
262:   return 0;
263: }
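
In block form, writing D for the diagonal-only rows/columns and R for the remaining ones, PCApply_Redistribute() above performs a block elimination (a sketch of the algebra; since the D rows contain only their diagonal entry, A_DR = 0):

    x_D = D^{-1} b_D
    A_RR x_R = b_R - A_RD x_D

The second (reduced) system is solved by the inner KSP in the redistributed layout; the two scatters move the reduced right-hand side b - A x and the reduced solution x_R between the original and redistributed layouts.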

265: static PetscErrorCode PCDestroy_Redistribute(PC pc)
266: {
267:   PC_Redistribute *red = (PC_Redistribute *)pc->data;

269:   VecScatterDestroy(&red->scatter);
270:   ISDestroy(&red->is);
271:   VecDestroy(&red->b);
272:   VecDestroy(&red->x);
273:   KSPDestroy(&red->ksp);
274:   VecDestroy(&red->work);
275:   PetscFree(red->drows);
276:   PetscFree(red->diag);
277:   PetscFree(pc->data);
278:   return 0;
279: }

281: static PetscErrorCode PCSetFromOptions_Redistribute(PC pc, PetscOptionItems *PetscOptionsObject)
282: {
283:   PC_Redistribute *red = (PC_Redistribute *)pc->data;

285:   KSPSetFromOptions(red->ksp);
286:   return 0;
287: }
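
Because the inner KSP inherits the preconditioner's options prefix with "redistribute_" appended (see PCCreate_Redistribute() below), its runtime options are given, for example, as

    -redistribute_ksp_type gmres -redistribute_ksp_rtol 1e-8 -redistribute_ksp_monitor

where these particular option values are only illustrative.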

289: /*@
290:    PCRedistributeGetKSP - Gets the `KSP` created by the `PCREDISTRIBUTE`

292:    Not Collective

294:    Input Parameter:
295: .  pc - the preconditioner context

297:    Output Parameter:
298: .  innerksp - the inner `KSP`

300:    Level: advanced

302: .seealso: `KSP`, `PCREDISTRIBUTE`
303: @*/
304: PetscErrorCode PCRedistributeGetKSP(PC pc, KSP *innerksp)
305: {
306:   PC_Redistribute *red = (PC_Redistribute *)pc->data;

310:   *innerksp = red->ksp;
311:   return 0;
312: }
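
A minimal sketch of using PCRedistributeGetKSP(), assuming an outer KSP named ksp whose PC has already been set to PCREDISTRIBUTE and set up (error checking omitted):

    PC       pc;
    KSP      inner;
    Mat      Ared;
    PetscInt nred;

    KSPGetPC(ksp, &pc);
    PCRedistributeGetKSP(pc, &inner);
    KSPSetType(inner, KSPCG);            /* choose the solver for the reduced system programmatically */
    KSPGetOperators(inner, &Ared, NULL); /* the reduced operator exists only after the preconditioner has been set up */
    MatGetSize(Ared, &nred, NULL);
    PetscPrintf(PETSC_COMM_WORLD, "reduced system has %" PetscInt_FMT " rows\n", nred);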

314: /*MC
315:      PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows (and the corresponding columns) that only have a diagonal entry and then
316:      applies a `KSP` to that new smaller matrix

318:      Notes:
319:      Options for the inner (redistributed) `KSP` and `PC` are set with the options database prefix -redistribute_

321:      Usually this is run with -ksp_type preonly, so the outer solve is a single application of this preconditioner

323:      If you have used `MatZeroRows()` to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
324:      -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.

326:      This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same.

328:      Developer Note:
329:      Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

331:    Level: intermediate

333: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PCRedistributeGetKSP()`, `MatZeroRows()`
334: M*/
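
A usage sketch for the manual page above (the matrix A and the vectors b, x are assumed to be already assembled; error checking omitted):

    KSP ksp;
    PC  pc;

    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, A, A);  /* e.g. after MatZeroRows() has been applied for Dirichlet rows */
    KSPSetType(ksp, KSPPREONLY); /* the outer solve is a single application of the preconditioner */
    KSPGetPC(ksp, &pc);
    PCSetType(pc, PCREDISTRIBUTE);
    KSPSetFromOptions(ksp);      /* the inner solve is then controlled by the -redistribute_ options */
    KSPSolve(ksp, b, x);

Run with, for example, -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc, as suggested in the notes above.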

336: PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
337: {
338:   PC_Redistribute *red;
339:   const char      *prefix;

341:   PetscNew(&red);
342:   pc->data = (void *)red;

344:   pc->ops->apply          = PCApply_Redistribute;
345:   pc->ops->applytranspose = NULL;
346:   pc->ops->setup          = PCSetUp_Redistribute;
347:   pc->ops->destroy        = PCDestroy_Redistribute;
348:   pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
349:   pc->ops->view           = PCView_Redistribute;

351:   KSPCreate(PetscObjectComm((PetscObject)pc), &red->ksp);
352:   KSPSetErrorIfNotConverged(red->ksp, pc->erroriffailure);
353:   PetscObjectIncrementTabLevel((PetscObject)red->ksp, (PetscObject)pc, 1);
354:   PCGetOptionsPrefix(pc, &prefix);
355:   KSPSetOptionsPrefix(red->ksp, prefix);
356:   KSPAppendOptionsPrefix(red->ksp, "redistribute_");
357:   return 0;
358: }