Actual source code: fdmpiaij.c

  1: #include <../src/mat/impls/sell/mpi/mpisell.h>
  2: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  3: #include <../src/mat/impls/baij/mpi/mpibaij.h>
  4: #include <petsc/private/isimpl.h>

  6: PetscErrorCode MatFDColoringApply_BAIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
  7: {
  8:   PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
  9:   PetscInt           k, cstart, cend, l, row, col, nz, spidx, i, j;
 10:   PetscScalar        dx = 0.0, *w3_array, *dy_i, *dy = coloring->dy;
 11:   PetscScalar       *vscale_array;
 12:   const PetscScalar *xx;
 13:   PetscReal          epsilon = coloring->error_rel, umin = coloring->umin, unorm;
 14:   Vec                w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
 15:   void              *fctx  = coloring->fctx;
 16:   PetscInt           ctype = coloring->ctype, nxloc, nrows_k;
 17:   PetscScalar       *valaddr;
 18:   MatEntry          *Jentry  = coloring->matentry;
 19:   MatEntry2         *Jentry2 = coloring->matentry2;
 20:   const PetscInt     ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
 21:   PetscInt           bs = J->rmap->bs;

 23:   PetscFunctionBegin;
 24:   PetscCall(VecBindToCPU(x1, PETSC_TRUE));
 25:   /* (1) Set w1 = F(x1) */
 26:   if (!coloring->fset) {
 27:     PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, coloring, 0, 0, 0));
 28:     PetscCall((*f)(sctx, x1, w1, fctx));
 29:     PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, coloring, 0, 0, 0));
 30:   } else {
 31:     coloring->fset = PETSC_FALSE;
 32:   }

 34:   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
 35:   PetscCall(VecGetLocalSize(x1, &nxloc));
 36:   if (coloring->htype[0] == 'w') {
 37:     /* vscale = dx is a constant scalar */
 38:     PetscCall(VecNorm(x1, NORM_2, &unorm));
 39:     dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
 40:   } else {
 41:     PetscCall(VecGetArrayRead(x1, &xx));
 42:     PetscCall(VecGetArray(vscale, &vscale_array));
 43:     for (col = 0; col < nxloc; col++) {
 44:       dx = xx[col];
 45:       if (PetscAbsScalar(dx) < umin) {
 46:         if (PetscRealPart(dx) >= 0.0) dx = umin;
 47:         else if (PetscRealPart(dx) < 0.0) dx = -umin;
 48:       }
 49:       dx *= epsilon;
 50:       vscale_array[col] = 1.0 / dx;
 51:     }
 52:     PetscCall(VecRestoreArrayRead(x1, &xx));
 53:     PetscCall(VecRestoreArray(vscale, &vscale_array));
 54:   }
 55:   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
 56:     PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD));
 57:     PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD));
 58:   }

 60:   /* (3) Loop over each color */
 61:   if (!coloring->w3) {
 62:     PetscCall(VecDuplicate(x1, &coloring->w3));
 63:     /* Vec is used intensively in particular piece of scalar CPU code; won't benefit from bouncing back and forth to the GPU */
 64:     PetscCall(VecBindToCPU(coloring->w3, PETSC_TRUE));
 65:   }
 66:   w3 = coloring->w3;

 68:   PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */
 69:   if (vscale) PetscCall(VecGetArray(vscale, &vscale_array));
 70:   nz = 0;
 71:   for (k = 0; k < ncolors; k++) {
 72:     coloring->currentcolor = k;

 74:     /*
 75:       (3-1) Loop over each column associated with color
 76:       adding the perturbation to the vector w3 = x1 + dx.
 77:     */
 78:     PetscCall(VecCopy(x1, w3));
 79:     dy_i = dy;
 80:     for (i = 0; i < bs; i++) { /* Loop over a block of columns */
 81:       PetscCall(VecGetArray(w3, &w3_array));
 82:       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
 83:       if (coloring->htype[0] == 'w') {
 84:         for (l = 0; l < ncolumns[k]; l++) {
 85:           col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
 86:           w3_array[col] += 1.0 / dx;
 87:           if (i) w3_array[col - 1] -= 1.0 / dx; /* resume original w3[col-1] */
 88:         }
 89:       } else {                  /* htype == 'ds' */
 90:         vscale_array -= cstart; /* shift pointer so global index can be used */
 91:         for (l = 0; l < ncolumns[k]; l++) {
 92:           col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
 93:           w3_array[col] += 1.0 / vscale_array[col];
 94:           if (i) w3_array[col - 1] -= 1.0 / vscale_array[col - 1]; /* resume original w3[col-1] */
 95:         }
 96:         vscale_array += cstart;
 97:       }
 98:       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
 99:       PetscCall(VecRestoreArray(w3, &w3_array));

101:       /*
102:        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
103:                            w2 = F(x1 + dx) - F(x1)
104:        */
105:       PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
106:       PetscCall(VecPlaceArray(w2, dy_i)); /* place w2 to the array dy_i */
107:       PetscCall((*f)(sctx, w3, w2, fctx));
108:       PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
109:       PetscCall(VecAXPY(w2, -1.0, w1));
110:       PetscCall(VecResetArray(w2));
111:       dy_i += nxloc; /* points to dy+i*nxloc */
112:     }

114:     /*
115:      (3-3) Loop over rows of vector, putting results into Jacobian matrix
116:     */
117:     nrows_k = nrows[k];
118:     if (coloring->htype[0] == 'w') {
119:       for (l = 0; l < nrows_k; l++) {
120:         row     = bs * Jentry2[nz].row; /* local row index */
121:         valaddr = Jentry2[nz++].valaddr;
122:         spidx   = 0;
123:         dy_i    = dy;
124:         for (i = 0; i < bs; i++) {   /* column of the block */
125:           for (j = 0; j < bs; j++) { /* row of the block */
126:             valaddr[spidx++] = dy_i[row + j] * dx;
127:           }
128:           dy_i += nxloc; /* points to dy+i*nxloc */
129:         }
130:       }
131:     } else { /* htype == 'ds' */
132:       for (l = 0; l < nrows_k; l++) {
133:         row     = bs * Jentry[nz].row; /* local row index */
134:         col     = bs * Jentry[nz].col; /* local column index */
135:         valaddr = Jentry[nz++].valaddr;
136:         spidx   = 0;
137:         dy_i    = dy;
138:         for (i = 0; i < bs; i++) {   /* column of the block */
139:           for (j = 0; j < bs; j++) { /* row of the block */
140:             valaddr[spidx++] = dy_i[row + j] * vscale_array[col + i];
141:           }
142:           dy_i += nxloc; /* points to dy+i*nxloc */
143:         }
144:       }
145:     }
146:   }
147:   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
148:   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
149:   if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array));

151:   coloring->currentcolor = -1;
152:   PetscCall(VecBindToCPU(x1, PETSC_FALSE));
153:   PetscFunctionReturn(PETSC_SUCCESS);
154: }

156: /* this is declared PETSC_EXTERN because it is used by MatFDColoringUseDM() which is in the DM library */
157: PetscErrorCode MatFDColoringApply_AIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
158: {
159:   PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
160:   PetscInt           k, cstart, cend, l, row, col, nz;
161:   PetscScalar        dx = 0.0, *y, *w3_array;
162:   const PetscScalar *xx;
163:   PetscScalar       *vscale_array;
164:   PetscReal          epsilon = coloring->error_rel, umin = coloring->umin, unorm;
165:   Vec                w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
166:   void              *fctx  = coloring->fctx;
167:   ISColoringType     ctype = coloring->ctype;
168:   PetscInt           nxloc, nrows_k;
169:   MatEntry          *Jentry  = coloring->matentry;
170:   MatEntry2         *Jentry2 = coloring->matentry2;
171:   const PetscInt     ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
172:   PetscBool          alreadyboundtocpu;

174:   PetscFunctionBegin;
175:   PetscCall(VecBoundToCPU(x1, &alreadyboundtocpu));
176:   PetscCall(VecBindToCPU(x1, PETSC_TRUE));
177:   PetscCheck(!(ctype == IS_COLORING_LOCAL) || !(J->ops->fdcoloringapply == MatFDColoringApply_AIJ), PetscObjectComm((PetscObject)J), PETSC_ERR_SUP, "Must call MatColoringUseDM() with IS_COLORING_LOCAL");
178:   /* (1) Set w1 = F(x1) */
179:   if (!coloring->fset) {
180:     PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
181:     PetscCall((*f)(sctx, x1, w1, fctx));
182:     PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
183:   } else {
184:     coloring->fset = PETSC_FALSE;
185:   }

187:   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
188:   if (coloring->htype[0] == 'w') {
189:     /* vscale = 1./dx is a constant scalar */
190:     PetscCall(VecNorm(x1, NORM_2, &unorm));
191:     dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
192:   } else {
193:     PetscCall(VecGetLocalSize(x1, &nxloc));
194:     PetscCall(VecGetArrayRead(x1, &xx));
195:     PetscCall(VecGetArray(vscale, &vscale_array));
196:     for (col = 0; col < nxloc; col++) {
197:       dx = xx[col];
198:       if (PetscAbsScalar(dx) < umin) {
199:         if (PetscRealPart(dx) >= 0.0) dx = umin;
200:         else if (PetscRealPart(dx) < 0.0) dx = -umin;
201:       }
202:       dx *= epsilon;
203:       vscale_array[col] = 1.0 / dx;
204:     }
205:     PetscCall(VecRestoreArrayRead(x1, &xx));
206:     PetscCall(VecRestoreArray(vscale, &vscale_array));
207:   }
208:   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
209:     PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD));
210:     PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD));
211:   }

213:   /* (3) Loop over each color */
214:   if (!coloring->w3) PetscCall(VecDuplicate(x1, &coloring->w3));
215:   w3 = coloring->w3;

217:   PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */
218:   if (vscale) PetscCall(VecGetArray(vscale, &vscale_array));
219:   nz = 0;

221:   if (coloring->bcols > 1) { /* use blocked insertion of Jentry */
222:     PetscInt     i, m = J->rmap->n, nbcols, bcols = coloring->bcols;
223:     PetscScalar *dy = coloring->dy, *dy_k;

225:     nbcols = 0;
226:     for (k = 0; k < ncolors; k += bcols) {
227:       /*
228:        (3-1) Loop over each column associated with color
229:        adding the perturbation to the vector w3 = x1 + dx.
230:        */

232:       dy_k = dy;
233:       if (k + bcols > ncolors) bcols = ncolors - k;
234:       for (i = 0; i < bcols; i++) {
235:         coloring->currentcolor = k + i;

237:         PetscCall(VecCopy(x1, w3));
238:         PetscCall(VecGetArray(w3, &w3_array));
239:         if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
240:         if (coloring->htype[0] == 'w') {
241:           for (l = 0; l < ncolumns[k + i]; l++) {
242:             col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
243:             w3_array[col] += 1.0 / dx;
244:           }
245:         } else {                  /* htype == 'ds' */
246:           vscale_array -= cstart; /* shift pointer so global index can be used */
247:           for (l = 0; l < ncolumns[k + i]; l++) {
248:             col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
249:             w3_array[col] += 1.0 / vscale_array[col];
250:           }
251:           vscale_array += cstart;
252:         }
253:         if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
254:         PetscCall(VecRestoreArray(w3, &w3_array));

256:         /*
257:          (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
258:                            w2 = F(x1 + dx) - F(x1)
259:          */
260:         PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
261:         PetscCall(VecPlaceArray(w2, dy_k)); /* place w2 to the array dy_i */
262:         PetscCall((*f)(sctx, w3, w2, fctx));
263:         PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
264:         PetscCall(VecAXPY(w2, -1.0, w1));
265:         PetscCall(VecResetArray(w2));
266:         dy_k += m; /* points to dy+i*nxloc */
267:       }

269:       /*
270:        (3-3) Loop over block rows of vector, putting results into Jacobian matrix
271:        */
272:       nrows_k = nrows[nbcols++];

274:       if (coloring->htype[0] == 'w') {
275:         for (l = 0; l < nrows_k; l++) {
276:           row = Jentry2[nz].row; /* local row index */
277:                                  /* The 'useless' ifdef is due to a bug in NVIDIA nvc 21.11, which triggers a segfault on this line. We write it in
278:              another way, and it seems work. See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html
279:            */
280: #if defined(PETSC_USE_COMPLEX)
281:           PetscScalar *tmp = Jentry2[nz].valaddr;
282:           *tmp             = dy[row] * dx;
283: #else
284:           *(Jentry2[nz].valaddr) = dy[row] * dx;
285: #endif
286:           nz++;
287:         }
288:       } else { /* htype == 'ds' */
289:         for (l = 0; l < nrows_k; l++) {
290:           row = Jentry[nz].row; /* local row index */
291: #if defined(PETSC_USE_COMPLEX)  /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
292:           PetscScalar *tmp = Jentry[nz].valaddr;
293:           *tmp             = dy[row] * vscale_array[Jentry[nz].col];
294: #else
295:           *(Jentry[nz].valaddr)  = dy[row] * vscale_array[Jentry[nz].col];
296: #endif
297:           nz++;
298:         }
299:       }
300:     }
301:   } else { /* bcols == 1 */
302:     for (k = 0; k < ncolors; k++) {
303:       coloring->currentcolor = k;

305:       /*
306:        (3-1) Loop over each column associated with color
307:        adding the perturbation to the vector w3 = x1 + dx.
308:        */
309:       PetscCall(VecCopy(x1, w3));
310:       PetscCall(VecGetArray(w3, &w3_array));
311:       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
312:       if (coloring->htype[0] == 'w') {
313:         for (l = 0; l < ncolumns[k]; l++) {
314:           col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
315:           w3_array[col] += 1.0 / dx;
316:         }
317:       } else {                  /* htype == 'ds' */
318:         vscale_array -= cstart; /* shift pointer so global index can be used */
319:         for (l = 0; l < ncolumns[k]; l++) {
320:           col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
321:           w3_array[col] += 1.0 / vscale_array[col];
322:         }
323:         vscale_array += cstart;
324:       }
325:       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
326:       PetscCall(VecRestoreArray(w3, &w3_array));

328:       /*
329:        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
330:                            w2 = F(x1 + dx) - F(x1)
331:        */
332:       PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
333:       PetscCall((*f)(sctx, w3, w2, fctx));
334:       PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
335:       PetscCall(VecAXPY(w2, -1.0, w1));

337:       /*
338:        (3-3) Loop over rows of vector, putting results into Jacobian matrix
339:        */
340:       nrows_k = nrows[k];
341:       PetscCall(VecGetArray(w2, &y));
342:       if (coloring->htype[0] == 'w') {
343:         for (l = 0; l < nrows_k; l++) {
344:           row = Jentry2[nz].row; /* local row index */
345: #if defined(PETSC_USE_COMPLEX)   /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
346:           PetscScalar *tmp = Jentry2[nz].valaddr;
347:           *tmp             = y[row] * dx;
348: #else
349:           *(Jentry2[nz].valaddr) = y[row] * dx;
350: #endif
351:           nz++;
352:         }
353:       } else { /* htype == 'ds' */
354:         for (l = 0; l < nrows_k; l++) {
355:           row = Jentry[nz].row; /* local row index */
356: #if defined(PETSC_USE_COMPLEX)  /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
357:           PetscScalar *tmp = Jentry[nz].valaddr;
358:           *tmp             = y[row] * vscale_array[Jentry[nz].col];
359: #else
360:           *(Jentry[nz].valaddr)  = y[row] * vscale_array[Jentry[nz].col];
361: #endif
362:           nz++;
363:         }
364:       }
365:       PetscCall(VecRestoreArray(w2, &y));
366:     }
367:   }

369: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
370:   if (J->offloadmask != PETSC_OFFLOAD_UNALLOCATED) J->offloadmask = PETSC_OFFLOAD_CPU;
371: #endif
372:   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
373:   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
374:   if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array));
375:   coloring->currentcolor = -1;
376:   if (!alreadyboundtocpu) PetscCall(VecBindToCPU(x1, PETSC_FALSE));
377:   PetscFunctionReturn(PETSC_SUCCESS);
378: }

380: PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
381: {
382:   PetscMPIInt            size, *ncolsonproc, *disp, nn;
383:   PetscInt               i, n, nrows, nrows_i, j, k, m, ncols, col, *rowhit, cstart, cend, colb;
384:   const PetscInt        *is, *A_ci, *A_cj, *B_ci, *B_cj, *row = NULL, *ltog = NULL;
385:   PetscInt               nis = iscoloring->n, nctot, *cols, tmp = 0;
386:   ISLocalToGlobalMapping map   = mat->cmap->mapping;
387:   PetscInt               ctype = c->ctype, *spidxA, *spidxB, nz, bs, bs2, spidx;
388:   Mat                    A, B;
389:   PetscScalar           *A_val, *B_val, **valaddrhit;
390:   MatEntry              *Jentry;
391:   MatEntry2             *Jentry2;
392:   PetscBool              isBAIJ, isSELL;
393:   PetscInt               bcols = c->bcols;
394: #if defined(PETSC_USE_CTABLE)
395:   PetscHMapI colmap = NULL;
396: #else
397:   PetscInt *colmap = NULL;      /* local col number of off-diag col */
398: #endif

400:   PetscFunctionBegin;
401:   if (ctype == IS_COLORING_LOCAL) {
402:     PetscCheck(map, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_INCOMP, "When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
403:     PetscCall(ISLocalToGlobalMappingGetIndices(map, &ltog));
404:   }

406:   PetscCall(MatGetBlockSize(mat, &bs));
407:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ));
408:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL));
409:   if (isBAIJ) {
410:     Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
411:     Mat_SeqBAIJ *spA, *spB;
412:     A     = baij->A;
413:     spA   = (Mat_SeqBAIJ *)A->data;
414:     A_val = spA->a;
415:     B     = baij->B;
416:     spB   = (Mat_SeqBAIJ *)B->data;
417:     B_val = spB->a;
418:     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
419:     if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
420:     colmap = baij->colmap;
421:     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
422:     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));

424:     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
425:       PetscInt *garray;
426:       PetscCall(PetscMalloc1(B->cmap->n, &garray));
427:       for (i = 0; i < baij->B->cmap->n / bs; i++) {
428:         for (j = 0; j < bs; j++) garray[i * bs + j] = bs * baij->garray[i] + j;
429:       }
430:       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, garray, &c->vscale));
431:       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
432:       PetscCall(PetscFree(garray));
433:     }
434:   } else if (isSELL) {
435:     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
436:     Mat_SeqSELL *spA, *spB;
437:     A     = sell->A;
438:     spA   = (Mat_SeqSELL *)A->data;
439:     A_val = spA->val;
440:     B     = sell->B;
441:     spB   = (Mat_SeqSELL *)B->data;
442:     B_val = spB->val;
443:     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
444:     if (!sell->colmap) {
445:       /* Allow access to data structures of local part of matrix
446:        - creates aij->colmap which maps global column number to local number in part B */
447:       PetscCall(MatCreateColmap_MPISELL_Private(mat));
448:     }
449:     colmap = sell->colmap;
450:     PetscCall(MatGetColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
451:     PetscCall(MatGetColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));

453:     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */

455:     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
456:       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, sell->garray, &c->vscale));
457:       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
458:     }
459:   } else {
460:     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
461:     Mat_SeqAIJ *spA, *spB;
462:     A     = aij->A;
463:     spA   = (Mat_SeqAIJ *)A->data;
464:     A_val = spA->a;
465:     B     = aij->B;
466:     spB   = (Mat_SeqAIJ *)B->data;
467:     B_val = spB->a;
468:     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
469:     if (!aij->colmap) {
470:       /* Allow access to data structures of local part of matrix
471:        - creates aij->colmap which maps global column number to local number in part B */
472:       PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
473:     }
474:     colmap = aij->colmap;
475:     PetscCall(MatGetColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
476:     PetscCall(MatGetColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));

478:     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */

480:     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
481:       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, aij->garray, &c->vscale));
482:       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
483:     }
484:   }

486:   m      = mat->rmap->n / bs;
487:   cstart = mat->cmap->rstart / bs;
488:   cend   = mat->cmap->rend / bs;

490:   PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns));
491:   PetscCall(PetscMalloc1(nis, &c->nrows));

493:   if (c->htype[0] == 'd') {
494:     PetscCall(PetscMalloc1(nz, &Jentry));
495:     c->matentry = Jentry;
496:   } else if (c->htype[0] == 'w') {
497:     PetscCall(PetscMalloc1(nz, &Jentry2));
498:     c->matentry2 = Jentry2;
499:   } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");

501:   PetscCall(PetscMalloc2(m + 1, &rowhit, m + 1, &valaddrhit));
502:   nz = 0;
503:   PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa));

505:   if (ctype == IS_COLORING_GLOBAL) {
506:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
507:     PetscCall(PetscMalloc2(size, &ncolsonproc, size, &disp));
508:   }

510:   for (i = 0; i < nis; i++) { /* for each local color */
511:     PetscCall(ISGetLocalSize(c->isa[i], &n));
512:     PetscCall(ISGetIndices(c->isa[i], &is));

514:     c->ncolumns[i] = n; /* local number of columns of this color on this process */
515:     c->columns[i]  = (PetscInt *)is;

517:     if (ctype == IS_COLORING_GLOBAL) {
518:       /* Determine nctot, the total (parallel) number of columns of this color */
519:       /* ncolsonproc[j]: local ncolumns on proc[j] of this color */
520:       PetscCall(PetscMPIIntCast(n, &nn));
521:       PetscCallMPI(MPI_Allgather(&nn, 1, MPI_INT, ncolsonproc, 1, MPI_INT, PetscObjectComm((PetscObject)mat)));
522:       nctot = 0;
523:       for (j = 0; j < size; j++) nctot += ncolsonproc[j];
524:       if (!nctot) PetscCall(PetscInfo(mat, "Coloring of matrix has some unneeded colors with no corresponding rows\n"));

526:       disp[0] = 0;
527:       for (j = 1; j < size; j++) disp[j] = disp[j - 1] + ncolsonproc[j - 1];

529:       /* Get cols, the complete list of columns for this color on each process */
530:       PetscCall(PetscMalloc1(nctot + 1, &cols));
531:       PetscCallMPI(MPI_Allgatherv((void *)is, n, MPIU_INT, cols, ncolsonproc, disp, MPIU_INT, PetscObjectComm((PetscObject)mat)));
532:     } else if (ctype == IS_COLORING_LOCAL) {
533:       /* Determine local number of columns of this color on this process, including ghost points */
534:       nctot = n;
535:       cols  = (PetscInt *)is;
536:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not provided for this MatFDColoring type");

538:     /* Mark all rows affect by these columns */
539:     PetscCall(PetscArrayzero(rowhit, m));
540:     bs2     = bs * bs;
541:     nrows_i = 0;
542:     for (j = 0; j < nctot; j++) { /* loop over columns*/
543:       if (ctype == IS_COLORING_LOCAL) {
544:         col = ltog[cols[j]];
545:       } else {
546:         col = cols[j];
547:       }
548:       if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */
549:         tmp   = A_ci[col - cstart];
550:         row   = A_cj + tmp;
551:         nrows = A_ci[col - cstart + 1] - tmp;
552:         nrows_i += nrows;

554:         /* loop over columns of A marking them in rowhit */
555:         for (k = 0; k < nrows; k++) {
556:           /* set valaddrhit for part A */
557:           spidx            = bs2 * spidxA[tmp + k];
558:           valaddrhit[*row] = &A_val[spidx];
559:           rowhit[*row++]   = col - cstart + 1; /* local column index */
560:         }
561:       } else { /* column is in B, off-diagonal block of mat */
562: #if defined(PETSC_USE_CTABLE)
563:         PetscCall(PetscHMapIGetWithDefault(colmap, col + 1, 0, &colb));
564:         colb--;
565: #else
566:         colb = colmap[col] - 1; /* local column index */
567: #endif
568:         if (colb == -1) {
569:           nrows = 0;
570:         } else {
571:           colb  = colb / bs;
572:           tmp   = B_ci[colb];
573:           row   = B_cj + tmp;
574:           nrows = B_ci[colb + 1] - tmp;
575:         }
576:         nrows_i += nrows;
577:         /* loop over columns of B marking them in rowhit */
578:         for (k = 0; k < nrows; k++) {
579:           /* set valaddrhit for part B */
580:           spidx            = bs2 * spidxB[tmp + k];
581:           valaddrhit[*row] = &B_val[spidx];
582:           rowhit[*row++]   = colb + 1 + cend - cstart; /* local column index */
583:         }
584:       }
585:     }
586:     c->nrows[i] = nrows_i;

588:     if (c->htype[0] == 'd') {
589:       for (j = 0; j < m; j++) {
590:         if (rowhit[j]) {
591:           Jentry[nz].row     = j;             /* local row index */
592:           Jentry[nz].col     = rowhit[j] - 1; /* local column index */
593:           Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
594:           nz++;
595:         }
596:       }
597:     } else { /* c->htype == 'wp' */
598:       for (j = 0; j < m; j++) {
599:         if (rowhit[j]) {
600:           Jentry2[nz].row     = j;             /* local row index */
601:           Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
602:           nz++;
603:         }
604:       }
605:     }
606:     if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree(cols));
607:   }
608:   if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree2(ncolsonproc, disp));

610:   if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
611:     PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz));
612:   }

614:   if (isBAIJ) {
615:     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
616:     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
617:     PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy));
618:   } else if (isSELL) {
619:     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
620:     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
621:   } else {
622:     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
623:     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
624:   }

626:   PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa));
627:   PetscCall(PetscFree2(rowhit, valaddrhit));

629:   if (ctype == IS_COLORING_LOCAL) PetscCall(ISLocalToGlobalMappingRestoreIndices(map, &ltog));
630:   PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols));
631:   PetscFunctionReturn(PETSC_SUCCESS);
632: }

634: PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
635: {
636:   PetscInt  bs, nis = iscoloring->n, m = mat->rmap->n;
637:   PetscBool isBAIJ, isSELL;

639:   PetscFunctionBegin;
640:   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian;
641:    bcols is chosen s.t. dy-array takes 50% of memory space as mat */
642:   PetscCall(MatGetBlockSize(mat, &bs));
643:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ));
644:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL));
645:   if (isBAIJ || m == 0) {
646:     c->brows = m;
647:     c->bcols = 1;
648:   } else if (isSELL) {
649:     /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
650:     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
651:     Mat_SeqSELL *spA, *spB;
652:     Mat          A, B;
653:     PetscInt     nz, brows, bcols;
654:     PetscReal    mem;

656:     bs = 1; /* only bs=1 is supported for MPISELL matrix */

658:     A     = sell->A;
659:     spA   = (Mat_SeqSELL *)A->data;
660:     B     = sell->B;
661:     spB   = (Mat_SeqSELL *)B->data;
662:     nz    = spA->nz + spB->nz; /* total local nonzero entries of mat */
663:     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
664:     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
665:     brows = 1000 / bcols;
666:     if (bcols > nis) bcols = nis;
667:     if (brows == 0 || brows > m) brows = m;
668:     c->brows = brows;
669:     c->bcols = bcols;
670:   } else { /* mpiaij matrix */
671:     /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
672:     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
673:     Mat_SeqAIJ *spA, *spB;
674:     Mat         A, B;
675:     PetscInt    nz, brows, bcols;
676:     PetscReal   mem;

678:     bs = 1; /* only bs=1 is supported for MPIAIJ matrix */

680:     A     = aij->A;
681:     spA   = (Mat_SeqAIJ *)A->data;
682:     B     = aij->B;
683:     spB   = (Mat_SeqAIJ *)B->data;
684:     nz    = spA->nz + spB->nz; /* total local nonzero entries of mat */
685:     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
686:     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
687:     brows = 1000 / bcols;
688:     if (bcols > nis) bcols = nis;
689:     if (brows == 0 || brows > m) brows = m;
690:     c->brows = brows;
691:     c->bcols = bcols;
692:   }

694:   c->M       = mat->rmap->N / bs; /* set the global rows and columns and local rows */
695:   c->N       = mat->cmap->N / bs;
696:   c->m       = mat->rmap->n / bs;
697:   c->rstart  = mat->rmap->rstart / bs;
698:   c->ncolors = nis;
699:   PetscFunctionReturn(PETSC_SUCCESS);
700: }

702: /*@C

704:     MatFDColoringSetValues - takes a matrix in compressed color format and enters the matrix into a PETSc `Mat`

706:    Collective

708:    Input Parameters:
709: +    J - the sparse matrix
710: .    coloring - created with `MatFDColoringCreate()` and a local coloring
711: -    y - column major storage of matrix values with one color of values per column, the number of rows of y should match
712:          the number of local rows of `J` and the number of columns is the number of colors.

714:    Level: intermediate

716:    Notes:
717:    The matrix in compressed color format may come from an automatic differentiation code

719:    The code will be slightly faster if `MatFDColoringSetBlockSize`(coloring,`PETSC_DEFAULT`,nc); is called immediately after creating the coloring

721: .seealso: [](chapter_matrices), `Mat`, `MatFDColoringCreate()`, `ISColoring`, `ISColoringCreate()`, `ISColoringSetType()`, `IS_COLORING_LOCAL`, `MatFDColoringSetBlockSize()`
722: @*/
723: PetscErrorCode MatFDColoringSetValues(Mat J, MatFDColoring coloring, const PetscScalar *y)
724: {
725:   MatEntry2      *Jentry2;
726:   PetscInt        row, i, nrows_k, l, ncolors, nz = 0, bcols, nbcols = 0;
727:   const PetscInt *nrows;
728:   PetscBool       eq;

730:   PetscFunctionBegin;
733:   PetscCall(PetscObjectCompareId((PetscObject)J, coloring->matid, &eq));
734:   PetscCheck(eq, PetscObjectComm((PetscObject)J), PETSC_ERR_ARG_WRONG, "Matrix used with MatFDColoringSetValues() must be that used with MatFDColoringCreate()");
735:   Jentry2 = coloring->matentry2;
736:   nrows   = coloring->nrows;
737:   ncolors = coloring->ncolors;
738:   bcols   = coloring->bcols;

740:   for (i = 0; i < ncolors; i += bcols) {
741:     nrows_k = nrows[nbcols++];
742:     for (l = 0; l < nrows_k; l++) {
743:       row                      = Jentry2[nz].row; /* local row index */
744:       *(Jentry2[nz++].valaddr) = y[row];
745:     }
746:     y += bcols * coloring->m;
747:   }
748:   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
749:   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
750:   PetscFunctionReturn(PETSC_SUCCESS);
751: }