Actual source code: fdmpiaij.c
1: #include <../src/mat/impls/sell/mpi/mpisell.h>
2: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3: #include <../src/mat/impls/baij/mpi/mpibaij.h>
4: #include <petsc/private/isimpl.h>
6: PetscErrorCode MatFDColoringApply_BAIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
7: {
8: PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
9: PetscInt k, cstart, cend, l, row, col, nz, spidx, i, j;
10: PetscScalar dx = 0.0, *w3_array, *dy_i, *dy = coloring->dy;
11: PetscScalar *vscale_array;
12: const PetscScalar *xx;
13: PetscReal epsilon = coloring->error_rel, umin = coloring->umin, unorm;
14: Vec w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
15: void *fctx = coloring->fctx;
16: PetscInt ctype = coloring->ctype, nxloc, nrows_k;
17: PetscScalar *valaddr;
18: MatEntry *Jentry = coloring->matentry;
19: MatEntry2 *Jentry2 = coloring->matentry2;
20: const PetscInt ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
21: PetscInt bs = J->rmap->bs;
23: PetscFunctionBegin;
24: PetscCall(VecBindToCPU(x1, PETSC_TRUE));
25: /* (1) Set w1 = F(x1) */
26: if (!coloring->fset) {
27: PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, coloring, 0, 0, 0));
28: PetscCall((*f)(sctx, x1, w1, fctx));
29: PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, coloring, 0, 0, 0));
30: } else {
31: coloring->fset = PETSC_FALSE;
32: }
34: /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
35: PetscCall(VecGetLocalSize(x1, &nxloc));
36: if (coloring->htype[0] == 'w') {
37: /* vscale = dx is a constant scalar */
38: PetscCall(VecNorm(x1, NORM_2, &unorm));
39: dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
40: } else {
41: PetscCall(VecGetArrayRead(x1, &xx));
42: PetscCall(VecGetArray(vscale, &vscale_array));
43: for (col = 0; col < nxloc; col++) {
44: dx = xx[col];
45: if (PetscAbsScalar(dx) < umin) {
46: if (PetscRealPart(dx) >= 0.0) dx = umin;
47: else if (PetscRealPart(dx) < 0.0) dx = -umin;
48: }
49: dx *= epsilon;
50: vscale_array[col] = 1.0 / dx;
51: }
52: PetscCall(VecRestoreArrayRead(x1, &xx));
53: PetscCall(VecRestoreArray(vscale, &vscale_array));
54: }
55: if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
56: PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD));
57: PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD));
58: }
60: /* (3) Loop over each color */
61: if (!coloring->w3) {
62: PetscCall(VecDuplicate(x1, &coloring->w3));
63: /* Vec is used intensively in particular piece of scalar CPU code; won't benefit from bouncing back and forth to the GPU */
64: PetscCall(VecBindToCPU(coloring->w3, PETSC_TRUE));
65: }
66: w3 = coloring->w3;
68: PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */
69: if (vscale) PetscCall(VecGetArray(vscale, &vscale_array));
70: nz = 0;
71: for (k = 0; k < ncolors; k++) {
72: coloring->currentcolor = k;
74: /*
75: (3-1) Loop over each column associated with color
76: adding the perturbation to the vector w3 = x1 + dx.
77: */
78: PetscCall(VecCopy(x1, w3));
79: dy_i = dy;
80: for (i = 0; i < bs; i++) { /* Loop over a block of columns */
81: PetscCall(VecGetArray(w3, &w3_array));
82: if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
83: if (coloring->htype[0] == 'w') {
84: for (l = 0; l < ncolumns[k]; l++) {
85: col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
86: w3_array[col] += 1.0 / dx;
87: if (i) w3_array[col - 1] -= 1.0 / dx; /* resume original w3[col-1] */
88: }
89: } else { /* htype == 'ds' */
90: vscale_array -= cstart; /* shift pointer so global index can be used */
91: for (l = 0; l < ncolumns[k]; l++) {
92: col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
93: w3_array[col] += 1.0 / vscale_array[col];
94: if (i) w3_array[col - 1] -= 1.0 / vscale_array[col - 1]; /* resume original w3[col-1] */
95: }
96: vscale_array += cstart;
97: }
98: if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
99: PetscCall(VecRestoreArray(w3, &w3_array));
101: /*
102: (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
103: w2 = F(x1 + dx) - F(x1)
104: */
105: PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
106: PetscCall(VecPlaceArray(w2, dy_i)); /* place w2 to the array dy_i */
107: PetscCall((*f)(sctx, w3, w2, fctx));
108: PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
109: PetscCall(VecAXPY(w2, -1.0, w1));
110: PetscCall(VecResetArray(w2));
111: dy_i += nxloc; /* points to dy+i*nxloc */
112: }
114: /*
115: (3-3) Loop over rows of vector, putting results into Jacobian matrix
116: */
117: nrows_k = nrows[k];
118: if (coloring->htype[0] == 'w') {
119: for (l = 0; l < nrows_k; l++) {
120: row = bs * Jentry2[nz].row; /* local row index */
121: valaddr = Jentry2[nz++].valaddr;
122: spidx = 0;
123: dy_i = dy;
124: for (i = 0; i < bs; i++) { /* column of the block */
125: for (j = 0; j < bs; j++) { /* row of the block */
126: valaddr[spidx++] = dy_i[row + j] * dx;
127: }
128: dy_i += nxloc; /* points to dy+i*nxloc */
129: }
130: }
131: } else { /* htype == 'ds' */
132: for (l = 0; l < nrows_k; l++) {
133: row = bs * Jentry[nz].row; /* local row index */
134: col = bs * Jentry[nz].col; /* local column index */
135: valaddr = Jentry[nz++].valaddr;
136: spidx = 0;
137: dy_i = dy;
138: for (i = 0; i < bs; i++) { /* column of the block */
139: for (j = 0; j < bs; j++) { /* row of the block */
140: valaddr[spidx++] = dy_i[row + j] * vscale_array[col + i];
141: }
142: dy_i += nxloc; /* points to dy+i*nxloc */
143: }
144: }
145: }
146: }
147: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
148: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
149: if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array));
151: coloring->currentcolor = -1;
152: PetscCall(VecBindToCPU(x1, PETSC_FALSE));
153: PetscFunctionReturn(PETSC_SUCCESS);
154: }
156: /* this is declared PETSC_EXTERN because it is used by MatFDColoringUseDM() which is in the DM library */
157: PetscErrorCode MatFDColoringApply_AIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
158: {
159: PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
160: PetscInt k, cstart, cend, l, row, col, nz;
161: PetscScalar dx = 0.0, *y, *w3_array;
162: const PetscScalar *xx;
163: PetscScalar *vscale_array;
164: PetscReal epsilon = coloring->error_rel, umin = coloring->umin, unorm;
165: Vec w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
166: void *fctx = coloring->fctx;
167: ISColoringType ctype = coloring->ctype;
168: PetscInt nxloc, nrows_k;
169: MatEntry *Jentry = coloring->matentry;
170: MatEntry2 *Jentry2 = coloring->matentry2;
171: const PetscInt ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
172: PetscBool alreadyboundtocpu;
174: PetscFunctionBegin;
175: PetscCall(VecBoundToCPU(x1, &alreadyboundtocpu));
176: PetscCall(VecBindToCPU(x1, PETSC_TRUE));
177: PetscCheck(!(ctype == IS_COLORING_LOCAL) || !(J->ops->fdcoloringapply == MatFDColoringApply_AIJ), PetscObjectComm((PetscObject)J), PETSC_ERR_SUP, "Must call MatColoringUseDM() with IS_COLORING_LOCAL");
178: /* (1) Set w1 = F(x1) */
179: if (!coloring->fset) {
180: PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
181: PetscCall((*f)(sctx, x1, w1, fctx));
182: PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
183: } else {
184: coloring->fset = PETSC_FALSE;
185: }
187: /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
188: if (coloring->htype[0] == 'w') {
189: /* vscale = 1./dx is a constant scalar */
190: PetscCall(VecNorm(x1, NORM_2, &unorm));
191: dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
192: } else {
193: PetscCall(VecGetLocalSize(x1, &nxloc));
194: PetscCall(VecGetArrayRead(x1, &xx));
195: PetscCall(VecGetArray(vscale, &vscale_array));
196: for (col = 0; col < nxloc; col++) {
197: dx = xx[col];
198: if (PetscAbsScalar(dx) < umin) {
199: if (PetscRealPart(dx) >= 0.0) dx = umin;
200: else if (PetscRealPart(dx) < 0.0) dx = -umin;
201: }
202: dx *= epsilon;
203: vscale_array[col] = 1.0 / dx;
204: }
205: PetscCall(VecRestoreArrayRead(x1, &xx));
206: PetscCall(VecRestoreArray(vscale, &vscale_array));
207: }
208: if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
209: PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD));
210: PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD));
211: }
213: /* (3) Loop over each color */
214: if (!coloring->w3) PetscCall(VecDuplicate(x1, &coloring->w3));
215: w3 = coloring->w3;
217: PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */
218: if (vscale) PetscCall(VecGetArray(vscale, &vscale_array));
219: nz = 0;
221: if (coloring->bcols > 1) { /* use blocked insertion of Jentry */
222: PetscInt i, m = J->rmap->n, nbcols, bcols = coloring->bcols;
223: PetscScalar *dy = coloring->dy, *dy_k;
225: nbcols = 0;
226: for (k = 0; k < ncolors; k += bcols) {
227: /*
228: (3-1) Loop over each column associated with color
229: adding the perturbation to the vector w3 = x1 + dx.
230: */
232: dy_k = dy;
233: if (k + bcols > ncolors) bcols = ncolors - k;
234: for (i = 0; i < bcols; i++) {
235: coloring->currentcolor = k + i;
237: PetscCall(VecCopy(x1, w3));
238: PetscCall(VecGetArray(w3, &w3_array));
239: if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
240: if (coloring->htype[0] == 'w') {
241: for (l = 0; l < ncolumns[k + i]; l++) {
242: col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
243: w3_array[col] += 1.0 / dx;
244: }
245: } else { /* htype == 'ds' */
246: vscale_array -= cstart; /* shift pointer so global index can be used */
247: for (l = 0; l < ncolumns[k + i]; l++) {
248: col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
249: w3_array[col] += 1.0 / vscale_array[col];
250: }
251: vscale_array += cstart;
252: }
253: if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
254: PetscCall(VecRestoreArray(w3, &w3_array));
256: /*
257: (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
258: w2 = F(x1 + dx) - F(x1)
259: */
260: PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
261: PetscCall(VecPlaceArray(w2, dy_k)); /* place w2 to the array dy_i */
262: PetscCall((*f)(sctx, w3, w2, fctx));
263: PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
264: PetscCall(VecAXPY(w2, -1.0, w1));
265: PetscCall(VecResetArray(w2));
266: dy_k += m; /* points to dy+i*nxloc */
267: }
269: /*
270: (3-3) Loop over block rows of vector, putting results into Jacobian matrix
271: */
272: nrows_k = nrows[nbcols++];
274: if (coloring->htype[0] == 'w') {
275: for (l = 0; l < nrows_k; l++) {
276: row = Jentry2[nz].row; /* local row index */
277: /* The 'useless' ifdef is due to a bug in NVIDIA nvc 21.11, which triggers a segfault on this line. We write it in
278: another way, and it seems work. See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html
279: */
280: #if defined(PETSC_USE_COMPLEX)
281: PetscScalar *tmp = Jentry2[nz].valaddr;
282: *tmp = dy[row] * dx;
283: #else
284: *(Jentry2[nz].valaddr) = dy[row] * dx;
285: #endif
286: nz++;
287: }
288: } else { /* htype == 'ds' */
289: for (l = 0; l < nrows_k; l++) {
290: row = Jentry[nz].row; /* local row index */
291: #if defined(PETSC_USE_COMPLEX) /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
292: PetscScalar *tmp = Jentry[nz].valaddr;
293: *tmp = dy[row] * vscale_array[Jentry[nz].col];
294: #else
295: *(Jentry[nz].valaddr) = dy[row] * vscale_array[Jentry[nz].col];
296: #endif
297: nz++;
298: }
299: }
300: }
301: } else { /* bcols == 1 */
302: for (k = 0; k < ncolors; k++) {
303: coloring->currentcolor = k;
305: /*
306: (3-1) Loop over each column associated with color
307: adding the perturbation to the vector w3 = x1 + dx.
308: */
309: PetscCall(VecCopy(x1, w3));
310: PetscCall(VecGetArray(w3, &w3_array));
311: if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
312: if (coloring->htype[0] == 'w') {
313: for (l = 0; l < ncolumns[k]; l++) {
314: col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
315: w3_array[col] += 1.0 / dx;
316: }
317: } else { /* htype == 'ds' */
318: vscale_array -= cstart; /* shift pointer so global index can be used */
319: for (l = 0; l < ncolumns[k]; l++) {
320: col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
321: w3_array[col] += 1.0 / vscale_array[col];
322: }
323: vscale_array += cstart;
324: }
325: if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
326: PetscCall(VecRestoreArray(w3, &w3_array));
328: /*
329: (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
330: w2 = F(x1 + dx) - F(x1)
331: */
332: PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
333: PetscCall((*f)(sctx, w3, w2, fctx));
334: PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
335: PetscCall(VecAXPY(w2, -1.0, w1));
337: /*
338: (3-3) Loop over rows of vector, putting results into Jacobian matrix
339: */
340: nrows_k = nrows[k];
341: PetscCall(VecGetArray(w2, &y));
342: if (coloring->htype[0] == 'w') {
343: for (l = 0; l < nrows_k; l++) {
344: row = Jentry2[nz].row; /* local row index */
345: #if defined(PETSC_USE_COMPLEX) /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
346: PetscScalar *tmp = Jentry2[nz].valaddr;
347: *tmp = y[row] * dx;
348: #else
349: *(Jentry2[nz].valaddr) = y[row] * dx;
350: #endif
351: nz++;
352: }
353: } else { /* htype == 'ds' */
354: for (l = 0; l < nrows_k; l++) {
355: row = Jentry[nz].row; /* local row index */
356: #if defined(PETSC_USE_COMPLEX) /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
357: PetscScalar *tmp = Jentry[nz].valaddr;
358: *tmp = y[row] * vscale_array[Jentry[nz].col];
359: #else
360: *(Jentry[nz].valaddr) = y[row] * vscale_array[Jentry[nz].col];
361: #endif
362: nz++;
363: }
364: }
365: PetscCall(VecRestoreArray(w2, &y));
366: }
367: }
369: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
370: if (J->offloadmask != PETSC_OFFLOAD_UNALLOCATED) J->offloadmask = PETSC_OFFLOAD_CPU;
371: #endif
372: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
373: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
374: if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array));
375: coloring->currentcolor = -1;
376: if (!alreadyboundtocpu) PetscCall(VecBindToCPU(x1, PETSC_FALSE));
377: PetscFunctionReturn(PETSC_SUCCESS);
378: }
380: PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
381: {
382: PetscMPIInt size, *ncolsonproc, *disp, nn;
383: PetscInt i, n, nrows, nrows_i, j, k, m, ncols, col, *rowhit, cstart, cend, colb;
384: const PetscInt *is, *A_ci, *A_cj, *B_ci, *B_cj, *row = NULL, *ltog = NULL;
385: PetscInt nis = iscoloring->n, nctot, *cols, tmp = 0;
386: ISLocalToGlobalMapping map = mat->cmap->mapping;
387: PetscInt ctype = c->ctype, *spidxA, *spidxB, nz, bs, bs2, spidx;
388: Mat A, B;
389: PetscScalar *A_val, *B_val, **valaddrhit;
390: MatEntry *Jentry;
391: MatEntry2 *Jentry2;
392: PetscBool isBAIJ, isSELL;
393: PetscInt bcols = c->bcols;
394: #if defined(PETSC_USE_CTABLE)
395: PetscHMapI colmap = NULL;
396: #else
397: PetscInt *colmap = NULL; /* local col number of off-diag col */
398: #endif
400: PetscFunctionBegin;
401: if (ctype == IS_COLORING_LOCAL) {
402: PetscCheck(map, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_INCOMP, "When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
403: PetscCall(ISLocalToGlobalMappingGetIndices(map, <og));
404: }
406: PetscCall(MatGetBlockSize(mat, &bs));
407: PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ));
408: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL));
409: if (isBAIJ) {
410: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
411: Mat_SeqBAIJ *spA, *spB;
412: A = baij->A;
413: spA = (Mat_SeqBAIJ *)A->data;
414: A_val = spA->a;
415: B = baij->B;
416: spB = (Mat_SeqBAIJ *)B->data;
417: B_val = spB->a;
418: nz = spA->nz + spB->nz; /* total nonzero entries of mat */
419: if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
420: colmap = baij->colmap;
421: PetscCall(MatGetColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
422: PetscCall(MatGetColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
424: if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
425: PetscInt *garray;
426: PetscCall(PetscMalloc1(B->cmap->n, &garray));
427: for (i = 0; i < baij->B->cmap->n / bs; i++) {
428: for (j = 0; j < bs; j++) garray[i * bs + j] = bs * baij->garray[i] + j;
429: }
430: PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, garray, &c->vscale));
431: PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
432: PetscCall(PetscFree(garray));
433: }
434: } else if (isSELL) {
435: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
436: Mat_SeqSELL *spA, *spB;
437: A = sell->A;
438: spA = (Mat_SeqSELL *)A->data;
439: A_val = spA->val;
440: B = sell->B;
441: spB = (Mat_SeqSELL *)B->data;
442: B_val = spB->val;
443: nz = spA->nz + spB->nz; /* total nonzero entries of mat */
444: if (!sell->colmap) {
445: /* Allow access to data structures of local part of matrix
446: - creates aij->colmap which maps global column number to local number in part B */
447: PetscCall(MatCreateColmap_MPISELL_Private(mat));
448: }
449: colmap = sell->colmap;
450: PetscCall(MatGetColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
451: PetscCall(MatGetColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
453: bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */
455: if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
456: PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, sell->garray, &c->vscale));
457: PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
458: }
459: } else {
460: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
461: Mat_SeqAIJ *spA, *spB;
462: A = aij->A;
463: spA = (Mat_SeqAIJ *)A->data;
464: A_val = spA->a;
465: B = aij->B;
466: spB = (Mat_SeqAIJ *)B->data;
467: B_val = spB->a;
468: nz = spA->nz + spB->nz; /* total nonzero entries of mat */
469: if (!aij->colmap) {
470: /* Allow access to data structures of local part of matrix
471: - creates aij->colmap which maps global column number to local number in part B */
472: PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
473: }
474: colmap = aij->colmap;
475: PetscCall(MatGetColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
476: PetscCall(MatGetColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
478: bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */
480: if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
481: PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, aij->garray, &c->vscale));
482: PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
483: }
484: }
486: m = mat->rmap->n / bs;
487: cstart = mat->cmap->rstart / bs;
488: cend = mat->cmap->rend / bs;
490: PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns));
491: PetscCall(PetscMalloc1(nis, &c->nrows));
493: if (c->htype[0] == 'd') {
494: PetscCall(PetscMalloc1(nz, &Jentry));
495: c->matentry = Jentry;
496: } else if (c->htype[0] == 'w') {
497: PetscCall(PetscMalloc1(nz, &Jentry2));
498: c->matentry2 = Jentry2;
499: } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");
501: PetscCall(PetscMalloc2(m + 1, &rowhit, m + 1, &valaddrhit));
502: nz = 0;
503: PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa));
505: if (ctype == IS_COLORING_GLOBAL) {
506: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
507: PetscCall(PetscMalloc2(size, &ncolsonproc, size, &disp));
508: }
510: for (i = 0; i < nis; i++) { /* for each local color */
511: PetscCall(ISGetLocalSize(c->isa[i], &n));
512: PetscCall(ISGetIndices(c->isa[i], &is));
514: c->ncolumns[i] = n; /* local number of columns of this color on this process */
515: c->columns[i] = (PetscInt *)is;
517: if (ctype == IS_COLORING_GLOBAL) {
518: /* Determine nctot, the total (parallel) number of columns of this color */
519: /* ncolsonproc[j]: local ncolumns on proc[j] of this color */
520: PetscCall(PetscMPIIntCast(n, &nn));
521: PetscCallMPI(MPI_Allgather(&nn, 1, MPI_INT, ncolsonproc, 1, MPI_INT, PetscObjectComm((PetscObject)mat)));
522: nctot = 0;
523: for (j = 0; j < size; j++) nctot += ncolsonproc[j];
524: if (!nctot) PetscCall(PetscInfo(mat, "Coloring of matrix has some unneeded colors with no corresponding rows\n"));
526: disp[0] = 0;
527: for (j = 1; j < size; j++) disp[j] = disp[j - 1] + ncolsonproc[j - 1];
529: /* Get cols, the complete list of columns for this color on each process */
530: PetscCall(PetscMalloc1(nctot + 1, &cols));
531: PetscCallMPI(MPI_Allgatherv((void *)is, n, MPIU_INT, cols, ncolsonproc, disp, MPIU_INT, PetscObjectComm((PetscObject)mat)));
532: } else if (ctype == IS_COLORING_LOCAL) {
533: /* Determine local number of columns of this color on this process, including ghost points */
534: nctot = n;
535: cols = (PetscInt *)is;
536: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not provided for this MatFDColoring type");
538: /* Mark all rows affect by these columns */
539: PetscCall(PetscArrayzero(rowhit, m));
540: bs2 = bs * bs;
541: nrows_i = 0;
542: for (j = 0; j < nctot; j++) { /* loop over columns*/
543: if (ctype == IS_COLORING_LOCAL) {
544: col = ltog[cols[j]];
545: } else {
546: col = cols[j];
547: }
548: if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */
549: tmp = A_ci[col - cstart];
550: row = A_cj + tmp;
551: nrows = A_ci[col - cstart + 1] - tmp;
552: nrows_i += nrows;
554: /* loop over columns of A marking them in rowhit */
555: for (k = 0; k < nrows; k++) {
556: /* set valaddrhit for part A */
557: spidx = bs2 * spidxA[tmp + k];
558: valaddrhit[*row] = &A_val[spidx];
559: rowhit[*row++] = col - cstart + 1; /* local column index */
560: }
561: } else { /* column is in B, off-diagonal block of mat */
562: #if defined(PETSC_USE_CTABLE)
563: PetscCall(PetscHMapIGetWithDefault(colmap, col + 1, 0, &colb));
564: colb--;
565: #else
566: colb = colmap[col] - 1; /* local column index */
567: #endif
568: if (colb == -1) {
569: nrows = 0;
570: } else {
571: colb = colb / bs;
572: tmp = B_ci[colb];
573: row = B_cj + tmp;
574: nrows = B_ci[colb + 1] - tmp;
575: }
576: nrows_i += nrows;
577: /* loop over columns of B marking them in rowhit */
578: for (k = 0; k < nrows; k++) {
579: /* set valaddrhit for part B */
580: spidx = bs2 * spidxB[tmp + k];
581: valaddrhit[*row] = &B_val[spidx];
582: rowhit[*row++] = colb + 1 + cend - cstart; /* local column index */
583: }
584: }
585: }
586: c->nrows[i] = nrows_i;
588: if (c->htype[0] == 'd') {
589: for (j = 0; j < m; j++) {
590: if (rowhit[j]) {
591: Jentry[nz].row = j; /* local row index */
592: Jentry[nz].col = rowhit[j] - 1; /* local column index */
593: Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
594: nz++;
595: }
596: }
597: } else { /* c->htype == 'wp' */
598: for (j = 0; j < m; j++) {
599: if (rowhit[j]) {
600: Jentry2[nz].row = j; /* local row index */
601: Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
602: nz++;
603: }
604: }
605: }
606: if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree(cols));
607: }
608: if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree2(ncolsonproc, disp));
610: if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
611: PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz));
612: }
614: if (isBAIJ) {
615: PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
616: PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
617: PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy));
618: } else if (isSELL) {
619: PetscCall(MatRestoreColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
620: PetscCall(MatRestoreColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
621: } else {
622: PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
623: PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
624: }
626: PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa));
627: PetscCall(PetscFree2(rowhit, valaddrhit));
629: if (ctype == IS_COLORING_LOCAL) PetscCall(ISLocalToGlobalMappingRestoreIndices(map, <og));
630: PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols));
631: PetscFunctionReturn(PETSC_SUCCESS);
632: }
634: PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
635: {
636: PetscInt bs, nis = iscoloring->n, m = mat->rmap->n;
637: PetscBool isBAIJ, isSELL;
639: PetscFunctionBegin;
640: /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian;
641: bcols is chosen s.t. dy-array takes 50% of memory space as mat */
642: PetscCall(MatGetBlockSize(mat, &bs));
643: PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ));
644: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL));
645: if (isBAIJ || m == 0) {
646: c->brows = m;
647: c->bcols = 1;
648: } else if (isSELL) {
649: /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
650: Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
651: Mat_SeqSELL *spA, *spB;
652: Mat A, B;
653: PetscInt nz, brows, bcols;
654: PetscReal mem;
656: bs = 1; /* only bs=1 is supported for MPISELL matrix */
658: A = sell->A;
659: spA = (Mat_SeqSELL *)A->data;
660: B = sell->B;
661: spB = (Mat_SeqSELL *)B->data;
662: nz = spA->nz + spB->nz; /* total local nonzero entries of mat */
663: mem = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
664: bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
665: brows = 1000 / bcols;
666: if (bcols > nis) bcols = nis;
667: if (brows == 0 || brows > m) brows = m;
668: c->brows = brows;
669: c->bcols = bcols;
670: } else { /* mpiaij matrix */
671: /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
672: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
673: Mat_SeqAIJ *spA, *spB;
674: Mat A, B;
675: PetscInt nz, brows, bcols;
676: PetscReal mem;
678: bs = 1; /* only bs=1 is supported for MPIAIJ matrix */
680: A = aij->A;
681: spA = (Mat_SeqAIJ *)A->data;
682: B = aij->B;
683: spB = (Mat_SeqAIJ *)B->data;
684: nz = spA->nz + spB->nz; /* total local nonzero entries of mat */
685: mem = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
686: bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
687: brows = 1000 / bcols;
688: if (bcols > nis) bcols = nis;
689: if (brows == 0 || brows > m) brows = m;
690: c->brows = brows;
691: c->bcols = bcols;
692: }
694: c->M = mat->rmap->N / bs; /* set the global rows and columns and local rows */
695: c->N = mat->cmap->N / bs;
696: c->m = mat->rmap->n / bs;
697: c->rstart = mat->rmap->rstart / bs;
698: c->ncolors = nis;
699: PetscFunctionReturn(PETSC_SUCCESS);
700: }
702: /*@C
704: MatFDColoringSetValues - takes a matrix in compressed color format and enters the matrix into a PETSc `Mat`
706: Collective
708: Input Parameters:
709: + J - the sparse matrix
710: . coloring - created with `MatFDColoringCreate()` and a local coloring
711: - y - column major storage of matrix values with one color of values per column, the number of rows of y should match
712: the number of local rows of `J` and the number of columns is the number of colors.
714: Level: intermediate
716: Notes:
717: The matrix in compressed color format may come from an automatic differentiation code
719: The code will be slightly faster if `MatFDColoringSetBlockSize`(coloring,`PETSC_DEFAULT`,nc); is called immediately after creating the coloring
721: .seealso: [](chapter_matrices), `Mat`, `MatFDColoringCreate()`, `ISColoring`, `ISColoringCreate()`, `ISColoringSetType()`, `IS_COLORING_LOCAL`, `MatFDColoringSetBlockSize()`
722: @*/
723: PetscErrorCode MatFDColoringSetValues(Mat J, MatFDColoring coloring, const PetscScalar *y)
724: {
725: MatEntry2 *Jentry2;
726: PetscInt row, i, nrows_k, l, ncolors, nz = 0, bcols, nbcols = 0;
727: const PetscInt *nrows;
728: PetscBool eq;
730: PetscFunctionBegin;
733: PetscCall(PetscObjectCompareId((PetscObject)J, coloring->matid, &eq));
734: PetscCheck(eq, PetscObjectComm((PetscObject)J), PETSC_ERR_ARG_WRONG, "Matrix used with MatFDColoringSetValues() must be that used with MatFDColoringCreate()");
735: Jentry2 = coloring->matentry2;
736: nrows = coloring->nrows;
737: ncolors = coloring->ncolors;
738: bcols = coloring->bcols;
740: for (i = 0; i < ncolors; i += bcols) {
741: nrows_k = nrows[nbcols++];
742: for (l = 0; l < nrows_k; l++) {
743: row = Jentry2[nz].row; /* local row index */
744: *(Jentry2[nz++].valaddr) = y[row];
745: }
746: y += bcols * coloring->m;
747: }
748: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
749: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
750: PetscFunctionReturn(PETSC_SUCCESS);
751: }