Actual source code: fdda.c
2: #include <petsc/private/dmdaimpl.h>
3: #include <petscmat.h>
4: #include <petscbt.h>
6: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
11: /*
12: For a ghost index i that may be negative or greater than the upper bound, this
13: maps it into the 0:m-1 range using periodicity
14: */
15: #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
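/*
   For example, with m = 10: SetInRange(-2, 10) evaluates to 8 and SetInRange(11, 10)
   evaluates to 1, wrapping out-of-range ghost indices back into the periodic domain.
*/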
17: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
18: {
19: PetscInt i, j, nz, *fill;
21: PetscFunctionBegin;
22: if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
24: /* count the number of nonzeros */
25: nz = 0;
26: for (i = 0; i < w; i++) {
27: for (j = 0; j < w; j++) {
28: if (dfill[w * i + j]) nz++;
29: }
30: }
31: PetscCall(PetscMalloc1(nz + w + 1, &fill));
32: /* construct modified CSR storage of nonzero structure */
33: /* fill[0 .. w] marks the start of each row of column indices (and the end of the last row)
34: so fill[1] - fill[0] gives the number of nonzeros in the first row, etc. */
35: nz = w + 1;
36: for (i = 0; i < w; i++) {
37: fill[i] = nz;
38: for (j = 0; j < w; j++) {
39: if (dfill[w * i + j]) {
40: fill[nz] = j;
41: nz++;
42: }
43: }
44: }
45: fill[w] = nz;
47: *rfill = fill;
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
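/*
   A worked example of the modified CSR layout built above: for w = 3 and the dense
   pattern dfill = {1,0,0, 1,1,0, 0,1,1} (nz = 5), the routine returns
   fill = {4, 5, 7, 9, 0, 0, 1, 1, 2}: fill[0..2] = {4,5,7} say where each row's
   column indices start, fill[3] = 9 is the total array length, and the tail
   {0, 0,1, 1,2} lists the column indices of rows 0, 1, and 2.
*/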
51: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
52: {
53: PetscInt nz;
55: PetscFunctionBegin;
56: if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
58: /* Determine number of non-zeros */
59: nz = (dfillsparse[w] - w - 1);
61: /* Allocate space for our copy of the given sparse matrix representation. */
62: PetscCall(PetscMalloc1(nz + w + 1, rfill));
63: PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
64: PetscFunctionReturn(PETSC_SUCCESS);
65: }
67: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68: {
69: PetscInt i, k, cnt = 1;
71: PetscFunctionBegin;
73: /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
74: columns to the left with any nonzeros in them plus 1 */
75: PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
76: for (i = 0; i < dd->w; i++) {
77: for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78: }
79: for (i = 0; i < dd->w; i++) {
80: if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
81: }
82: PetscFunctionReturn(PETSC_SUCCESS);
83: }
85: /*@
86: DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
87: of the matrix returned by `DMCreateMatrix()`.
89: Logically Collective on da
91: Input Parameters:
92: + da - the distributed array
93: . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
94: - ofill - the fill pattern in the off-diagonal blocks
96: Level: developer
98: Notes:
99: This only makes sense when you are doing multicomponent problems while using the
100: `MATMPIAIJ` matrix format
102: The format for dfill and ofill is a two-dimensional dof-by-dof matrix with entries of 1
103: representing coupling and entries of 0 representing missing coupling. For example
104: .vb
105: dfill[9] = {1, 0, 0,
106: 1, 1, 0,
107: 0, 1, 1}
108: .ve
109: means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
110: itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
111: diagonal block).
113: `DMDASetGetMatrix()` allows you to provide general code for more complicated nonzero patterns than
114: can be represented in the dfill, ofill format
116: Contributed by Glenn Hammond
118: .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
119: @*/
120: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
121: {
122: DM_DA *dd = (DM_DA *)da->data;
124: PetscFunctionBegin;
125: /* save the given dfill and ofill information */
126: PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
127: PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
129: /* count nonzeros in ofill columns */
130: PetscCall(DMDASetBlockFills_Private2(dd));
131: PetscFunctionReturn(PETSC_SUCCESS);
132: }
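/*
   A minimal usage sketch (assuming an already created DMDA named da with dof = 3 and
   an uninitialized Mat J): the diagonal block couples each component with itself and
   its predecessor, while the off-diagonal blocks couple each component only with itself.

     PetscInt dfill[9] = {1, 0, 0,
                          1, 1, 0,
                          0, 1, 1};
     PetscInt ofill[9] = {1, 0, 0,
                          0, 1, 0,
                          0, 0, 1};
     PetscCall(DMDASetBlockFills(da, dfill, ofill));
     PetscCall(DMCreateMatrix(da, &J));
*/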
134: /*@
135: DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
136: of the matrix returned by `DMCreateMatrix()`, using sparse representations
137: of fill patterns.
139: Logically Collective on da
141: Input Parameters:
142: + da - the distributed array
143: . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
144: - ofill - the sparse fill pattern in the off-diagonal blocks
146: Level: developer
148: Notes:
149: This only makes sense when you are doing multicomponent problems while using the
150: `MATMPIAIJ` matrix format
152: The format for dfill and ofill is a sparse representation of a
153: dof-by-dof matrix with entries of 1 representing coupling and entries of 0
154: representing missing coupling. The sparse representation is a one-dimensional
155: array of length nz + dof + 1, where nz is the number of non-zeros in
156: the matrix. The first dof entries in the array give the
157: starting array indices of each row's items in the rest of the array,
158: the (dof+1)st entry contains the value nz + dof + 1 (i.e., the entire length of the array)
159: and the remaining nz items give the column indices of each of
160: the 1s within the logical 2D matrix. Each row's items within
161: the array are the column indices of the 1s within that row
162: of the 2D matrix. PETSc developers may recognize that this is the
163: same format as that computed by the `DMDASetBlockFills_Private()`
164: function from a dense 2D matrix representation.
166: `DMDASetGetMatrix()` allows you to provide general code for more complicated nonzero patterns than
167: can be represented in the dfill, ofill format
169: Contributed by Philip C. Roth
171: .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172: @*/
173: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174: {
175: DM_DA *dd = (DM_DA *)da->data;
177: PetscFunctionBegin;
178: /* save the given dfill and ofill information */
179: PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180: PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
182: /* count nonzeros in ofill columns */
183: PetscCall(DMDASetBlockFills_Private2(dd));
184: PetscFunctionReturn(PETSC_SUCCESS);
185: }
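/*
   A sketch of the sparse encoding for the same 3-component pattern used in the
   DMDASetBlockFills() example above (dof = 3, nz = 5, so the array has length
   nz + dof + 1 = 9); ofillsparse (not shown) is encoded the same way:

     PetscInt dfillsparse[9] = {4, 5, 7,    where each row's column indices start
                                9,          total length of the array
                                0,          row 0 couples with column 0
                                0, 1,       row 1 couples with columns 0 and 1
                                1, 2};      row 2 couples with columns 1 and 2
     PetscCall(DMDASetBlockFillsSparse(da, dfillsparse, ofillsparse));
*/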
187: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188: {
189: PetscInt dim, m, n, p, nc;
190: DMBoundaryType bx, by, bz;
191: MPI_Comm comm;
192: PetscMPIInt size;
193: PetscBool isBAIJ;
194: DM_DA *dd = (DM_DA *)da->data;
196: PetscFunctionBegin;
197: /*
198: m
199: ------------------------------------------------------
200: | |
201: | |
202: | ---------------------- |
203: | | | |
204: n | yn | | |
205: | | | |
206: | .--------------------- |
207: | (xs,ys) xn |
208: | . |
209: | (gxs,gys) |
210: | |
211: -----------------------------------------------------
212: */
214: /*
215: nc - number of components per grid point
216: col - number of colors needed in one direction for single component problem
218: */
219: PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
221: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222: PetscCallMPI(MPI_Comm_size(comm, &size));
223: if (ctype == IS_COLORING_LOCAL) {
224: if (size == 1) {
225: ctype = IS_COLORING_GLOBAL;
226: } else {
227: PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for a periodic boundary condition when both ends of the domain are on the same process");
228: }
229: }
231: /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
232: matrices is for the blocks, not the individual matrix elements */
233: PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
234: if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
235: if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
236: if (isBAIJ) {
237: dd->w = 1;
238: dd->xs = dd->xs / nc;
239: dd->xe = dd->xe / nc;
240: dd->Xs = dd->Xs / nc;
241: dd->Xe = dd->Xe / nc;
242: }
244: /*
245: We do not provide a getcoloring function in the DMDA operations because
246: the basic DMDA does not know about matrices. We think of DMDA as being
247: more low-level than matrices.
248: */
249: if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
250: else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
251: else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
252: else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimensions; send mail to petsc-maint@mcs.anl.gov for code", dim);
253: if (isBAIJ) {
254: dd->w = nc;
255: dd->xs = dd->xs * nc;
256: dd->xe = dd->xe * nc;
257: dd->Xs = dd->Xs * nc;
258: dd->Xe = dd->Xe * nc;
259: }
260: PetscFunctionReturn(PETSC_SUCCESS);
261: }
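/*
   A typical consumer of these colorings (a sketch, assuming a matrix J from
   DMCreateMatrix() and a residual callback named FormFunction): finite-difference
   Jacobian computation via MatFDColoring.

     ISColoring    iscoloring;
     MatFDColoring fdcoloring;
     PetscCall(DMCreateColoring(da, IS_COLORING_GLOBAL, &iscoloring));
     PetscCall(MatFDColoringCreate(J, iscoloring, &fdcoloring));
     PetscCall(MatFDColoringSetFunction(fdcoloring, (PetscErrorCode (*)(void))FormFunction, NULL));
     PetscCall(MatFDColoringSetFromOptions(fdcoloring));
     PetscCall(MatFDColoringSetUp(J, iscoloring, fdcoloring));
     PetscCall(ISColoringDestroy(&iscoloring));
*/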
263: /* ---------------------------------------------------------------------------------*/
265: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
266: {
267: PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
268: PetscInt ncolors = 0;
269: MPI_Comm comm;
270: DMBoundaryType bx, by;
271: DMDAStencilType st;
272: ISColoringValue *colors;
273: DM_DA *dd = (DM_DA *)da->data;
275: PetscFunctionBegin;
276: /*
277: nc - number of components per grid point
278: col - number of colors needed in one direction for single component problem
280: */
281: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
282: col = 2 * s + 1;
283: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
284: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
285: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
287: /* special case as taught to us by Paul Hovland */
288: if (st == DMDA_STENCIL_STAR && s == 1) {
289: PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
290: } else {
291: if (ctype == IS_COLORING_GLOBAL) {
292: if (!dd->localcoloring) {
293: PetscCall(PetscMalloc1(nc * nx * ny, &colors));
294: ii = 0;
295: for (j = ys; j < ys + ny; j++) {
296: for (i = xs; i < xs + nx; i++) {
297: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
298: }
299: }
300: ncolors = nc + nc * (col - 1 + col * (col - 1));
301: PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
302: }
303: *coloring = dd->localcoloring;
304: } else if (ctype == IS_COLORING_LOCAL) {
305: if (!dd->ghostedcoloring) {
306: PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
307: ii = 0;
308: for (j = gys; j < gys + gny; j++) {
309: for (i = gxs; i < gxs + gnx; i++) {
310: for (k = 0; k < nc; k++) {
311: /* the complicated stuff is to handle periodic boundaries */
312: colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
313: }
314: }
315: }
316: ncolors = nc + nc * (col - 1 + col * (col - 1));
317: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
318: /* PetscIntView(ncolors,(PetscInt*)colors,0); */
320: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
321: }
322: *coloring = dd->ghostedcoloring;
323: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
324: }
325: PetscCall(ISColoringReference(*coloring));
326: PetscFunctionReturn(PETSC_SUCCESS);
327: }
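/*
   To see the tiling above concretely: with s = 1 (so col = 3) and nc = 1, the color of
   grid point (i,j) is (i % 3) + 3 * (j % 3), a repeating 3x3 block of 9 colors, which
   matches ncolors = nc + nc * (col - 1 + col * (col - 1)) = 9.
*/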
329: /* ---------------------------------------------------------------------------------*/
331: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
332: {
333: PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
334: PetscInt ncolors;
335: MPI_Comm comm;
336: DMBoundaryType bx, by, bz;
337: DMDAStencilType st;
338: ISColoringValue *colors;
339: DM_DA *dd = (DM_DA *)da->data;
341: PetscFunctionBegin;
342: /*
343: nc - number of components per grid point
344: col - number of colors needed in one direction for single component problem
345: */
346: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
347: col = 2 * s + 1;
348: PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure the number of grid points in X is divisible\n\
349: by 2*stencil_width + 1\n");
350: PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure the number of grid points in Y is divisible\n\
351: by 2*stencil_width + 1\n");
352: PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure the number of grid points in Z is divisible\n\
353: by 2*stencil_width + 1\n");
355: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
356: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
357: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
359: /* create the coloring */
360: if (ctype == IS_COLORING_GLOBAL) {
361: if (!dd->localcoloring) {
362: PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
363: ii = 0;
364: for (k = zs; k < zs + nz; k++) {
365: for (j = ys; j < ys + ny; j++) {
366: for (i = xs; i < xs + nx; i++) {
367: for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
368: }
369: }
370: }
371: ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
372: PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
373: }
374: *coloring = dd->localcoloring;
375: } else if (ctype == IS_COLORING_LOCAL) {
376: if (!dd->ghostedcoloring) {
377: PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
378: ii = 0;
379: for (k = gzs; k < gzs + gnz; k++) {
380: for (j = gys; j < gys + gny; j++) {
381: for (i = gxs; i < gxs + gnx; i++) {
382: for (l = 0; l < nc; l++) {
383: /* the complicated stuff is to handle periodic boundaries */
384: colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
385: }
386: }
387: }
388: }
389: ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
390: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
391: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
392: }
393: *coloring = dd->ghostedcoloring;
394: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
395: PetscCall(ISColoringReference(*coloring));
396: PetscFunctionReturn(PETSC_SUCCESS);
397: }
399: /* ---------------------------------------------------------------------------------*/
401: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
402: {
403: PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
404: PetscInt ncolors;
405: MPI_Comm comm;
406: DMBoundaryType bx;
407: ISColoringValue *colors;
408: DM_DA *dd = (DM_DA *)da->data;
410: PetscFunctionBegin;
411: /*
412: nc - number of components per grid point
413: col - number of colors needed in one direction for single component problem
414: */
415: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
416: col = 2 * s + 1;
417: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
418: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
419: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
421: /* create the coloring */
422: if (ctype == IS_COLORING_GLOBAL) {
423: if (!dd->localcoloring) {
424: PetscCall(PetscMalloc1(nc * nx, &colors));
425: if (dd->ofillcols) {
426: PetscInt tc = 0;
427: for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
428: i1 = 0;
429: for (i = xs; i < xs + nx; i++) {
430: for (l = 0; l < nc; l++) {
431: if (dd->ofillcols[l] && (i % col)) {
432: colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
433: } else {
434: colors[i1++] = l;
435: }
436: }
437: }
438: ncolors = nc + 2 * s * tc;
439: } else {
440: i1 = 0;
441: for (i = xs; i < xs + nx; i++) {
442: for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
443: }
444: ncolors = nc + nc * (col - 1);
445: }
446: PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
447: }
448: *coloring = dd->localcoloring;
449: } else if (ctype == IS_COLORING_LOCAL) {
450: if (!dd->ghostedcoloring) {
451: PetscCall(PetscMalloc1(nc * gnx, &colors));
452: i1 = 0;
453: for (i = gxs; i < gxs + gnx; i++) {
454: for (l = 0; l < nc; l++) {
455: /* the complicated stuff is to handle periodic boundaries */
456: colors[i1++] = l + nc * (SetInRange(i, m) % col);
457: }
458: }
459: ncolors = nc + nc * (col - 1);
460: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
461: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
462: }
463: *coloring = dd->ghostedcoloring;
464: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
465: PetscCall(ISColoringReference(*coloring));
466: PetscFunctionReturn(PETSC_SUCCESS);
467: }
469: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
470: {
471: PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
472: PetscInt ncolors;
473: MPI_Comm comm;
474: DMBoundaryType bx, by;
475: ISColoringValue *colors;
476: DM_DA *dd = (DM_DA *)da->data;
478: PetscFunctionBegin;
479: /*
480: nc - number of components per grid point
481: col - number of colors needed in one direction for single component problem
482: */
483: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
484: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
485: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
486: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
487: /* create the coloring */
488: if (ctype == IS_COLORING_GLOBAL) {
489: if (!dd->localcoloring) {
490: PetscCall(PetscMalloc1(nc * nx * ny, &colors));
491: ii = 0;
492: for (j = ys; j < ys + ny; j++) {
493: for (i = xs; i < xs + nx; i++) {
494: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
495: }
496: }
497: ncolors = 5 * nc;
498: PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
499: }
500: *coloring = dd->localcoloring;
501: } else if (ctype == IS_COLORING_LOCAL) {
502: if (!dd->ghostedcoloring) {
503: PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
504: ii = 0;
505: for (j = gys; j < gys + gny; j++) {
506: for (i = gxs; i < gxs + gnx; i++) {
507: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
508: }
509: }
510: ncolors = 5 * nc;
511: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
512: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
513: }
514: *coloring = dd->ghostedcoloring;
515: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
516: PetscFunctionReturn(PETSC_SUCCESS);
517: }
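/*
   Why (3*j + i) % 5 above is a valid 5-coloring of the 5-point stencil: stepping one
   grid point in i changes the color by 1 (mod 5) and stepping in j changes it by 3
   (mod 5), so a node and its four neighbors get the offsets {0, +1, -1, +3, -3},
   which are five distinct values modulo 5.
*/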
519: /* =========================================================================== */
520: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
521: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
522: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
523: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
524: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
525: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
526: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
527: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
528: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
529: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
530: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
531: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
532: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
533: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
535: /*@C
536: MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix
538: Logically Collective on mat
540: Input Parameters:
541: + mat - the matrix
542: - da - the da
544: Level: intermediate
546: .seealso: `Mat`, `MatSetUp()`
547: @*/
548: PetscErrorCode MatSetupDM(Mat mat, DM da)
549: {
550: PetscFunctionBegin;
553: PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
554: PetscFunctionReturn(PETSC_SUCCESS);
555: }
557: PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
558: {
559: DM da;
560: const char *prefix;
561: Mat Anatural;
562: AO ao;
563: PetscInt rstart, rend, *petsc, i;
564: IS is;
565: MPI_Comm comm;
566: PetscViewerFormat format;
568: PetscFunctionBegin;
569: /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
570: PetscCall(PetscViewerGetFormat(viewer, &format));
571: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
573: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
574: PetscCall(MatGetDM(A, &da));
575: PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
577: PetscCall(DMDAGetAO(da, &ao));
578: PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
579: PetscCall(PetscMalloc1(rend - rstart, &petsc));
580: for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
581: PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
582: PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
584: /* call viewer on natural ordering */
585: PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
586: PetscCall(ISDestroy(&is));
587: PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
588: PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
589: PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
590: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
591: PetscCall(MatView(Anatural, viewer));
592: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
593: PetscCall(MatDestroy(&Anatural));
594: PetscFunctionReturn(PETSC_SUCCESS);
595: }
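/*
   The effect of the viewer override above, as a sketch (assuming a parallel matrix J
   obtained from DMCreateMatrix() on a DMDA): a plain MatView() call now displays the
   matrix in the natural grid ordering instead of the parallel PETSc ordering.

     PetscCall(MatView(J, PETSC_VIEWER_STDOUT_WORLD));
*/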
597: PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
598: {
599: DM da;
600: Mat Anatural, Aapp;
601: AO ao;
602: PetscInt rstart, rend, *app, i, m, n, M, N;
603: IS is;
604: MPI_Comm comm;
606: PetscFunctionBegin;
607: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
608: PetscCall(MatGetDM(A, &da));
609: PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
611: /* Load the matrix in natural ordering */
612: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
613: PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
614: PetscCall(MatGetSize(A, &M, &N));
615: PetscCall(MatGetLocalSize(A, &m, &n));
616: PetscCall(MatSetSizes(Anatural, m, n, M, N));
617: PetscCall(MatLoad(Anatural, viewer));
619: /* Map natural ordering to application ordering and create IS */
620: PetscCall(DMDAGetAO(da, &ao));
621: PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
622: PetscCall(PetscMalloc1(rend - rstart, &app));
623: for (i = rstart; i < rend; i++) app[i - rstart] = i;
624: PetscCall(AOPetscToApplication(ao, rend - rstart, app));
625: PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
627: /* Do permutation and replace header */
628: PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
629: PetscCall(MatHeaderReplace(A, &Aapp));
630: PetscCall(ISDestroy(&is));
631: PetscCall(MatDestroy(&Anatural));
632: PetscFunctionReturn(PETSC_SUCCESS);
633: }
635: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
636: {
637: PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
638: Mat A;
639: MPI_Comm comm;
640: MatType Atype;
641: MatType mtype;
642: PetscMPIInt size;
643: DM_DA *dd = (DM_DA *)da->data;
644: void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
646: PetscFunctionBegin;
647: PetscCall(MatInitializePackage());
648: mtype = da->mattype;
650: /*
651: m
652: ------------------------------------------------------
653: | |
654: | |
655: | ---------------------- |
656: | | | |
657: n | ny | | |
658: | | | |
659: | .--------------------- |
660: | (xs,ys) nx |
661: | . |
662: | (gxs,gys) |
663: | |
664: -----------------------------------------------------
665: */
667: /*
668: nc - number of components per grid point
669: col - number of colors needed in one direction for single component problem
670: */
671: M = dd->M;
672: N = dd->N;
673: P = dd->P;
674: dim = da->dim;
675: dof = dd->w;
676: /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
677: PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
678: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
679: PetscCall(MatCreate(comm, &A));
680: PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
681: PetscCall(MatSetType(A, mtype));
682: PetscCall(MatSetFromOptions(A));
683: if (dof * nx * ny * nz < da->bind_below) {
684: PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
685: PetscCall(MatBindToCPU(A, PETSC_TRUE));
686: }
687: PetscCall(MatSetDM(A, da));
688: if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
689: PetscCall(MatGetType(A, &Atype));
690: /*
691: We do not provide a getmatrix function in the DMDA operations because
692: the basic DMDA does not know about matrices. We think of DMDA as being
693: more low-level than matrices. This is kind of cheating, because sometimes
694: we think of DMDA as higher-level than matrices.
696: We could switch based on Atype (or mtype), but we do not since the
697: specialized setting routines depend only on the particular preallocation
698: details of the matrix, not the type itself.
699: */
700: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
701: if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
702: if (!aij) {
703: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
704: if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
705: if (!baij) {
706: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
707: if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
708: if (!sbaij) {
709: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
710: if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
711: }
712: if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
713: }
714: }
715: if (aij) {
716: if (dim == 1) {
717: if (dd->ofill) {
718: PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
719: } else {
720: DMBoundaryType bx;
721: PetscMPIInt size;
722: PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
723: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
724: if (size == 1 && bx == DM_BOUNDARY_NONE) {
725: PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
726: } else {
727: PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
728: }
729: }
730: } else if (dim == 2) {
731: if (dd->ofill) {
732: PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
733: } else {
734: PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
735: }
736: } else if (dim == 3) {
737: if (dd->ofill) {
738: PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
739: } else {
740: PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
741: }
742: }
743: } else if (baij) {
744: if (dim == 2) {
745: PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
746: } else if (dim == 3) {
747: PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
748: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for matrix type %s in %" PetscInt_FMT " dimensions! Send mail to petsc-maint@mcs.anl.gov for code", Atype, dim);
749: } else if (sbaij) {
750: if (dim == 2) {
751: PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
752: } else if (dim == 3) {
753: PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
754: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for matrix type %s in %" PetscInt_FMT " dimensions! Send mail to petsc-maint@mcs.anl.gov for code", Atype, dim);
755: } else if (sell) {
756: if (dim == 2) {
757: PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
758: } else if (dim == 3) {
759: PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
760: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for matrix type %s in %" PetscInt_FMT " dimensions! Send mail to petsc-maint@mcs.anl.gov for code", Atype, dim);
761: } else if (is) {
762: PetscCall(DMCreateMatrix_DA_IS(da, A));
763: } else {
764: ISLocalToGlobalMapping ltog;
766: PetscCall(MatSetBlockSize(A, dof));
767: PetscCall(MatSetUp(A));
768: PetscCall(DMGetLocalToGlobalMapping(da, <og));
769: PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
770: }
771: PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
772: PetscCall(MatSetStencil(A, dim, dims, starts, dof));
773: PetscCall(MatSetDM(A, da));
774: PetscCallMPI(MPI_Comm_size(comm, &size));
775: if (size > 1) {
776: /* change viewer to display matrix in natural ordering */
777: PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
778: PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
779: }
780: *J = A;
781: PetscFunctionReturn(PETSC_SUCCESS);
782: }
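/*
   An end-to-end sketch of how this routine is reached (assuming a small 2d grid with
   1 dof and a star stencil):

     DM  da;
     Mat J;
     PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 8, 8, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
     PetscCall(DMSetUp(da));
     PetscCall(DMSetMatType(da, MATAIJ));
     PetscCall(DMCreateMatrix(da, &J));
*/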
784: /* ---------------------------------------------------------------------------------*/
785: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
787: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
788: {
789: DM_DA *da = (DM_DA *)dm->data;
790: Mat lJ, P;
791: ISLocalToGlobalMapping ltog;
792: IS is;
793: PetscBT bt;
794: const PetscInt *e_loc, *idx;
795: PetscInt i, nel, nen, nv, dof, *gidx, n, N;
797: /* The l2g map of the DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted).
798: We need to filter out the local indices that are not represented through the DMDAGetElements() decomposition */
799: PetscFunctionBegin;
800: dof = da->w;
801: PetscCall(MatSetBlockSize(J, dof));
802: PetscCall(DMGetLocalToGlobalMapping(dm, <og));
804: /* flag local element indices in the local DMDA numbering */
805: PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
806: PetscCall(PetscBTCreate(nv / dof, &bt));
807: PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
808: for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
810: /* filter out (set to -1) the global indices not used by the local elements */
811: PetscCall(PetscMalloc1(nv / dof, &gidx));
812: PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
813: PetscCall(PetscArraycpy(gidx, idx, nv / dof));
814: PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
815: for (i = 0; i < nv / dof; i++)
816: if (!PetscBTLookup(bt, i)) gidx[i] = -1;
817: PetscCall(PetscBTDestroy(&bt));
818: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
819: PetscCall(ISLocalToGlobalMappingCreateIS(is, <og));
820: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
821: PetscCall(ISLocalToGlobalMappingDestroy(<og));
822: PetscCall(ISDestroy(&is));
824: /* Preallocation */
825: if (dm->prealloc_skip) {
826: PetscCall(MatSetUp(J));
827: } else {
828: PetscCall(MatISGetLocalMat(J, &lJ));
829: PetscCall(MatGetLocalToGlobalMapping(lJ, <og, NULL));
830: PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
831: PetscCall(MatSetType(P, MATPREALLOCATOR));
832: PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
833: PetscCall(MatGetSize(lJ, &N, NULL));
834: PetscCall(MatGetLocalSize(lJ, &n, NULL));
835: PetscCall(MatSetSizes(P, n, n, N, N));
836: PetscCall(MatSetBlockSize(P, dof));
837: PetscCall(MatSetUp(P));
838: for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
839: PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
840: PetscCall(MatISRestoreLocalMat(J, &lJ));
841: PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
842: PetscCall(MatDestroy(&P));
844: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
845: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
846: }
847: PetscFunctionReturn(PETSC_SUCCESS);
848: }
850: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
851: {
852: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
853: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
854: MPI_Comm comm;
855: PetscScalar *values;
856: DMBoundaryType bx, by;
857: ISLocalToGlobalMapping ltog;
858: DMDAStencilType st;
860: PetscFunctionBegin;
861: /*
862: nc - number of components per grid point
863: col - number of colors needed in one direction for single component problem
864: */
865: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
866: col = 2 * s + 1;
867: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
868: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
869: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
871: PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
872: PetscCall(DMGetLocalToGlobalMapping(da, <og));
874: PetscCall(MatSetBlockSize(J, nc));
875: /* determine the matrix preallocation information */
876: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
877: for (i = xs; i < xs + nx; i++) {
878: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
879: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
881: for (j = ys; j < ys + ny; j++) {
882: slot = i - gxs + gnx * (j - gys);
884: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
885: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
887: cnt = 0;
888: for (k = 0; k < nc; k++) {
889: for (l = lstart; l < lend + 1; l++) {
890: for (p = pstart; p < pend + 1; p++) {
891: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
892: cols[cnt++] = k + nc * (slot + gnx * l + p);
893: }
894: }
895: }
896: rows[k] = k + nc * (slot);
897: }
898: PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
899: }
900: }
901: PetscCall(MatSetBlockSize(J, nc));
902: PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
903: PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
904: MatPreallocateEnd(dnz, onz);
906: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
908: /*
909: For each node in the grid we get the neighbors in the local (on-process) ordering
910: that includes the ghost points; MatSetValuesLocal() then maps those indices to the
911: global PETSc ordering.
912: */
913: if (!da->prealloc_only) {
914: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
915: for (i = xs; i < xs + nx; i++) {
916: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
917: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
919: for (j = ys; j < ys + ny; j++) {
920: slot = i - gxs + gnx * (j - gys);
922: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
923: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
925: cnt = 0;
926: for (k = 0; k < nc; k++) {
927: for (l = lstart; l < lend + 1; l++) {
928: for (p = pstart; p < pend + 1; p++) {
929: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
930: cols[cnt++] = k + nc * (slot + gnx * l + p);
931: }
932: }
933: }
934: rows[k] = k + nc * (slot);
935: }
936: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
937: }
938: }
939: PetscCall(PetscFree(values));
940: /* do not copy values to GPU since they are all zero and not yet needed there */
941: PetscCall(MatBindToCPU(J, PETSC_TRUE));
942: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
943: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
944: PetscCall(MatBindToCPU(J, PETSC_FALSE));
945: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
946: }
947: PetscCall(PetscFree2(rows, cols));
948: PetscFunctionReturn(PETSC_SUCCESS);
949: }
951: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
952: {
953: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
954: PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
955: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
956: MPI_Comm comm;
957: PetscScalar *values;
958: DMBoundaryType bx, by, bz;
959: ISLocalToGlobalMapping ltog;
960: DMDAStencilType st;
962: PetscFunctionBegin;
963: /*
964: nc - number of components per grid point
965: col - number of colors needed in one direction for single component problem
966: */
967: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
968: col = 2 * s + 1;
969: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
970: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
971: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
973: PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
974: PetscCall(DMGetLocalToGlobalMapping(da, <og));
976: PetscCall(MatSetBlockSize(J, nc));
977: /* determine the matrix preallocation information */
978: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
979: for (i = xs; i < xs + nx; i++) {
980: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
981: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
982: for (j = ys; j < ys + ny; j++) {
983: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
984: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
985: for (k = zs; k < zs + nz; k++) {
986: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
987: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
989: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
991: cnt = 0;
992: for (l = 0; l < nc; l++) {
993: for (ii = istart; ii < iend + 1; ii++) {
994: for (jj = jstart; jj < jend + 1; jj++) {
995: for (kk = kstart; kk < kend + 1; kk++) {
996: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
997: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
998: }
999: }
1000: }
1001: }
1002: rows[l] = l + nc * (slot);
1003: }
1004: PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1005: }
1006: }
1007: }
1008: PetscCall(MatSetBlockSize(J, nc));
1009: PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
1010: PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
1011: MatPreallocateEnd(dnz, onz);
1012: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1014: /*
1015: For each node in the grid we get the neighbors in the local (on-process) ordering
1016: that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1017: global PETSc ordering.
1018: */
1019: if (!da->prealloc_only) {
1020: PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1021: for (i = xs; i < xs + nx; i++) {
1022: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1023: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1024: for (j = ys; j < ys + ny; j++) {
1025: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1026: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1027: for (k = zs; k < zs + nz; k++) {
1028: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1029: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1031: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1033: cnt = 0;
1034: for (l = 0; l < nc; l++) {
1035: for (ii = istart; ii < iend + 1; ii++) {
1036: for (jj = jstart; jj < jend + 1; jj++) {
1037: for (kk = kstart; kk < kend + 1; kk++) {
1038: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1039: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1040: }
1041: }
1042: }
1043: }
1044: rows[l] = l + nc * (slot);
1045: }
1046: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1047: }
1048: }
1049: }
1050: PetscCall(PetscFree(values));
1051: /* do not copy values to GPU since they are all zero and not yet needed there */
1052: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1053: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1054: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1055: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1056: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1057: }
1058: PetscCall(PetscFree2(rows, cols));
1059: PetscFunctionReturn(PETSC_SUCCESS);
1060: }
1062: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1063: {
1064: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1065: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
1066: MPI_Comm comm;
1067: DMBoundaryType bx, by;
1068: ISLocalToGlobalMapping ltog, mltog;
1069: DMDAStencilType st;
1070: PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1072: PetscFunctionBegin;
1073: /*
1074: nc - number of components per grid point
1075: col - number of colors needed in one direction for single component problem
1076: */
1077: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1078: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1079: col = 2 * s + 1;
1080: /*
1081: With one process and a periodic domain in a skinny dimension, the code will label nonzero columns multiple times
1082: because "wrapping" around the end of the domain hits entries already counted in the other direction.
1083: */
1084: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1085: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1086: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1087: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1088: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1090: PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1091: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1093: PetscCall(MatSetBlockSize(J, nc));
1094: /* determine the matrix preallocation information */
1095: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1096: for (i = xs; i < xs + nx; i++) {
1097: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1098: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1100: for (j = ys; j < ys + ny; j++) {
1101: slot = i - gxs + gnx * (j - gys);
1103: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1104: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1106: cnt = 0;
1107: for (k = 0; k < nc; k++) {
1108: for (l = lstart; l < lend + 1; l++) {
1109: for (p = pstart; p < pend + 1; p++) {
1110: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1111: cols[cnt++] = k + nc * (slot + gnx * l + p);
1112: }
1113: }
1114: }
1115: rows[k] = k + nc * (slot);
1116: }
1117: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1118: else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1119: }
1120: }
1121: PetscCall(MatSetBlockSize(J, nc));
1122: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1123: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1124: MatPreallocateEnd(dnz, onz);
1125: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1126: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1128: /*
1129: For each node in the grid we get the neighbors in the local (on-process) ordering
1130: that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1131: global PETSc ordering.
1132: */
1133: if (!da->prealloc_only) {
1134: for (i = xs; i < xs + nx; i++) {
1135: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1136: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1138: for (j = ys; j < ys + ny; j++) {
1139: slot = i - gxs + gnx * (j - gys);
1141: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1142: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1144: cnt = 0;
1145: for (l = lstart; l < lend + 1; l++) {
1146: for (p = pstart; p < pend + 1; p++) {
1147: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1148: cols[cnt++] = nc * (slot + gnx * l + p);
1149: for (k = 1; k < nc; k++) {
1150: cols[cnt] = 1 + cols[cnt - 1];
1151: cnt++;
1152: }
1153: }
1154: }
1155: }
1156: for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1157: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1158: }
1159: }
1160: /* do not copy values to GPU since they are all zero and not yet needed there */
1161: PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1162: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1163: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1164: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1165: if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1166: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1167: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1168: }
1169: PetscCall(PetscFree2(rows, cols));
1170: PetscFunctionReturn(PETSC_SUCCESS);
1171: }
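/*
   Because the caller (DMCreateMatrix_DA()) attaches the stencil and local-to-global
   mapping, the resulting matrix is most naturally filled with grid indices; a sketch
   for one interior 5-point row (assumed values in PetscScalar v[5]):

     MatStencil row, col[5];
     row.i = i;        row.j = j;
     col[0].i = i;     col[0].j = j;
     col[1].i = i - 1; col[1].j = j;
     col[2].i = i + 1; col[2].j = j;
     col[3].i = i;     col[3].j = j - 1;
     col[4].i = i;     col[4].j = j + 1;
     PetscCall(MatSetValuesStencil(J, 1, &row, 5, col, v, INSERT_VALUES));
*/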
1173: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1174: {
1175: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1176: PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1177: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
1178: DM_DA *dd = (DM_DA *)da->data;
1179: PetscInt ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1180: MPI_Comm comm;
1181: DMBoundaryType bx, by;
1182: ISLocalToGlobalMapping ltog;
1183: DMDAStencilType st;
1184: PetscBool removedups = PETSC_FALSE;
1186: PetscFunctionBegin;
1187: /*
1188: nc - number of components per grid point
1189: col - number of colors needed in one direction for single component problem
1190: */
1191: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1192: col = 2 * s + 1;
1193: /*
1194: With one process and a periodic domain in a skinny dimension, the code will label nonzero columns multiple times
1195: because "wrapping" around the end of the domain hits entries already counted in the other direction.
1196: */
1197: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1198: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1199: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1200: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1201: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1203: PetscCall(PetscMalloc1(col * col * nc, &cols));
1204: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1206: PetscCall(MatSetBlockSize(J, nc));
1207: /* determine the matrix preallocation information */
1208: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1209: for (i = xs; i < xs + nx; i++) {
1210: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1211: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1213: for (j = ys; j < ys + ny; j++) {
1214: slot = i - gxs + gnx * (j - gys);
1216: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1217: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1219: for (k = 0; k < nc; k++) {
1220: cnt = 0;
1221: for (l = lstart; l < lend + 1; l++) {
1222: for (p = pstart; p < pend + 1; p++) {
1223: if (l || p) {
1224: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1225: for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1226: }
1227: } else {
1228: if (dfill) {
1229: for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1230: } else {
1231: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1232: }
1233: }
1234: }
1235: }
1236: row = k + nc * (slot);
1237: maxcnt = PetscMax(maxcnt, cnt);
1238: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1239: else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1240: }
1241: }
1242: }
1243: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1244: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1245: MatPreallocateEnd(dnz, onz);
1246: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1248: /*
1249: For each node in the grid we get the neighbors in the local (on-process) ordering
1250: that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1251: global PETSc ordering.
1252: */
1253: if (!da->prealloc_only) {
1254: for (i = xs; i < xs + nx; i++) {
1255: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1256: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1258: for (j = ys; j < ys + ny; j++) {
1259: slot = i - gxs + gnx * (j - gys);
1261: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1262: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1264: for (k = 0; k < nc; k++) {
1265: cnt = 0;
1266: for (l = lstart; l < lend + 1; l++) {
1267: for (p = pstart; p < pend + 1; p++) {
1268: if (l || p) {
1269: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1270: for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1271: }
1272: } else {
1273: if (dfill) {
1274: for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1275: } else {
1276: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1277: }
1278: }
1279: }
1280: }
1281: row = k + nc * (slot);
1282: PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1283: }
1284: }
1285: }
1286: /* do not copy values to GPU since they are all zero and not yet needed there */
1287: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1288: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1289: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1290: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1291: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1292: }
1293: PetscCall(PetscFree(cols));
1294: PetscFunctionReturn(PETSC_SUCCESS);
1295: }
1297: /* ---------------------------------------------------------------------------------*/
1299: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1300: {
1301: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1302: PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1303: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1304: MPI_Comm comm;
1305: DMBoundaryType bx, by, bz;
1306: ISLocalToGlobalMapping ltog, mltog;
1307: DMDAStencilType st;
1308: PetscBool removedups = PETSC_FALSE;
1310: PetscFunctionBegin;
1311: /*
1312: nc - number of components per grid point
1313: col - number of colors needed in one direction for single component problem
1314: */
1315: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1316: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1317: col = 2 * s + 1;
1319: /*
1320: With one process and a periodic domain in a skinny dimension, the code will label nonzero columns multiple times
1321: because "wrapping" around the end of the domain hits entries already counted in the other direction.
1322: */
1323: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1324: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1325: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1327: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1328: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1329: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1331: PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1332: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1334: PetscCall(MatSetBlockSize(J, nc));
1335: /* determine the matrix preallocation information */
1336: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1337: for (i = xs; i < xs + nx; i++) {
1338: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1339: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1340: for (j = ys; j < ys + ny; j++) {
1341: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1342: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1343: for (k = zs; k < zs + nz; k++) {
1344: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1345: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1347: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1349: cnt = 0;
1350: for (l = 0; l < nc; l++) {
1351: for (ii = istart; ii < iend + 1; ii++) {
1352: for (jj = jstart; jj < jend + 1; jj++) {
1353: for (kk = kstart; kk < kend + 1; kk++) {
1354: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1355: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1356: }
1357: }
1358: }
1359: }
1360: rows[l] = l + nc * (slot);
1361: }
1362: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1363: else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1364: }
1365: }
1366: }
1367: PetscCall(MatSetBlockSize(J, nc));
1368: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1369: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1370: MatPreallocateEnd(dnz, onz);
1371: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1372: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1374: /*
1375: For each node in the grid we get the neighbors in the local (on-process) ordering
1376: that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1377: global PETSc ordering.
1378: */
1379: if (!da->prealloc_only) {
1380: for (i = xs; i < xs + nx; i++) {
1381: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1382: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1383: for (j = ys; j < ys + ny; j++) {
1384: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1385: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1386: for (k = zs; k < zs + nz; k++) {
1387: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1388: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1390: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1392: cnt = 0;
1393: for (kk = kstart; kk < kend + 1; kk++) {
1394: for (jj = jstart; jj < jend + 1; jj++) {
1395: for (ii = istart; ii < iend + 1; ii++) {
1396: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1397: cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1398: for (l = 1; l < nc; l++) {
1399: cols[cnt] = 1 + cols[cnt - 1];
1400: cnt++;
1401: }
1402: }
1403: }
1404: }
1405: }
1406: rows[0] = nc * (slot);
1407: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1408: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1409: }
1410: }
1411: }
1412: /* do not copy values to GPU since they are all zero and not yet needed there */
1413: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1414: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1415: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1416: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1417: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1418: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1419: }
1420: PetscCall(PetscFree2(rows, cols));
1421: PetscFunctionReturn(PETSC_SUCCESS);
1422: }
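/*
  Illustrative usage sketch (added for exposition, not part of fdda.c): the routine above is not
  called directly; DMCreateMatrix() dispatches to it for a 3d DMDA with an AIJ matrix type. The
  grid sizes, dof = 2, and stencil width 1 below are assumptions for the example.
.vb
    DM  da;
    Mat J;
    PetscCall(DMDACreate3d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR,
                           10, 10, 10, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 2, 1, NULL, NULL, NULL, &da));
    PetscCall(DMSetUp(da));
    PetscCall(DMCreateMatrix(da, &J)); /* preallocated and assembled with the star-stencil nonzero pattern */
    PetscCall(MatDestroy(&J));
    PetscCall(DMDestroy(&da));
.ve
*/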
1424: /* ---------------------------------------------------------------------------------*/
1426: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1427: {
1428: DM_DA *dd = (DM_DA *)da->data;
1429: PetscInt xs, nx, i, j, gxs, gnx, row, k, l;
1430: PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1431: PetscInt *ofill = dd->ofill, *dfill = dd->dfill;
1432: DMBoundaryType bx;
1433: ISLocalToGlobalMapping ltog;
1434: PetscMPIInt rank, size;
1436: PetscFunctionBegin;
1437: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1438: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1440: /*
1441: nc - number of components per grid point
1442: */
1443: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1444: PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1445: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1446: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1448: PetscCall(MatSetBlockSize(J, nc));
1449: PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1451: /*
1452: note: the counts could be smaller for the first and last process when the boundary is not periodic
1453: does not handle dfill
1454: */
1455: cnt = 0;
1456: /* coupling with process to the left */
1457: for (i = 0; i < s; i++) {
1458: for (j = 0; j < nc; j++) {
1459: ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1460: cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1461: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1462: if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1463: else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1464: }
1465: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1466: cnt++;
1467: }
1468: }
1469: for (i = s; i < nx - s; i++) {
1470: for (j = 0; j < nc; j++) {
1471: cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1472: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1473: cnt++;
1474: }
1475: }
1476: /* coupling with process to the right */
1477: for (i = nx - s; i < nx; i++) {
1478: for (j = 0; j < nc; j++) {
1479: ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1480: cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1481: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1482: if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1483: else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1484: }
1485: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1486: cnt++;
1487: }
1488: }
1490: PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1491: PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1492: PetscCall(PetscFree2(cols, ocols));
1494: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1495: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1497: /*
1498: For each node in the grid: we compute the global indices of its neighbors directly
1499: (i runs over global grid indices here) and insert the entries with
1500: MatSetValues().
1501: */
1502: if (!da->prealloc_only) {
1503: PetscCall(PetscMalloc1(maxcnt, &cols));
1504: row = xs * nc;
1505: /* coupling with process to the left */
1506: for (i = xs; i < xs + s; i++) {
1507: for (j = 0; j < nc; j++) {
1508: cnt = 0;
1509: if (rank) {
1510: for (l = 0; l < s; l++) {
1511: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1512: }
1513: }
1514: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1515: for (l = 0; l < s; l++) {
1516: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1517: }
1518: }
1519: if (dfill) {
1520: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1521: } else {
1522: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1523: }
1524: for (l = 0; l < s; l++) {
1525: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1526: }
1527: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1528: row++;
1529: }
1530: }
1531: for (i = xs + s; i < xs + nx - s; i++) {
1532: for (j = 0; j < nc; j++) {
1533: cnt = 0;
1534: for (l = 0; l < s; l++) {
1535: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1536: }
1537: if (dfill) {
1538: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1539: } else {
1540: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1541: }
1542: for (l = 0; l < s; l++) {
1543: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1544: }
1545: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1546: row++;
1547: }
1548: }
1549: /* coupling with process to the right */
1550: for (i = xs + nx - s; i < xs + nx; i++) {
1551: for (j = 0; j < nc; j++) {
1552: cnt = 0;
1553: for (l = 0; l < s; l++) {
1554: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1555: }
1556: if (dfill) {
1557: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1558: } else {
1559: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1560: }
1561: if (rank < size - 1) {
1562: for (l = 0; l < s; l++) {
1563: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1564: }
1565: }
1566: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1567: for (l = 0; l < s; l++) {
1568: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1569: }
1570: }
1571: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1572: row++;
1573: }
1574: }
1575: PetscCall(PetscFree(cols));
1576: /* do not copy values to GPU since they are all zero and not yet needed there */
1577: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1578: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1579: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1580: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1581: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1582: }
1583: PetscFunctionReturn(PETSC_SUCCESS);
1584: }
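/*
  Illustrative sketch (added for exposition, not part of fdda.c): the _Fill variant above is selected
  when block fill patterns have been attached to the DMDA. Assuming nc = 2 components where
  component 0 couples to both components within a grid point but only to itself across grid points,
  one might write
.vb
    PetscInt dfill[4] = {1, 1,
                         0, 1};
    PetscInt ofill[4] = {1, 0,
                         0, 0};
    PetscCall(DMDASetBlockFills(da, dfill, ofill));
    PetscCall(DMCreateMatrix(da, &J)); /* preallocates only the requested couplings */
.ve
*/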
1586: /* ---------------------------------------------------------------------------------*/
1588: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1589: {
1590: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1591: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1592: PetscInt istart, iend;
1593: DMBoundaryType bx;
1594: ISLocalToGlobalMapping ltog, mltog;
1596: PetscFunctionBegin;
1597: /*
1598: nc - number of components per grid point
1599: col - number of colors needed in one direction for single component problem
1600: */
1601: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1602: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1603: col = 2 * s + 1;
1605: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1606: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1608: PetscCall(MatSetBlockSize(J, nc));
1609: PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1610: PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1612: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1613: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1614: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1616: /*
1617: For each node in the grid: we get the neighbors in the local (on-process, including ghost
1618: points) ordering, then MatSetValuesLocal() maps those indices to the global
1619: PETSc ordering.
1620: */
1621: if (!da->prealloc_only) {
1622: PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1623: for (i = xs; i < xs + nx; i++) {
1624: istart = PetscMax(-s, gxs - i);
1625: iend = PetscMin(s, gxs + gnx - i - 1);
1626: slot = i - gxs;
1628: cnt = 0;
1629: for (i1 = istart; i1 < iend + 1; i1++) {
1630: cols[cnt++] = nc * (slot + i1);
1631: for (l = 1; l < nc; l++) {
1632: cols[cnt] = 1 + cols[cnt - 1];
1633: cnt++;
1634: }
1635: }
1636: rows[0] = nc * (slot);
1637: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1638: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1639: }
1640: /* do not copy values to GPU since they are all zero and not yet needed there */
1641: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1642: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1643: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1644: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1645: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1646: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1647: PetscCall(PetscFree2(rows, cols));
1648: }
1649: PetscFunctionReturn(PETSC_SUCCESS);
1650: }
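/*
  Worked example (added for exposition, not part of fdda.c): with the interleaved component
  ordering used above, assume nc = 2, s = 1, and a point at local slot 5 with istart = -1 and
  iend = 1. The loop produces cols = {8, 9, 10, 11, 12, 13} (both components of slots 4, 5, 6)
  and rows = {10, 11}, so all nc rows of the grid point are inserted with one
  MatSetValuesLocal() call.
*/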
1652: /* ---------------------------------------------------------------------------------*/
1654: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1655: {
1656: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1657: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1658: PetscInt istart, iend;
1659: DMBoundaryType bx;
1660: ISLocalToGlobalMapping ltog, mltog;
1662: PetscFunctionBegin;
1663: /*
1664: nc - number of components per grid point
1665: col - number of colors needed in one direction for single component problem
1666: */
1667: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1668: col = 2 * s + 1;
1670: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1671: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1673: PetscCall(MatSetBlockSize(J, nc));
1674: PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1676: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1677: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1678: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1680: /*
1681: For each node in the grid: we get the neighbors in the local (on-process, including ghost
1682: points) ordering, then MatSetValuesLocal() maps those indices to the global
1683: PETSc ordering.
1684: */
1685: if (!da->prealloc_only) {
1686: PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1687: for (i = xs; i < xs + nx; i++) {
1688: istart = PetscMax(-s, gxs - i);
1689: iend = PetscMin(s, gxs + gnx - i - 1);
1690: slot = i - gxs;
1692: cnt = 0;
1693: for (i1 = istart; i1 < iend + 1; i1++) {
1694: cols[cnt++] = nc * (slot + i1);
1695: for (l = 1; l < nc; l++) {
1696: cols[cnt] = 1 + cols[cnt - 1];
1697: cnt++;
1698: }
1699: }
1700: rows[0] = nc * (slot);
1701: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1702: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1703: }
1704: /* do not copy values to GPU since they are all zero and not yet needed there */
1705: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1706: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1707: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1708: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1709: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1710: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1711: PetscCall(PetscFree2(rows, cols));
1712: }
1713: PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1714: PetscFunctionReturn(PETSC_SUCCESS);
1715: }
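/*
  Worked example (added for exposition, not part of fdda.c): MatSeqAIJSetTotalPreallocation()
  above reserves one contiguous slab sized (local rows) * (maximum columns per row) instead of
  per-row counts. Assuming nx = 100 local grid points, nc = 2 components, and s = 1 (so
  col = 3), the slab holds nx * nc * col * nc = 100 * 2 * 3 * 2 = 1200 column slots, an upper
  bound that ignores boundary truncation.
*/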
1717: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1718: {
1719: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1720: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1721: PetscInt istart, iend, jstart, jend, ii, jj;
1722: MPI_Comm comm;
1723: PetscScalar *values;
1724: DMBoundaryType bx, by;
1725: DMDAStencilType st;
1726: ISLocalToGlobalMapping ltog;
1728: PetscFunctionBegin;
1729: /*
1730: nc - number of components per grid point
1731: col - number of colors needed in one direction for single component problem
1732: */
1733: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1734: col = 2 * s + 1;
1736: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1737: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1738: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1740: PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1742: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1744: /* determine the matrix preallocation information */
1745: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1746: for (i = xs; i < xs + nx; i++) {
1747: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1748: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1749: for (j = ys; j < ys + ny; j++) {
1750: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1751: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1752: slot = i - gxs + gnx * (j - gys);
1754: /* Find block columns in block row */
1755: cnt = 0;
1756: for (ii = istart; ii < iend + 1; ii++) {
1757: for (jj = jstart; jj < jend + 1; jj++) {
1758: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1759: cols[cnt++] = slot + ii + gnx * jj;
1760: }
1761: }
1762: }
1763: PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1764: }
1765: }
1766: PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1767: PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1768: MatPreallocateEnd(dnz, onz);
1770: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1772: /*
1773: For each node in the grid: we get the neighboring block (grid point) indices in the local
1774: (on-process, including ghost points) ordering, then MatSetValuesBlockedLocal() maps those
1775: indices to the global PETSc ordering.
1776: */
1777: if (!da->prealloc_only) {
1778: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1779: for (i = xs; i < xs + nx; i++) {
1780: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1781: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1782: for (j = ys; j < ys + ny; j++) {
1783: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1784: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1785: slot = i - gxs + gnx * (j - gys);
1786: cnt = 0;
1787: for (ii = istart; ii < iend + 1; ii++) {
1788: for (jj = jstart; jj < jend + 1; jj++) {
1789: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1790: cols[cnt++] = slot + ii + gnx * jj;
1791: }
1792: }
1793: }
1794: PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1795: }
1796: }
1797: PetscCall(PetscFree(values));
1798: /* do not copy values to GPU since they are all zero and not yet needed there */
1799: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1800: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1801: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1802: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1803: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1804: }
1805: PetscCall(PetscFree(cols));
1806: PetscFunctionReturn(PETSC_SUCCESS);
1807: }
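/*
  Worked example (added for exposition, not part of fdda.c): for BAIJ the cols[] above hold block
  (grid point) indices rather than scalar indices. With block size nc = 3, a block column 7 passed
  to MatSetValuesBlockedLocal() touches scalar columns 21, 22, and 23, and each block row consumes
  cnt * nc * nc entries of the zeroed values array.
*/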
1809: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1810: {
1811: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1812: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1813: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1814: MPI_Comm comm;
1815: PetscScalar *values;
1816: DMBoundaryType bx, by, bz;
1817: DMDAStencilType st;
1818: ISLocalToGlobalMapping ltog;
1820: PetscFunctionBegin;
1821: /*
1822: nc - number of components per grid point
1823: col - number of colors needed in one direction for single component problem
1824: */
1825: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1826: col = 2 * s + 1;
1828: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1829: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1830: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1832: PetscCall(PetscMalloc1(col * col * col, &cols));
1834: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1836: /* determine the matrix preallocation information */
1837: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1838: for (i = xs; i < xs + nx; i++) {
1839: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1840: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1841: for (j = ys; j < ys + ny; j++) {
1842: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1843: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1844: for (k = zs; k < zs + nz; k++) {
1845: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1846: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1848: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1850: /* Find block columns in block row */
1851: cnt = 0;
1852: for (ii = istart; ii < iend + 1; ii++) {
1853: for (jj = jstart; jj < jend + 1; jj++) {
1854: for (kk = kstart; kk < kend + 1; kk++) {
1855: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1856: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1857: }
1858: }
1859: }
1860: }
1861: PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1862: }
1863: }
1864: }
1865: PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1866: PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1867: MatPreallocateEnd(dnz, onz);
1869: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1871: /*
1872: For each node in the grid: we get the neighboring block (grid point) indices in the local
1873: (on-process, including ghost points) ordering, then MatSetValuesBlockedLocal() maps those
1874: indices to the global PETSc ordering.
1875: */
1876: if (!da->prealloc_only) {
1877: PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1878: for (i = xs; i < xs + nx; i++) {
1879: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1880: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1881: for (j = ys; j < ys + ny; j++) {
1882: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1883: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1884: for (k = zs; k < zs + nz; k++) {
1885: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1886: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1888: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1890: cnt = 0;
1891: for (ii = istart; ii < iend + 1; ii++) {
1892: for (jj = jstart; jj < jend + 1; jj++) {
1893: for (kk = kstart; kk < kend + 1; kk++) {
1894: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1895: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1896: }
1897: }
1898: }
1899: }
1900: PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1901: }
1902: }
1903: }
1904: PetscCall(PetscFree(values));
1905: /* do not copy values to GPU since they are all zero and not yet needed there */
1906: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1907: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1908: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1909: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1910: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1911: }
1912: PetscCall(PetscFree(cols));
1913: PetscFunctionReturn(PETSC_SUCCESS);
1914: }
1916: /*
1917: This helper is for SBAIJ preallocation: it discards the lower-triangular entries, which are difficult to
1918: identify in the local ordering with a periodic domain.
1919: */
1920: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1921: {
1922: PetscInt i, n;
1924: PetscFunctionBegin;
1925: PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1926: PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1927: for (i = 0, n = 0; i < *cnt; i++) {
1928: if (col[i] >= *row) col[n++] = col[i];
1929: }
1930: *cnt = n;
1931: PetscFunctionReturn(PETSC_SUCCESS);
1932: }
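/*
  Worked example (added for exposition, not part of fdda.c): suppose the block row maps to global
  block index 12 and the block columns map to {8, 12, 15, 40}. The filter above keeps only entries
  with col[i] >= 12, compacting the array to {12, 15, 40} and setting *cnt = 3, which is exactly
  the upper triangle that SBAIJ stores.
*/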
1934: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1935: {
1936: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1937: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1938: PetscInt istart, iend, jstart, jend, ii, jj;
1939: MPI_Comm comm;
1940: PetscScalar *values;
1941: DMBoundaryType bx, by;
1942: DMDAStencilType st;
1943: ISLocalToGlobalMapping ltog;
1945: PetscFunctionBegin;
1946: /*
1947: nc - number of components per grid point
1948: col - number of colors needed in one direction for single component problem
1949: */
1950: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1951: col = 2 * s + 1;
1953: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1954: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1955: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1957: PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1959: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1961: /* determine the matrix preallocation information */
1962: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1963: for (i = xs; i < xs + nx; i++) {
1964: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1965: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1966: for (j = ys; j < ys + ny; j++) {
1967: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1968: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1969: slot = i - gxs + gnx * (j - gys);
1971: /* Find block columns in block row */
1972: cnt = 0;
1973: for (ii = istart; ii < iend + 1; ii++) {
1974: for (jj = jstart; jj < jend + 1; jj++) {
1975: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1976: }
1977: }
1978: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1979: PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1980: }
1981: }
1982: PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1983: PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1984: MatPreallocateEnd(dnz, onz);
1986: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1988: /*
1989: For each node in the grid: we get the neighboring block indices in the local (on-process,
1990: including ghost points) ordering, then L2GFilterUpperTriangular() maps them to the global
1991: PETSc ordering and drops the lower triangle before MatSetValuesBlocked().
1992: */
1993: if (!da->prealloc_only) {
1994: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1995: for (i = xs; i < xs + nx; i++) {
1996: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1997: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1998: for (j = ys; j < ys + ny; j++) {
1999: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2000: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2001: slot = i - gxs + gnx * (j - gys);
2003: /* Find block columns in block row */
2004: cnt = 0;
2005: for (ii = istart; ii < iend + 1; ii++) {
2006: for (jj = jstart; jj < jend + 1; jj++) {
2007: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
2008: }
2009: }
2010: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2011: PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2012: }
2013: }
2014: PetscCall(PetscFree(values));
2015: /* do not copy values to GPU since they are all zero and not yet needed there */
2016: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2017: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2018: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2019: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2020: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2021: }
2022: PetscCall(PetscFree(cols));
2023: PetscFunctionReturn(PETSC_SUCCESS);
2024: }
2026: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2027: {
2028: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2029: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2030: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2031: MPI_Comm comm;
2032: PetscScalar *values;
2033: DMBoundaryType bx, by, bz;
2034: DMDAStencilType st;
2035: ISLocalToGlobalMapping ltog;
2037: PetscFunctionBegin;
2038: /*
2039: nc - number of components per grid point
2040: col - number of colors needed in one direction for single component problem
2041: */
2042: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2043: col = 2 * s + 1;
2045: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2046: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2047: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2049: /* work array for column indices */
2050: PetscCall(PetscMalloc1(col * col * col, &cols));
2052: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2054: /* determine the matrix preallocation information */
2055: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2056: for (i = xs; i < xs + nx; i++) {
2057: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2058: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2059: for (j = ys; j < ys + ny; j++) {
2060: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2061: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2062: for (k = zs; k < zs + nz; k++) {
2063: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2064: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2066: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2068: /* Find block columns in block row */
2069: cnt = 0;
2070: for (ii = istart; ii < iend + 1; ii++) {
2071: for (jj = jstart; jj < jend + 1; jj++) {
2072: for (kk = kstart; kk < kend + 1; kk++) {
2073: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2074: }
2075: }
2076: }
2077: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2078: PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2079: }
2080: }
2081: }
2082: PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2083: PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2084: MatPreallocateEnd(dnz, onz);
2086: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2088: /*
2089: For each node in the grid: we get the neighboring block indices in the local (on-process,
2090: including ghost points) ordering, then L2GFilterUpperTriangular() maps them to the global
2091: PETSc ordering and drops the lower triangle before MatSetValuesBlocked().
2092: */
2093: if (!da->prealloc_only) {
2094: PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2095: for (i = xs; i < xs + nx; i++) {
2096: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2097: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2098: for (j = ys; j < ys + ny; j++) {
2099: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2100: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2101: for (k = zs; k < zs + nz; k++) {
2102: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2103: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2105: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2107: cnt = 0;
2108: for (ii = istart; ii < iend + 1; ii++) {
2109: for (jj = jstart; jj < jend + 1; jj++) {
2110: for (kk = kstart; kk < kend + 1; kk++) {
2111: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2112: }
2113: }
2114: }
2115: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2116: PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2117: }
2118: }
2119: }
2120: PetscCall(PetscFree(values));
2121: /* do not copy values to GPU since they are all zero and not yet needed there */
2122: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2123: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2124: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2125: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2126: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2127: }
2128: PetscCall(PetscFree(cols));
2129: PetscFunctionReturn(PETSC_SUCCESS);
2130: }
2132: /* ---------------------------------------------------------------------------------*/
2134: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2135: {
2136: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2137: PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2138: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2139: DM_DA *dd = (DM_DA *)da->data;
2140: PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2141: MPI_Comm comm;
2142: PetscScalar *values;
2143: DMBoundaryType bx, by, bz;
2144: ISLocalToGlobalMapping ltog;
2145: DMDAStencilType st;
2146: PetscBool removedups = PETSC_FALSE;
2148: PetscFunctionBegin;
2149: /*
2150: nc - number of components per grid point
2151: col - number of colors needed in one direction for single component problem
2152: */
2153: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2154: col = 2 * s + 1;
2156: /*
2157: With one process in a periodic, skinny dimension the code would label nonzero columns multiple times
2158: because the stencil "wraps" around the end of the domain and hits an entry already counted from the other direction.
2159: */
2160: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2161: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2162: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2164: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2165: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2166: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2168: PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2169: PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2171: /* determine the matrix preallocation information */
2172: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2174: PetscCall(MatSetBlockSize(J, nc));
2175: for (i = xs; i < xs + nx; i++) {
2176: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2177: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2178: for (j = ys; j < ys + ny; j++) {
2179: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2180: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2181: for (k = zs; k < zs + nz; k++) {
2182: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2183: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2185: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2187: for (l = 0; l < nc; l++) {
2188: cnt = 0;
2189: for (ii = istart; ii < iend + 1; ii++) {
2190: for (jj = jstart; jj < jend + 1; jj++) {
2191: for (kk = kstart; kk < kend + 1; kk++) {
2192: if (ii || jj || kk) {
2193: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2194: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2195: }
2196: } else {
2197: if (dfill) {
2198: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2199: } else {
2200: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2201: }
2202: }
2203: }
2204: }
2205: }
2206: row = l + nc * (slot);
2207: maxcnt = PetscMax(maxcnt, cnt);
2208: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2209: else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2210: }
2211: }
2212: }
2213: }
2214: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2215: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2216: MatPreallocateEnd(dnz, onz);
2217: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2219: /*
2220: For each node in the grid: we get the neighbors in the local (on-process, including ghost
2221: points) ordering, then MatSetValuesLocal() maps those indices to the global
2222: PETSc ordering.
2223: */
2224: if (!da->prealloc_only) {
2225: PetscCall(PetscCalloc1(maxcnt, &values));
2226: for (i = xs; i < xs + nx; i++) {
2227: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2228: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2229: for (j = ys; j < ys + ny; j++) {
2230: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2231: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2232: for (k = zs; k < zs + nz; k++) {
2233: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2234: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2236: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2238: for (l = 0; l < nc; l++) {
2239: cnt = 0;
2240: for (ii = istart; ii < iend + 1; ii++) {
2241: for (jj = jstart; jj < jend + 1; jj++) {
2242: for (kk = kstart; kk < kend + 1; kk++) {
2243: if (ii || jj || kk) {
2244: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2245: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2246: }
2247: } else {
2248: if (dfill) {
2249: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2250: } else {
2251: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2252: }
2253: }
2254: }
2255: }
2256: }
2257: row = l + nc * (slot);
2258: PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2259: }
2260: }
2261: }
2262: }
2263: PetscCall(PetscFree(values));
2264: /* do not copy values to GPU since they are all zero and not yet needed there */
2265: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2266: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2267: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2268: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2269: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2270: }
2271: PetscCall(PetscFree(cols));
2272: PetscFunctionReturn(PETSC_SUCCESS);
2273: }
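/*
  Worked example (added for exposition, not part of fdda.c): the loops over ofill[l] .. ofill[l+1]
  above walk one row of the modified-CSR fill pattern built by DMDASetBlockFills_Private(). Assuming
  nc = 2 and an off-diagonal pattern in which only component 0 couples to component 1, the stored
  array is ofill = {3, 4, 4, 1}: positions 0..2 hold the row starts, and position 3 holds row 0's
  single column index 1. For l = 0 the loop visits ofill[3] = 1; for l = 1 it visits nothing.
*/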