Actual source code: fdda.c
1: /*$Id: fdda.c,v 1.66 2001/04/16 03:16:27 bsmith Exp $*/
2:
3: #include petscda.h
4: #include petscmat.h
5: #include src/dm/da/daimpl.h
7: EXTERN int DAGetColoring1d_MPIAIJ(DA,ISColoringType,ISColoring *,Mat *);
8: EXTERN int DAGetColoring2d_MPIAIJ(DA,ISColoringType,ISColoring *,Mat *);
9: EXTERN int DAGetColoring2d_5pt_MPIAIJ(DA,ISColoringType,ISColoring *,Mat *);
10: EXTERN int DAGetColoring3d_MPIAIJ(DA,ISColoringType,ISColoring *,Mat *);
11: EXTERN int DAGetColoring3d_MPIBAIJ(DA,ISColoringType,ISColoring *,Mat *);
13: /*@C
14: DAGetColoring - Gets the coloring and nonzero structure required for computing the Jacobian via
15: finite differences on a function defined using a stencil on the DA.
17: Collective on DA
19: Input Parameter:
20: + da - the distributed array
21: - mtype - either MATMPIAIJ or MATMPIBAIJ
23: Output Parameters:
24: + coloring - matrix coloring for use in computing Jacobians (or PETSC_NULL if not needed)
25: - J - matrix with the correct nonzero structure (or PETSC_NULL if not needed)
26: (obviously without the correct Jacobian values)
28: Level: advanced
30: Notes: These compute the graph coloring of the graph of A^{T}A. The coloring used
31: for efficient (parallel or thread based) triangular solves etc is NOT yet
32: available.
34: This does not yet handle BAIJ matrices, because
35: 1) we need a way for the user to indicate what matrix type they want
36: 2) after the matrix is created, for BAIJ matrices we need to set nc to 1 and
37: use MatSetValuesBlockedLocal() instead of MatSetValuesLocal()
39: .seealso ISColoringView(), ISColoringGetIS(), MatFDColoringCreate(), DAGetColoringMPIBAIJ()
41: @*/
42: int DAGetColoring(DA da,ISColoringType ctype,MatType mtype,ISColoring *coloring,Mat *J)
43: {
44: int ierr,dim;
45: PetscTruth aij;
48: /*
49: m
50: ------------------------------------------------------
51: | |
52: | |
53: | ---------------------- |
54: | | | |
55: n | yn | | |
56: | | | |
57: | .--------------------- |
58: | (xs,ys) xn |
59: | . |
60: | (gxs,gys) |
61: | |
62: -----------------------------------------------------
63: */
65: /*
66: nc - number of components per grid point
67: col - number of colors needed in one direction for single component problem
68:
69: */
70: DAGetInfo(da,&dim,0,0,0,0,0,0,0,0,0,0);
72: /*
73: We do not provide a getcoloring function in the DA operations because
74: the basic DA does not know about matrices. We think of DA as being more
75: more low-level then matrices.
76: */
77: PetscStrcmp(MATMPIAIJ,mtype,&aij);
78: if (aij) {
79: if (dim == 1) {
80: DAGetColoring1d_MPIAIJ(da,ctype,coloring,J);
81: } else if (dim == 2) {
82: DAGetColoring2d_MPIAIJ(da,ctype,coloring,J);
83: } else if (dim == 3) {
84: DAGetColoring3d_MPIAIJ(da,ctype,coloring,J);
85: }
86: } else {
87: if (dim == 3) {
88: DAGetColoring3d_MPIBAIJ(da,ctype,coloring,J);
89: } else {
90: SETERRQ1(1,"Not done for %d dimension, send use mail petsc-maint@mcs.anl.gov for code",dim);
91: }
92: }
93: return(0);
94: }
96: /* ---------------------------------------------------------------------------------*/
98: int DAGetColoring2d_MPIAIJ(DA da,ISColoringType ctype,ISColoring *coloring,Mat *J)
99: {
100: int ierr,xs,ys,nx,ny,*colors,i,j,ii,slot,gxs,gys,gnx,gny;
101: int m,n,dim,w,s,*cols,k,nc,*rows,col,cnt,l,p;
102: int lstart,lend,pstart,pend,*dnz,*onz,size;
103: MPI_Comm comm;
104: Scalar *values;
105: DAPeriodicType wrap;
106: ISLocalToGlobalMapping ltog;
107: DAStencilType st;
110: /*
111: nc - number of components per grid point
112: col - number of colors needed in one direction for single component problem
113:
114: */
115: DAGetInfo(da,&dim,&m,&n,0,0,0,0,&w,&s,&wrap,&st);
116: if (st == DA_STENCIL_STAR && s == 1) {
117: DAGetColoring2d_5pt_MPIAIJ(da,ctype,coloring,J);
118: return(0);
119: }
120: nc = w;
121: col = 2*s + 1;
122: if (DAXPeriodic(wrap) && (m % col)){
123: SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisiblen
124: by 2*stencil_width + 1n");
125: }
126: if (DAYPeriodic(wrap) && (n % col)){
127: SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisiblen
128: by 2*stencil_width + 1n");
129: }
130: DAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
131: DAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
132: PetscObjectGetComm((PetscObject)da,&comm);
133: MPI_Comm_size(comm,&size);
135: /* create the coloring */
136: if (coloring) {
137: if (ctype == IS_COLORING_GLOBAL) {
138: PetscMalloc(nc*nx*ny*sizeof(int),&colors);
139: ii = 0;
140: for (j=ys; j<ys+ny; j++) {
141: for (i=xs; i<xs+nx; i++) {
142: for (k=0; k<nc; k++) {
143: colors[ii++] = k + nc*((i % col) + col*(j % col));
144: }
145: }
146: }
147: ISColoringCreate(comm,nc*nx*ny,colors,coloring);
148: } else if (ctype == IS_COLORING_LOCAL) {
149: PetscMalloc(nc*gnx*gny*sizeof(int),&colors);
150: ii = 0;
151: for (j=gys; j<gys+gny; j++) {
152: for (i=gxs; i<gxs+gnx; i++) {
153: for (k=0; k<nc; k++) {
154: colors[ii++] = k + nc*((i % col) + col*(j % col));
155: }
156: }
157: }
158: ISColoringCreate(comm,nc*gnx*gny,colors,coloring);
159: } else SETERRQ1(1,"Unknown ISColoringType %d",ctype);
160: }
162: /* Create the matrix */
163: if (J) {
164: int bs = nc,dims[2],starts[2];
165: /* create empty Jacobian matrix */
166: ierr = MatCreate(comm,nc*nx*ny,nc*nx*ny,PETSC_DECIDE,PETSC_DECIDE,J);
168: PetscMalloc(col*col*nc*nc*sizeof(Scalar),&values);
169: PetscMemzero(values,col*col*nc*nc*sizeof(Scalar));
170: PetscMalloc(nc*sizeof(int),&rows);
171: PetscMalloc(col*col*nc*nc*sizeof(int),&cols);
172: DAGetISLocalToGlobalMapping(da,<og);
174: /* determine the matrix preallocation information */
175: MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
176: for (i=xs; i<xs+nx; i++) {
178: pstart = PetscMax(-s,-i);
179: pend = PetscMin(s,m-i-1);
181: for (j=ys; j<ys+ny; j++) {
182: slot = i - gxs + gnx*(j - gys);
184: lstart = PetscMax(-s,-j);
185: lend = PetscMin(s,n-j-1);
187: cnt = 0;
188: for (k=0; k<nc; k++) {
189: for (l=lstart; l<lend+1; l++) {
190: for (p=pstart; p<pend+1; p++) {
191: if ((st == DA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
192: cols[cnt++] = k + nc*(slot + gnx*l + p);
193: }
194: }
195: }
196: rows[k] = k + nc*(slot);
197: }
198: MatPreallocateSetLocal(ltog,nc,rows,cnt,cols,dnz,onz);
199: }
200: }
201: /* set matrix type and preallocation information */
202: if (size > 1) {
203: MatSetType(*J,MATMPIAIJ);
204: } else {
205: MatSetType(*J,MATSEQAIJ);
206: }
207: MatSeqAIJSetPreallocation(*J,0,dnz);
208: MatSeqBAIJSetPreallocation(*J,bs,0,dnz);
209: MatMPIAIJSetPreallocation(*J,0,dnz,0,onz);
210: MatMPIBAIJSetPreallocation(*J,bs,0,dnz,0,onz);
211: MatPreallocateFinalize(dnz,onz);
212: MatSetLocalToGlobalMapping(*J,ltog);
213: DAGetGhostCorners(da,&starts[0],&starts[1],PETSC_IGNORE,&dims[0],&dims[1],PETSC_IGNORE);
214: MatSetStencil(*J,2,dims,starts,nc);
216: /*
217: For each node in the grid: we get the neighbors in the local (on processor ordering
218: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
219: PETSc ordering.
220: */
221: for (i=xs; i<xs+nx; i++) {
223: pstart = PetscMax(-s,-i);
224: pend = PetscMin(s,m-i-1);
226: for (j=ys; j<ys+ny; j++) {
227: slot = i - gxs + gnx*(j - gys);
229: lstart = PetscMax(-s,-j);
230: lend = PetscMin(s,n-j-1);
232: cnt = 0;
233: for (k=0; k<nc; k++) {
234: for (l=lstart; l<lend+1; l++) {
235: for (p=pstart; p<pend+1; p++) {
236: if ((st == DA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
237: cols[cnt++] = k + nc*(slot + gnx*l + p);
238: }
239: }
240: }
241: rows[k] = k + nc*(slot);
242: }
243: MatSetValuesLocal(*J,nc,rows,cnt,cols,values,INSERT_VALUES);
244: }
245: }
246: PetscFree(values);
247: PetscFree(rows);
248: PetscFree(cols);
249: MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
250: MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
251: }
252: return(0);
253: }
255: /* ---------------------------------------------------------------------------------*/
257: int DAGetColoring3d_MPIAIJ(DA da,ISColoringType ctype,ISColoring *coloring,Mat *J)
258: {
259: int ierr,xs,ys,nx,ny,*colors,i,j,slot,gxs,gys,gnx,gny;
260: int m,n,dim,s,*cols,k,nc,*rows,col,cnt,l,p,*dnz,*onz;
261: int istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,size;
262: MPI_Comm comm;
263: Scalar *values;
264: DAPeriodicType wrap;
265: ISLocalToGlobalMapping ltog;
266: DAStencilType st;
269: /*
270: nc - number of components per grid point
271: col - number of colors needed in one direction for single component problem
272:
273: */
274: DAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);
275: col = 2*s + 1;
276: if (DAXPeriodic(wrap) && (m % col)){
277: SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisiblen
278: by 2*stencil_width + 1n");
279: }
280: if (DAYPeriodic(wrap) && (n % col)){
281: SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisiblen
282: by 2*stencil_width + 1n");
283: }
284: if (DAZPeriodic(wrap) && (p % col)){
285: SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisiblen
286: by 2*stencil_width + 1n");
287: }
289: DAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
290: DAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
291: PetscObjectGetComm((PetscObject)da,&comm);
292: MPI_Comm_size(comm,&size);
294: /* create the coloring */
295: if (coloring) {
296: if (ctype == IS_COLORING_GLOBAL) {
297: PetscMalloc(nc*nx*ny*nz*sizeof(int),&colors);
298: ii = 0;
299: for (k=zs; k<zs+nz; k++) {
300: for (j=ys; j<ys+ny; j++) {
301: for (i=xs; i<xs+nx; i++) {
302: for (l=0; l<nc; l++) {
303: colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
304: }
305: }
306: }
307: }
308: ISColoringCreate(comm,nc*nx*ny*nz,colors,coloring);
309: } else if (ctype == IS_COLORING_LOCAL) {
310: PetscMalloc(nc*gnx*gny*gnz*sizeof(int),&colors);
311: ii = 0;
312: for (k=gzs; k<gzs+gnz; k++) {
313: for (j=gys; j<gys+gny; j++) {
314: for (i=gxs; i<gxs+gnx; i++) {
315: for (l=0; l<nc; l++) {
316: colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
317: }
318: }
319: }
320: }
321: ISColoringCreate(comm,nc*gnx*gny*gnz,colors,coloring);
322: } else SETERRQ1(1,"Unknown ISColoringType %d",ctype);
323: }
325: /* create the matrix */
326: if (J) {
327: int bs = nc,dims[3],starts[3];
328: /* create empty Jacobian matrix */
329: MatCreate(comm,nc*nx*ny*nz,nc*nx*ny*nz,PETSC_DECIDE,PETSC_DECIDE,J);
330: PetscMalloc(col*col*col*nc*nc*nc*sizeof(Scalar),&values);
331: PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(Scalar));
332: PetscMalloc(nc*sizeof(int),&rows);
333: PetscMalloc(col*col*col*nc*sizeof(int),&cols);
334: DAGetISLocalToGlobalMapping(da,<og);
336: /* determine the matrix preallocation information */
337: MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
338: for (i=xs; i<xs+nx; i++) {
339: istart = PetscMax(-s,-i);
340: iend = PetscMin(s,m-i-1);
341: for (j=ys; j<ys+ny; j++) {
342: jstart = PetscMax(-s,-j);
343: jend = PetscMin(s,n-j-1);
344: for (k=zs; k<zs+nz; k++) {
345: kstart = PetscMax(-s,-k);
346: kend = PetscMin(s,p-k-1);
348: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
350: cnt = 0;
351: for (l=0; l<nc; l++) {
352: for (ii=istart; ii<iend+1; ii++) {
353: for (jj=jstart; jj<jend+1; jj++) {
354: for (kk=kstart; kk<kend+1; kk++) {
355: if ((st == DA_STENCIL_BOX) || (!ii || !jj || !kk)) { /* entries on star */
356: cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
357: }
358: }
359: }
360: }
361: rows[l] = l + nc*(slot);
362: }
363: MatPreallocateSetLocal(ltog,nc,rows,cnt,cols,dnz,onz);
364: }
365: }
366: }
367: /* set matrix type and preallocation */
368: if (size > 1) {
369: MatSetType(*J,MATMPIAIJ);
370: } else {
371: MatSetType(*J,MATSEQAIJ);
372: }
373: MatSeqAIJSetPreallocation(*J,0,dnz);
374: MatSeqBAIJSetPreallocation(*J,bs,0,dnz);
375: MatMPIAIJSetPreallocation(*J,0,dnz,0,onz);
376: MatMPIBAIJSetPreallocation(*J,bs,0,dnz,0,onz);
377: MatPreallocateFinalize(dnz,onz);
378: MatSetLocalToGlobalMapping(*J,ltog);
379: DAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
380: MatSetStencil(*J,3,dims,starts,nc);
382: /*
383: For each node in the grid: we get the neighbors in the local (on processor ordering
384: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
385: PETSc ordering.
386: */
387: for (i=xs; i<xs+nx; i++) {
388: istart = PetscMax(-s,-i);
389: iend = PetscMin(s,m-i-1);
390: for (j=ys; j<ys+ny; j++) {
391: jstart = PetscMax(-s,-j);
392: jend = PetscMin(s,n-j-1);
393: for (k=zs; k<zs+nz; k++) {
394: kstart = PetscMax(-s,-k);
395: kend = PetscMin(s,p-k-1);
397: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
399: cnt = 0;
400: for (l=0; l<nc; l++) {
401: for (ii=istart; ii<iend+1; ii++) {
402: for (jj=jstart; jj<jend+1; jj++) {
403: for (kk=kstart; kk<kend+1; kk++) {
404: if ((st == DA_STENCIL_BOX) || (!ii || !jj || !kk)) { /* entries on star */
405: cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
406: }
407: }
408: }
409: }
410: rows[l] = l + nc*(slot);
411: }
412: MatSetValuesLocal(*J,nc,rows,cnt,cols,values,INSERT_VALUES);
413: }
414: }
415: }
416: PetscFree(values);
417: PetscFree(rows);
418: PetscFree(cols);
419: MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
420: MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
421: }
422: return(0);
423: }
425: /* ---------------------------------------------------------------------------------*/
427: int DAGetColoring1d_MPIAIJ(DA da,ISColoringType ctype,ISColoring *coloring,Mat *J)
428: {
429: int ierr,xs,nx,*colors,i,i1,slot,gxs,gnx;
430: int m,dim,s,*cols,nc,*rows,col,cnt,l;
431: int istart,iend,size;
432: MPI_Comm comm;
433: Scalar *values;
434: DAPeriodicType wrap;
435: ISLocalToGlobalMapping ltog;
438: /*
439: nc - number of components per grid point
440: col - number of colors needed in one direction for single component problem
441:
442: */
443: DAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&wrap,0);
444: col = 2*s + 1;
446: if (DAXPeriodic(wrap) && (m % col)) {
447: SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points is divisiblen
448: by 2*stencil_width + 1n");
449: }
452: DAGetCorners(da,&xs,0,0,&nx,0,0);
453: DAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);
454: PetscObjectGetComm((PetscObject)da,&comm);
455: MPI_Comm_size(comm,&size);
457: /* create the coloring */
458: if (coloring) {
459: if (ctype == IS_COLORING_GLOBAL) {
460: PetscMalloc(nc*nx*sizeof(int),&colors);
461: i1 = 0;
462: for (i=xs; i<xs+nx; i++) {
463: for (l=0; l<nc; l++) {
464: colors[i1++] = l + nc*(i % col);
465: }
466: }
467: ISColoringCreate(comm,nc*nx,colors,coloring);
468: } else if (ctype == IS_COLORING_LOCAL) {
469: PetscMalloc(nc*gnx*sizeof(int),&colors);
470: i1 = 0;
471: for (i=gxs; i<gxs+gnx; i++) {
472: for (l=0; l<nc; l++) {
473: colors[i1++] = l + nc*(i % col);
474: }
475: }
476: ISColoringCreate(comm,nc*gnx,colors,coloring);
477: } else SETERRQ1(1,"Unknown ISColoringType %d",ctype);
478: }
480: /* create empty Jacobian matrix */
481: if (J) {
482: int bs = nc,dims[1],starts[1];
483:
484: ierr = MatCreate(comm,nc*nx,nc*nx,PETSC_DECIDE,PETSC_DECIDE,J);
485: if (size > 1) {
486: MatSetType(*J,MATMPIAIJ);
487: } else {
488: MatSetType(*J,MATSEQAIJ);
489: }
490: MatSeqAIJSetPreallocation(*J,col*nc,0);
491: MatSeqBAIJSetPreallocation(*J,bs,col,0);
492: MatMPIAIJSetPreallocation(*J,col*nc,0,0,0);
493: MatMPIBAIJSetPreallocation(*J,bs,col,0,0,0);
494: DAGetGhostCorners(da,&starts[0],PETSC_IGNORE,PETSC_IGNORE,&dims[0],PETSC_IGNORE,PETSC_IGNORE);
495: MatSetStencil(*J,1,dims,starts,nc);
497: PetscMalloc(col*nc*nc*sizeof(Scalar),&values);
498: PetscMemzero(values,col*nc*nc*sizeof(Scalar));
499: PetscMalloc(nc*sizeof(int),&rows);
500: PetscMalloc(col*nc*sizeof(int),&cols);
501:
502: DAGetISLocalToGlobalMapping(da,<og);
503: MatSetLocalToGlobalMapping(*J,ltog);
505: /*
506: For each node in the grid: we get the neighbors in the local (on processor ordering
507: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
508: PETSc ordering.
509: */
510: for (i=xs; i<xs+nx; i++) {
511: istart = PetscMax(-s,gxs - i);
512: iend = PetscMin(s,gxs + gnx - i - 1);
513: slot = i - gxs;
515: cnt = 0;
516: for (l=0; l<nc; l++) {
517: for (i1=istart; i1<iend+1; i1++) {
518: cols[cnt++] = l + nc*(slot + i1);
519: }
520: rows[l] = l + nc*(slot);
521: }
522: MatSetValuesLocal(*J,nc,rows,cnt,cols,values,INSERT_VALUES);
523: }
524: PetscFree(values);
525: PetscFree(rows);
526: PetscFree(cols);
527: MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
528: MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
529: }
530: return(0);
531: }
533: int DAGetColoring3d_MPIBAIJ(DA da,ISColoringType ctype,ISColoring *coloring,Mat *J)
534: {
535: int ierr,xs,ys,nx,ny,*colors,i,j,slot,gxs,gys,gnx,gny;
536: int m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
537: int istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
538: MPI_Comm comm;
539: Scalar *values;
540: DAPeriodicType wrap;
541: DAStencilType st;
542: ISLocalToGlobalMapping ltog;
545: /*
546: nc - number of components per grid point
547: col - number of colors needed in one direction for single component problem
548:
549: */
550: DAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);
551: if (wrap != DA_NONPERIODIC) SETERRQ(PETSC_ERR_SUP,"Currently no support for periodic");
552: col = 2*s + 1;
554: DAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
555: DAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
556: PetscObjectGetComm((PetscObject)da,&comm);
558: /* create the coloring */
559: if (coloring) {
560: if (ctype == IS_COLORING_GLOBAL) {
561: PetscMalloc(nx*ny*nz*sizeof(int),&colors);
562: ii = 0;
563: for (k=zs; k<zs+nz; k++) {
564: for (j=ys; j<ys+ny; j++) {
565: for (i=xs; i<xs+nx; i++) {
566: colors[ii++] = (i % col) + col*(j % col) + col*col*(k % col);
567: }
568: }
569: }
570: ISColoringCreate(comm,nx*ny*nz,colors,coloring);
571: } else if (ctype == IS_COLORING_LOCAL) {
572: PetscMalloc(gnx*gny*gnz*sizeof(int),&colors);
573: ii = 0;
574: for (k=gzs; k<gzs+gnz; k++) {
575: for (j=gys; j<gys+gny; j++) {
576: for (i=gxs; i<gxs+gnx; i++) {
577: colors[ii++] = (i % col) + col*(j % col) + col*col*(k % col);
578: }
579: }
580: }
581: ISColoringCreate(comm,gnx*gny*gnz,colors,coloring);
582: } else SETERRQ1(1,"Unknown ISColoringType %d",ctype);
583: }
585: /* create the matrix */
586: if (J) {
588: ierr = PetscMalloc(col*col*col*nc*nc*sizeof(Scalar),&values);
589: ierr = PetscMemzero(values,col*col*col*nc*nc*sizeof(Scalar));
590: ierr = PetscMalloc(col*col*col*sizeof(int),&cols);
592: DAGetISLocalToGlobalMappingBlck(da,<og);
594: /* determine the matrix preallocation information */
595: MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
596: for (i=xs; i<xs+nx; i++) {
597: istart = PetscMax(-s,-i);
598: iend = PetscMin(s,m-i-1);
599: for (j=ys; j<ys+ny; j++) {
600: jstart = PetscMax(-s,-j);
601: jend = PetscMin(s,n-j-1);
602: for (k=zs; k<zs+nz; k++) {
603: kstart = PetscMax(-s,-k);
604: kend = PetscMin(s,p-k-1);
606: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
608: /* Find block columns in block row */
609: cnt = 0;
610: if (st == DA_STENCIL_BOX) { /* if using BOX stencil */
611: for (ii=istart; ii<iend+1; ii++) {
612: for (jj=jstart; jj<jend+1; jj++) {
613: for (kk=kstart; kk<kend+1; kk++) {
614: cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
615: }
616: }
617: }
618: } else { /* Star stencil */
619: cnt = 0;
620: for (ii=istart; ii<iend+1; ii++) {
621: if (ii) {
622: /* jj and kk must be zero */
623: /* cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; */
624: cols[cnt++] = slot + ii;
625: } else {
626: for (jj=jstart; jj<jend+1; jj++) {
627: if (jj) {
628: /* ii and kk must be zero */
629: cols[cnt++] = slot + gnx*jj;
630: } else {
631: /* ii and jj must be zero */
632: for (kk=kstart; kk<kend+1; kk++) {
633: cols[cnt++] = slot + gnx*gny*kk;
634: }
635: }
636: }
637: }
638: }
639: }
640: MatPreallocateSetLocal(ltog,1,&slot,cnt,cols,dnz,onz);
641: }
642: }
643: }
645: /* create empty Jacobian matrix */
646: MatCreateMPIBAIJ(comm,nc,nc*nx*ny*nz,nc*nx*ny*nz,PETSC_DECIDE,
647: PETSC_DECIDE,0,dnz,0,onz,J);
649: MatPreallocateFinalize(dnz,onz);
650: MatSetLocalToGlobalMappingBlock(*J,ltog);
652: /*
653: For each node in the grid: we get the neighbors in the local (on processor ordering
654: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
655: PETSc ordering.
656: */
658: for (i=xs; i<xs+nx; i++) {
659: istart = PetscMax(-s,-i);
660: iend = PetscMin(s,m-i-1);
661: for (j=ys; j<ys+ny; j++) {
662: jstart = PetscMax(-s,-j);
663: jend = PetscMin(s,n-j-1);
664: for (k=zs; k<zs+nz; k++) {
665: kstart = PetscMax(-s,-k);
666: kend = PetscMin(s,p-k-1);
668: slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);
670: cnt = 0;
671: if (st == DA_STENCIL_BOX) { /* if using BOX stencil */
672: for (ii=istart; ii<iend+1; ii++) {
673: for (jj=jstart; jj<jend+1; jj++) {
674: for (kk=kstart; kk<kend+1; kk++) {
675: cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
676: }
677: }
678: }
679: } else { /* Star stencil */
680: cnt = 0;
681: for (ii=istart; ii<iend+1; ii++) {
682: if (ii) {
683: /* jj and kk must be zero */
684: /* cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; */
685: cols[cnt++] = slot + ii;
686: } else {
687: for (jj=jstart; jj<jend+1; jj++) {
688: if (jj) {
689: /* ii and kk must be zero */
690: cols[cnt++] = slot + gnx*jj;
691: } else {
692: /* ii and jj must be zero */
693: for (kk=kstart; kk<kend+1; kk++) {
694: cols[cnt++] = slot + gnx*gny*kk;
695: }
696: }
697: }
698: }
699: }
700: }
701: MatSetValuesBlockedLocal(*J,1,&slot,cnt,cols,values,INSERT_VALUES);
702: }
703: }
704: }
705: PetscFree(values);
706: PetscFree(cols);
707: MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
708: MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
709: }
710: return(0);
711: }
713: int DAGetColoring2d_5pt_MPIAIJ(DA da,ISColoringType ctype,ISColoring *coloring,Mat *J)
714: {
715: int ierr,xs,ys,nx,ny,*colors,i,j,ii,slot,gxs,gys,gnx,gny;
716: int m,n,dim,w,s,*cols,k,nc,*rows,col,cnt,l,p;
717: int lstart,lend,pstart,pend,*dnz,*onz,size;
718: MPI_Comm comm;
719: Scalar *values;
720: DAPeriodicType wrap;
721: ISLocalToGlobalMapping ltog;
722: DAStencilType st;
725: /*
726: nc - number of components per grid point
727: col - number of colors needed in one direction for single component problem
728:
729: */
730: DAGetInfo(da,&dim,&m,&n,0,0,0,0,&w,&s,&wrap,&st);
731: nc = w;
732: col = 2*s + 1;
733: if (DAXPeriodic(wrap) && (m % col)){
734: SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisiblen
735: by 2*stencil_width + 1n");
736: }
737: if (DAYPeriodic(wrap) && (n % col)){
738: SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisiblen
739: by 2*stencil_width + 1n");
740: }
741: DAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
742: DAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
743: PetscObjectGetComm((PetscObject)da,&comm);
744: MPI_Comm_size(comm,&size);
746: /* create the coloring */
747: if (coloring) {
748: if (ctype == IS_COLORING_GLOBAL) {
749: PetscMalloc(nc*nx*ny*sizeof(int),&colors);
750: ii = 0;
751: for (j=ys; j<ys+ny; j++) {
752: for (i=xs; i<xs+nx; i++) {
753: for (k=0; k<nc; k++) {
754: colors[ii++] = k + nc*((i % col) + col*(j % col));
755: }
756: }
757: }
758: ISColoringCreate(comm,nc*nx*ny,colors,coloring);
759: } else if (ctype == IS_COLORING_LOCAL) {
760: PetscMalloc(nc*gnx*gny*sizeof(int),&colors);
761: ii = 0;
762: for (j=gys; j<gys+gny; j++) {
763: for (i=gxs; i<gxs+gnx; i++) {
764: for (k=0; k<nc; k++) {
765: colors[ii++] = k + nc*((i % col) + col*(j % col));
766: }
767: }
768: }
769: ISColoringCreate(comm,nc*gnx*gny,colors,coloring);
770: } else SETERRQ1(1,"Unknown ISColoringType %d",ctype);
771: }
773: /*
774: This code below is identical to the general case; should reorganize
775: the code to have less duplications
776: */
777: /* Create the matrix */
778: if (J) {
779: int bs = nc,dims[2],starts[2];
780: /* create empty Jacobian matrix */
781: ierr = MatCreate(comm,nc*nx*ny,nc*nx*ny,PETSC_DECIDE,PETSC_DECIDE,J);
783: PetscMalloc(col*col*nc*nc*sizeof(Scalar),&values);
784: PetscMemzero(values,col*col*nc*nc*sizeof(Scalar));
785: PetscMalloc(nc*sizeof(int),&rows);
786: PetscMalloc(col*col*nc*nc*sizeof(int),&cols);
787: DAGetISLocalToGlobalMapping(da,<og);
789: /* determine the matrix preallocation information */
790: MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
791: for (i=xs; i<xs+nx; i++) {
793: pstart = PetscMax(-s,-i);
794: pend = PetscMin(s,m-i-1);
796: for (j=ys; j<ys+ny; j++) {
797: slot = i - gxs + gnx*(j - gys);
799: lstart = PetscMax(-s,-j);
800: lend = PetscMin(s,n-j-1);
802: cnt = 0;
803: for (k=0; k<nc; k++) {
804: for (l=lstart; l<lend+1; l++) {
805: for (p=pstart; p<pend+1; p++) {
806: if ((st == DA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
807: cols[cnt++] = k + nc*(slot + gnx*l + p);
808: }
809: }
810: }
811: rows[k] = k + nc*(slot);
812: }
813: MatPreallocateSetLocal(ltog,nc,rows,cnt,cols,dnz,onz);
814: }
815: }
816: /* set matrix type and preallocation information */
817: if (size > 1) {
818: MatSetType(*J,MATMPIAIJ);
819: } else {
820: MatSetType(*J,MATSEQAIJ);
821: }
822: MatSeqAIJSetPreallocation(*J,0,dnz);
823: MatSeqBAIJSetPreallocation(*J,bs,0,dnz);
824: MatMPIAIJSetPreallocation(*J,0,dnz,0,onz);
825: MatMPIBAIJSetPreallocation(*J,bs,0,dnz,0,onz);
826: MatPreallocateFinalize(dnz,onz);
827: MatSetLocalToGlobalMapping(*J,ltog);
828: DAGetGhostCorners(da,&starts[0],&starts[1],PETSC_IGNORE,&dims[0],&dims[1],PETSC_IGNORE);
829: MatSetStencil(*J,2,dims,starts,nc);
831: /*
832: For each node in the grid: we get the neighbors in the local (on processor ordering
833: that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
834: PETSc ordering.
835: */
836: for (i=xs; i<xs+nx; i++) {
838: pstart = PetscMax(-s,-i);
839: pend = PetscMin(s,m-i-1);
841: for (j=ys; j<ys+ny; j++) {
842: slot = i - gxs + gnx*(j - gys);
844: lstart = PetscMax(-s,-j);
845: lend = PetscMin(s,n-j-1);
847: cnt = 0;
848: for (k=0; k<nc; k++) {
849: for (l=lstart; l<lend+1; l++) {
850: for (p=pstart; p<pend+1; p++) {
851: if ((st == DA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
852: cols[cnt++] = k + nc*(slot + gnx*l + p);
853: }
854: }
855: }
856: rows[k] = k + nc*(slot);
857: }
858: MatSetValuesLocal(*J,nc,rows,cnt,cols,values,INSERT_VALUES);
859: }
860: }
861: PetscFree(values);
862: PetscFree(rows);
863: PetscFree(cols);
864: MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
865: MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
866: }
867: return(0);
868: }