Actual source code: fdda.c

  1: /*$Id: fdda.c,v 1.66 2001/04/16 03:16:27 bsmith Exp $*/
  2: 
 3:  #include petscda.h
 4:  #include petscmat.h
 5:  #include src/dm/da/daimpl.h

  7: EXTERN int DAGetColoring1d_MPIAIJ(DA,ISColoringType,ISColoring *,Mat *);
  8: EXTERN int DAGetColoring2d_MPIAIJ(DA,ISColoringType,ISColoring *,Mat *);
  9: EXTERN int DAGetColoring2d_5pt_MPIAIJ(DA,ISColoringType,ISColoring *,Mat *);
 10: EXTERN int DAGetColoring3d_MPIAIJ(DA,ISColoringType,ISColoring *,Mat *);
 11: EXTERN int DAGetColoring3d_MPIBAIJ(DA,ISColoringType,ISColoring *,Mat *);

 13: /*@C
 14:     DAGetColoring - Gets the coloring and nonzero structure required for computing the Jacobian via
 15:     finite differences on a function defined using a stencil on the DA.

 17:     Collective on DA

 19:     Input Parameter:
 20: +   da - the distributed array
 21: -   mtype - either MATMPIAIJ or MATMPIBAIJ

 23:     Output Parameters:
 24: +   coloring - matrix coloring for use in computing Jacobians (or PETSC_NULL if not needed)
 25: -   J  - matrix with the correct nonzero structure  (or PETSC_NULL if not needed)
 26:         (obviously without the correct Jacobian values)

 28:     Level: advanced

 30:     Notes: These compute the graph coloring of the graph of A^{T}A. The coloring used 
 31:    for efficient (parallel or thread based) triangular solves etc is NOT yet 
 32:    available. 

 34:    This does not yet handle BAIJ matrices, because
 35:       1) we need a way for the user to indicate what matrix type they want
 36:       2) after the matrix is created, for BAIJ matrices we need to set nc to 1 and
 37:          use MatSetValuesBlockedLocal() instead of MatSetValuesLocal()

 39: .seealso ISColoringView(), ISColoringGetIS(), MatFDColoringCreate(), DAGetColoringMPIBAIJ()

 41: @*/
 42: int DAGetColoring(DA da,ISColoringType ctype,MatType mtype,ISColoring *coloring,Mat *J)
 43: {
 44:   int        ierr,dim;
 45:   PetscTruth aij;

 48:   /*
 49:                                   m
 50:           ------------------------------------------------------
 51:          |                                                     |
 52:          |                                                     |
 53:          |               ----------------------                |
 54:          |               |                    |                |
 55:       n  |           yn  |                    |                |
 56:          |               |                    |                |
 57:          |               .---------------------                |
 58:          |             (xs,ys)     xn                          |
 59:          |            .                                        |
 60:          |         (gxs,gys)                                   |
 61:          |                                                     |
 62:           -----------------------------------------------------
 63:   */

 65:   /*     
 66:          nc - number of components per grid point 
 67:          col - number of colors needed in one direction for single component problem
 68:   
 69:   */
 70:   DAGetInfo(da,&dim,0,0,0,0,0,0,0,0,0,0);

 72:   /*
 73:      We do not provide a getcoloring function in the DA operations because 
 74:    the basic DA does not know about matrices. We think of DA as being more 
 75:    more low-level then matrices.
 76:   */
 77:   PetscStrcmp(MATMPIAIJ,mtype,&aij);
 78:   if (aij) {
 79:     if (dim == 1) {
 80:       DAGetColoring1d_MPIAIJ(da,ctype,coloring,J);
 81:     } else if (dim == 2) {
 82:        DAGetColoring2d_MPIAIJ(da,ctype,coloring,J);
 83:     } else if (dim == 3) {
 84:        DAGetColoring3d_MPIAIJ(da,ctype,coloring,J);
 85:     }
 86:   } else {
 87:     if (dim == 3) {
 88:        DAGetColoring3d_MPIBAIJ(da,ctype,coloring,J);
 89:   } else {
 90:       SETERRQ1(1,"Not done for %d dimension, send use mail petsc-maint@mcs.anl.gov for code",dim);
 91:     }
 92:   }
 93:   return(0);
 94: }

 96: /* ---------------------------------------------------------------------------------*/

 98: int DAGetColoring2d_MPIAIJ(DA da,ISColoringType ctype,ISColoring *coloring,Mat *J)
 99: {
100:   int                    ierr,xs,ys,nx,ny,*colors,i,j,ii,slot,gxs,gys,gnx,gny;
101:   int                    m,n,dim,w,s,*cols,k,nc,*rows,col,cnt,l,p;
102:   int                    lstart,lend,pstart,pend,*dnz,*onz,size;
103:   MPI_Comm               comm;
104:   Scalar                 *values;
105:   DAPeriodicType         wrap;
106:   ISLocalToGlobalMapping ltog;
107:   DAStencilType          st;

110:   /*     
111:          nc - number of components per grid point 
112:          col - number of colors needed in one direction for single component problem
113:   
114:   */
115:   DAGetInfo(da,&dim,&m,&n,0,0,0,0,&w,&s,&wrap,&st);
116:   if (st == DA_STENCIL_STAR && s == 1) {
117:     DAGetColoring2d_5pt_MPIAIJ(da,ctype,coloring,J);
118:     return(0);
119:   }
120:   nc     = w;
121:   col    = 2*s + 1;
122:   if (DAXPeriodic(wrap) && (m % col)){
123:     SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisiblen
124:                  by 2*stencil_width + 1n");
125:   }
126:   if (DAYPeriodic(wrap) && (n % col)){
127:     SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisiblen
128:                  by 2*stencil_width + 1n");
129:   }
130:   DAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
131:   DAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
132:   PetscObjectGetComm((PetscObject)da,&comm);
133:   MPI_Comm_size(comm,&size);

135:   /* create the coloring */
136:   if (coloring) {
137:     if (ctype == IS_COLORING_GLOBAL) {
138:       PetscMalloc(nc*nx*ny*sizeof(int),&colors);
139:       ii = 0;
140:       for (j=ys; j<ys+ny; j++) {
141:         for (i=xs; i<xs+nx; i++) {
142:           for (k=0; k<nc; k++) {
143:             colors[ii++] = k + nc*((i % col) + col*(j % col));
144:           }
145:         }
146:       }
147:       ISColoringCreate(comm,nc*nx*ny,colors,coloring);
148:     } else if (ctype == IS_COLORING_LOCAL) {
149:       PetscMalloc(nc*gnx*gny*sizeof(int),&colors);
150:       ii = 0;
151:       for (j=gys; j<gys+gny; j++) {
152:         for (i=gxs; i<gxs+gnx; i++) {
153:           for (k=0; k<nc; k++) {
154:             colors[ii++] = k + nc*((i % col) + col*(j % col));
155:           }
156:         }
157:       }
158:       ISColoringCreate(comm,nc*gnx*gny,colors,coloring);
159:     } else SETERRQ1(1,"Unknown ISColoringType %d",ctype);
160:   }

162:   /* Create the matrix */
163:   if (J) {
164:     int bs = nc,dims[2],starts[2];
165:     /* create empty Jacobian matrix */
166:     ierr    = MatCreate(comm,nc*nx*ny,nc*nx*ny,PETSC_DECIDE,PETSC_DECIDE,J);

168:     PetscMalloc(col*col*nc*nc*sizeof(Scalar),&values);
169:     PetscMemzero(values,col*col*nc*nc*sizeof(Scalar));
170:     PetscMalloc(nc*sizeof(int),&rows);
171:     PetscMalloc(col*col*nc*nc*sizeof(int),&cols);
172:     DAGetISLocalToGlobalMapping(da,&ltog);

174:     /* determine the matrix preallocation information */
175:     MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
176:     for (i=xs; i<xs+nx; i++) {

178:       pstart = PetscMax(-s,-i);
179:       pend   = PetscMin(s,m-i-1);

181:       for (j=ys; j<ys+ny; j++) {
182:         slot = i - gxs + gnx*(j - gys);

184:         lstart = PetscMax(-s,-j);
185:         lend   = PetscMin(s,n-j-1);

187:         cnt  = 0;
188:         for (k=0; k<nc; k++) {
189:           for (l=lstart; l<lend+1; l++) {
190:             for (p=pstart; p<pend+1; p++) {
191:               if ((st == DA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
192:                 cols[cnt++]  = k + nc*(slot + gnx*l + p);
193:               }
194:             }
195:           }
196:           rows[k] = k + nc*(slot);
197:         }
198:         MatPreallocateSetLocal(ltog,nc,rows,cnt,cols,dnz,onz);
199:       }
200:     }
201:     /* set matrix type and preallocation information */
202:     if (size > 1) {
203:       MatSetType(*J,MATMPIAIJ);
204:     } else {
205:       MatSetType(*J,MATSEQAIJ);
206:     }
207:     MatSeqAIJSetPreallocation(*J,0,dnz);
208:     MatSeqBAIJSetPreallocation(*J,bs,0,dnz);
209:     MatMPIAIJSetPreallocation(*J,0,dnz,0,onz);
210:     MatMPIBAIJSetPreallocation(*J,bs,0,dnz,0,onz);
211:     MatPreallocateFinalize(dnz,onz);
212:     MatSetLocalToGlobalMapping(*J,ltog);
213:     DAGetGhostCorners(da,&starts[0],&starts[1],PETSC_IGNORE,&dims[0],&dims[1],PETSC_IGNORE);
214:     MatSetStencil(*J,2,dims,starts,nc);

216:     /*
217:       For each node in the grid: we get the neighbors in the local (on processor ordering
218:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
219:     PETSc ordering.
220:     */
221:     for (i=xs; i<xs+nx; i++) {

223:       pstart = PetscMax(-s,-i);
224:       pend   = PetscMin(s,m-i-1);

226:       for (j=ys; j<ys+ny; j++) {
227:         slot = i - gxs + gnx*(j - gys);

229:         lstart = PetscMax(-s,-j);
230:         lend   = PetscMin(s,n-j-1);

232:         cnt  = 0;
233:         for (k=0; k<nc; k++) {
234:           for (l=lstart; l<lend+1; l++) {
235:             for (p=pstart; p<pend+1; p++) {
236:               if ((st == DA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
237:                 cols[cnt++]  = k + nc*(slot + gnx*l + p);
238:               }
239:             }
240:           }
241:           rows[k]      = k + nc*(slot);
242:         }
243:         MatSetValuesLocal(*J,nc,rows,cnt,cols,values,INSERT_VALUES);
244:       }
245:     }
246:     PetscFree(values);
247:     PetscFree(rows);
248:     PetscFree(cols);
249:     MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
250:     MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
251:   }
252:   return(0);
253: }

255: /* ---------------------------------------------------------------------------------*/

257: int DAGetColoring3d_MPIAIJ(DA da,ISColoringType ctype,ISColoring *coloring,Mat *J)
258: {
259:   int                    ierr,xs,ys,nx,ny,*colors,i,j,slot,gxs,gys,gnx,gny;
260:   int                    m,n,dim,s,*cols,k,nc,*rows,col,cnt,l,p,*dnz,*onz;
261:   int                    istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,size;
262:   MPI_Comm               comm;
263:   Scalar                 *values;
264:   DAPeriodicType         wrap;
265:   ISLocalToGlobalMapping ltog;
266:   DAStencilType          st;

269:   /*     
270:          nc - number of components per grid point 
271:          col - number of colors needed in one direction for single component problem
272:   
273:   */
274:   DAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);
275:   col    = 2*s + 1;
276:   if (DAXPeriodic(wrap) && (m % col)){
277:     SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisiblen
278:                  by 2*stencil_width + 1n");
279:   }
280:   if (DAYPeriodic(wrap) && (n % col)){
281:     SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisiblen
282:                  by 2*stencil_width + 1n");
283:   }
284:   if (DAZPeriodic(wrap) && (p % col)){
285:     SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisiblen
286:                  by 2*stencil_width + 1n");
287:   }

289:   DAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
290:   DAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
291:   PetscObjectGetComm((PetscObject)da,&comm);
292:   MPI_Comm_size(comm,&size);

294:   /* create the coloring */
295:   if (coloring) {
296:     if (ctype == IS_COLORING_GLOBAL) {
297:       PetscMalloc(nc*nx*ny*nz*sizeof(int),&colors);
298:       ii = 0;
299:       for (k=zs; k<zs+nz; k++) {
300:         for (j=ys; j<ys+ny; j++) {
301:           for (i=xs; i<xs+nx; i++) {
302:             for (l=0; l<nc; l++) {
303:               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
304:             }
305:           }
306:         }
307:       }
308:       ISColoringCreate(comm,nc*nx*ny*nz,colors,coloring);
309:     } else if (ctype == IS_COLORING_LOCAL) {
310:       PetscMalloc(nc*gnx*gny*gnz*sizeof(int),&colors);
311:       ii = 0;
312:       for (k=gzs; k<gzs+gnz; k++) {
313:         for (j=gys; j<gys+gny; j++) {
314:           for (i=gxs; i<gxs+gnx; i++) {
315:             for (l=0; l<nc; l++) {
316:               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
317:             }
318:           }
319:         }
320:       }
321:       ISColoringCreate(comm,nc*gnx*gny*gnz,colors,coloring);
322:     } else SETERRQ1(1,"Unknown ISColoringType %d",ctype);
323:   }

325:   /* create the matrix */
326:   if (J) {
327:     int bs = nc,dims[3],starts[3];
328:     /* create empty Jacobian matrix */
329:     MatCreate(comm,nc*nx*ny*nz,nc*nx*ny*nz,PETSC_DECIDE,PETSC_DECIDE,J);
330:     PetscMalloc(col*col*col*nc*nc*nc*sizeof(Scalar),&values);
331:     PetscMemzero(values,col*col*col*nc*nc*nc*sizeof(Scalar));
332:     PetscMalloc(nc*sizeof(int),&rows);
333:     PetscMalloc(col*col*col*nc*sizeof(int),&cols);
334:     DAGetISLocalToGlobalMapping(da,&ltog);

336:     /* determine the matrix preallocation information */
337:     MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
338:     for (i=xs; i<xs+nx; i++) {
339:       istart = PetscMax(-s,-i);
340:       iend   = PetscMin(s,m-i-1);
341:       for (j=ys; j<ys+ny; j++) {
342:         jstart = PetscMax(-s,-j);
343:         jend   = PetscMin(s,n-j-1);
344:         for (k=zs; k<zs+nz; k++) {
345:           kstart = PetscMax(-s,-k);
346:           kend   = PetscMin(s,p-k-1);

348:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

350:           cnt  = 0;
351:           for (l=0; l<nc; l++) {
352:             for (ii=istart; ii<iend+1; ii++) {
353:               for (jj=jstart; jj<jend+1; jj++) {
354:                 for (kk=kstart; kk<kend+1; kk++) {
355:                   if ((st == DA_STENCIL_BOX) || (!ii || !jj || !kk)) {  /* entries on star  */
356:                     cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
357:                   }
358:                 }
359:               }
360:             }
361:             rows[l] = l + nc*(slot);
362:           }
363:           MatPreallocateSetLocal(ltog,nc,rows,cnt,cols,dnz,onz);
364:         }
365:       }
366:     }
367:     /* set matrix type and preallocation */
368:     if (size > 1) {
369:       MatSetType(*J,MATMPIAIJ);
370:     } else {
371:       MatSetType(*J,MATSEQAIJ);
372:     }
373:     MatSeqAIJSetPreallocation(*J,0,dnz);
374:     MatSeqBAIJSetPreallocation(*J,bs,0,dnz);
375:     MatMPIAIJSetPreallocation(*J,0,dnz,0,onz);
376:     MatMPIBAIJSetPreallocation(*J,bs,0,dnz,0,onz);
377:     MatPreallocateFinalize(dnz,onz);
378:     MatSetLocalToGlobalMapping(*J,ltog);
379:     DAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
380:     MatSetStencil(*J,3,dims,starts,nc);

382:     /*
383:       For each node in the grid: we get the neighbors in the local (on processor ordering
384:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
385:     PETSc ordering.
386:     */
387:     for (i=xs; i<xs+nx; i++) {
388:       istart = PetscMax(-s,-i);
389:       iend   = PetscMin(s,m-i-1);
390:       for (j=ys; j<ys+ny; j++) {
391:         jstart = PetscMax(-s,-j);
392:         jend   = PetscMin(s,n-j-1);
393:         for (k=zs; k<zs+nz; k++) {
394:           kstart = PetscMax(-s,-k);
395:           kend   = PetscMin(s,p-k-1);

397:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

399:           cnt  = 0;
400:           for (l=0; l<nc; l++) {
401:             for (ii=istart; ii<iend+1; ii++) {
402:               for (jj=jstart; jj<jend+1; jj++) {
403:                 for (kk=kstart; kk<kend+1; kk++) {
404:                   if ((st == DA_STENCIL_BOX) || (!ii || !jj || !kk)) {  /* entries on star  */
405:                     cols[cnt++]  = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
406:                   }
407:                 }
408:               }
409:             }
410:             rows[l]      = l + nc*(slot);
411:           }
412:           MatSetValuesLocal(*J,nc,rows,cnt,cols,values,INSERT_VALUES);
413:         }
414:       }
415:     }
416:     PetscFree(values);
417:     PetscFree(rows);
418:     PetscFree(cols);
419:     MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
420:     MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
421:   }
422:   return(0);
423: }

425: /* ---------------------------------------------------------------------------------*/

427: int DAGetColoring1d_MPIAIJ(DA da,ISColoringType ctype,ISColoring *coloring,Mat *J)
428: {
429:   int                    ierr,xs,nx,*colors,i,i1,slot,gxs,gnx;
430:   int                    m,dim,s,*cols,nc,*rows,col,cnt,l;
431:   int                    istart,iend,size;
432:   MPI_Comm               comm;
433:   Scalar                 *values;
434:   DAPeriodicType         wrap;
435:   ISLocalToGlobalMapping ltog;

438:   /*     
439:          nc - number of components per grid point 
440:          col - number of colors needed in one direction for single component problem
441:   
442:   */
443:   DAGetInfo(da,&dim,&m,0,0,0,0,0,&nc,&s,&wrap,0);
444:   col    = 2*s + 1;

446:   if (DAXPeriodic(wrap) && (m % col)) {
447:     SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points is divisiblen
448:                  by 2*stencil_width + 1n");
449:   }


452:   DAGetCorners(da,&xs,0,0,&nx,0,0);
453:   DAGetGhostCorners(da,&gxs,0,0,&gnx,0,0);
454:   PetscObjectGetComm((PetscObject)da,&comm);
455:   MPI_Comm_size(comm,&size);

457:   /* create the coloring */
458:   if (coloring) {
459:     if (ctype == IS_COLORING_GLOBAL) {
460:       PetscMalloc(nc*nx*sizeof(int),&colors);
461:       i1 = 0;
462:       for (i=xs; i<xs+nx; i++) {
463:         for (l=0; l<nc; l++) {
464:           colors[i1++] = l + nc*(i % col);
465:         }
466:       }
467:       ISColoringCreate(comm,nc*nx,colors,coloring);
468:     } else if (ctype == IS_COLORING_LOCAL) {
469:       PetscMalloc(nc*gnx*sizeof(int),&colors);
470:       i1 = 0;
471:       for (i=gxs; i<gxs+gnx; i++) {
472:         for (l=0; l<nc; l++) {
473:           colors[i1++] = l + nc*(i % col);
474:         }
475:       }
476:       ISColoringCreate(comm,nc*gnx,colors,coloring);
477:     } else SETERRQ1(1,"Unknown ISColoringType %d",ctype);
478:   }

480:   /* create empty Jacobian matrix */
481:   if (J) {
482:     int bs = nc,dims[1],starts[1];
483: 
484:     ierr    = MatCreate(comm,nc*nx,nc*nx,PETSC_DECIDE,PETSC_DECIDE,J);
485:     if (size > 1) {
486:       MatSetType(*J,MATMPIAIJ);
487:     } else {
488:       MatSetType(*J,MATSEQAIJ);
489:     }
490:     MatSeqAIJSetPreallocation(*J,col*nc,0);
491:     MatSeqBAIJSetPreallocation(*J,bs,col,0);
492:     MatMPIAIJSetPreallocation(*J,col*nc,0,0,0);
493:     MatMPIBAIJSetPreallocation(*J,bs,col,0,0,0);
494:     DAGetGhostCorners(da,&starts[0],PETSC_IGNORE,PETSC_IGNORE,&dims[0],PETSC_IGNORE,PETSC_IGNORE);
495:     MatSetStencil(*J,1,dims,starts,nc);

497:     PetscMalloc(col*nc*nc*sizeof(Scalar),&values);
498:     PetscMemzero(values,col*nc*nc*sizeof(Scalar));
499:     PetscMalloc(nc*sizeof(int),&rows);
500:     PetscMalloc(col*nc*sizeof(int),&cols);
501: 
502:     DAGetISLocalToGlobalMapping(da,&ltog);
503:     MatSetLocalToGlobalMapping(*J,ltog);

505:     /*
506:       For each node in the grid: we get the neighbors in the local (on processor ordering
507:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
508:     PETSc ordering.
509:     */
510:     for (i=xs; i<xs+nx; i++) {
511:       istart = PetscMax(-s,gxs - i);
512:       iend   = PetscMin(s,gxs + gnx - i - 1);
513:       slot   = i - gxs;

515:       cnt  = 0;
516:       for (l=0; l<nc; l++) {
517:         for (i1=istart; i1<iend+1; i1++) {
518:           cols[cnt++] = l + nc*(slot + i1);
519:         }
520:         rows[l]      = l + nc*(slot);
521:       }
522:       MatSetValuesLocal(*J,nc,rows,cnt,cols,values,INSERT_VALUES);
523:     }
524:     PetscFree(values);
525:     PetscFree(rows);
526:     PetscFree(cols);
527:     MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
528:     MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
529:   }
530:   return(0);
531: }

533: int DAGetColoring3d_MPIBAIJ(DA da,ISColoringType ctype,ISColoring *coloring,Mat *J)
534: {
535:   int                    ierr,xs,ys,nx,ny,*colors,i,j,slot,gxs,gys,gnx,gny;
536:   int                    m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
537:   int                    istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
538:   MPI_Comm               comm;
539:   Scalar                 *values;
540:   DAPeriodicType         wrap;
541:   DAStencilType          st;
542:   ISLocalToGlobalMapping ltog;

545:   /*     
546:          nc - number of components per grid point 
547:          col - number of colors needed in one direction for single component problem
548:   
549:   */
550:   DAGetInfo(da,&dim,&m,&n,&p,0,0,0,&nc,&s,&wrap,&st);
551:   if (wrap != DA_NONPERIODIC) SETERRQ(PETSC_ERR_SUP,"Currently no support for periodic");
552:   col    = 2*s + 1;

554:   DAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
555:   DAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
556:   PetscObjectGetComm((PetscObject)da,&comm);

558:   /* create the coloring */
559:   if (coloring) {
560:     if (ctype == IS_COLORING_GLOBAL) {
561:       PetscMalloc(nx*ny*nz*sizeof(int),&colors);
562:       ii   = 0;
563:       for (k=zs; k<zs+nz; k++) {
564:         for (j=ys; j<ys+ny; j++) {
565:           for (i=xs; i<xs+nx; i++) {
566:             colors[ii++] = (i % col) + col*(j % col) + col*col*(k % col);
567:           }
568:         }
569:       }
570:       ISColoringCreate(comm,nx*ny*nz,colors,coloring);
571:     } else if (ctype == IS_COLORING_LOCAL) {
572:       PetscMalloc(gnx*gny*gnz*sizeof(int),&colors);
573:       ii   = 0;
574:       for (k=gzs; k<gzs+gnz; k++) {
575:         for (j=gys; j<gys+gny; j++) {
576:           for (i=gxs; i<gxs+gnx; i++) {
577:             colors[ii++] = (i % col) + col*(j % col) + col*col*(k % col);
578:           }
579:         }
580:       }
581:       ISColoringCreate(comm,gnx*gny*gnz,colors,coloring);
582:     } else SETERRQ1(1,"Unknown ISColoringType %d",ctype);
583:   }

585:   /* create the matrix */
586:   if (J) {

588:     ierr  = PetscMalloc(col*col*col*nc*nc*sizeof(Scalar),&values);
589:     ierr  = PetscMemzero(values,col*col*col*nc*nc*sizeof(Scalar));
590:     ierr  = PetscMalloc(col*col*col*sizeof(int),&cols);

592:     DAGetISLocalToGlobalMappingBlck(da,&ltog);

594:     /* determine the matrix preallocation information */
595:     MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
596:     for (i=xs; i<xs+nx; i++) {
597:       istart = PetscMax(-s,-i);
598:       iend   = PetscMin(s,m-i-1);
599:       for (j=ys; j<ys+ny; j++) {
600:         jstart = PetscMax(-s,-j);
601:         jend   = PetscMin(s,n-j-1);
602:         for (k=zs; k<zs+nz; k++) {
603:           kstart = PetscMax(-s,-k);
604:           kend   = PetscMin(s,p-k-1);

606:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

608:           /* Find block columns in block row */
609:           cnt  = 0;
610:           if (st == DA_STENCIL_BOX) {   /* if using BOX stencil */
611:             for (ii=istart; ii<iend+1; ii++) {
612:               for (jj=jstart; jj<jend+1; jj++) {
613:                 for (kk=kstart; kk<kend+1; kk++) {
614:                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
615:                 }
616:               }
617:             }
618:           } else {  /* Star stencil */
619:             cnt  = 0;
620:             for (ii=istart; ii<iend+1; ii++) {
621:               if (ii) {
622:                 /* jj and kk must be zero */
623:                 /* cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk; */
624:                 cols[cnt++]  = slot + ii;
625:               } else {
626:                 for (jj=jstart; jj<jend+1; jj++) {
627:                   if (jj) {
628:                   /* ii and kk must be zero */
629:                     cols[cnt++]  = slot + gnx*jj;
630:                   } else {
631:                     /* ii and jj must be zero */
632:                     for (kk=kstart; kk<kend+1; kk++) {
633:                       cols[cnt++]  = slot + gnx*gny*kk;
634:                     }
635:                   }
636:                 }
637:               }
638:             }
639:           }
640:           MatPreallocateSetLocal(ltog,1,&slot,cnt,cols,dnz,onz);
641:         }
642:       }
643:     }

645:         /* create empty Jacobian matrix */
646:     MatCreateMPIBAIJ(comm,nc,nc*nx*ny*nz,nc*nx*ny*nz,PETSC_DECIDE,
647:                             PETSC_DECIDE,0,dnz,0,onz,J);

649:     MatPreallocateFinalize(dnz,onz);
650:     MatSetLocalToGlobalMappingBlock(*J,ltog);

652:     /*
653:       For each node in the grid: we get the neighbors in the local (on processor ordering
654:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
655:     PETSc ordering.
656:     */

658:     for (i=xs; i<xs+nx; i++) {
659:       istart = PetscMax(-s,-i);
660:       iend   = PetscMin(s,m-i-1);
661:       for (j=ys; j<ys+ny; j++) {
662:         jstart = PetscMax(-s,-j);
663:         jend   = PetscMin(s,n-j-1);
664:         for (k=zs; k<zs+nz; k++) {
665:           kstart = PetscMax(-s,-k);
666:           kend   = PetscMin(s,p-k-1);

668:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

670:           cnt  = 0;
671:           if (st == DA_STENCIL_BOX) {   /* if using BOX stencil */
672:             for (ii=istart; ii<iend+1; ii++) {
673:               for (jj=jstart; jj<jend+1; jj++) {
674:                 for (kk=kstart; kk<kend+1; kk++) {
675:                   cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk;
676:                 }
677:               }
678:             }
679:           } else {  /* Star stencil */
680:             cnt  = 0;
681:             for (ii=istart; ii<iend+1; ii++) {
682:               if (ii) {
683:                 /* jj and kk must be zero */
684:                 /* cols[cnt++]  = slot + ii + gnx*jj + gnx*gny*kk; */
685:                 cols[cnt++]  = slot + ii;
686:               } else {
687:                 for (jj=jstart; jj<jend+1; jj++) {
688:                   if (jj) {
689:                   /* ii and kk must be zero */
690:                     cols[cnt++]  = slot + gnx*jj;
691:                   } else {
692:                     /* ii and jj must be zero */
693:                     for (kk=kstart; kk<kend+1; kk++) {
694:                       cols[cnt++]  = slot + gnx*gny*kk;
695:                     }
696:                   }
697:                 }
698:               }
699:             }
700:           }
701:           MatSetValuesBlockedLocal(*J,1,&slot,cnt,cols,values,INSERT_VALUES);
702:         }
703:       }
704:     }
705:     PetscFree(values);
706:     PetscFree(cols);
707:     MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
708:     MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
709:   }
710:   return(0);
711: }

713: int DAGetColoring2d_5pt_MPIAIJ(DA da,ISColoringType ctype,ISColoring *coloring,Mat *J)
714: {
715:   int                    ierr,xs,ys,nx,ny,*colors,i,j,ii,slot,gxs,gys,gnx,gny;
716:   int                    m,n,dim,w,s,*cols,k,nc,*rows,col,cnt,l,p;
717:   int                    lstart,lend,pstart,pend,*dnz,*onz,size;
718:   MPI_Comm               comm;
719:   Scalar                 *values;
720:   DAPeriodicType         wrap;
721:   ISLocalToGlobalMapping ltog;
722:   DAStencilType          st;

725:   /*     
726:          nc - number of components per grid point 
727:          col - number of colors needed in one direction for single component problem
728:   
729:   */
730:   DAGetInfo(da,&dim,&m,&n,0,0,0,0,&w,&s,&wrap,&st);
731:   nc     = w;
732:   col    = 2*s + 1;
733:   if (DAXPeriodic(wrap) && (m % col)){
734:     SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisiblen
735:                  by 2*stencil_width + 1n");
736:   }
737:   if (DAYPeriodic(wrap) && (n % col)){
738:     SETERRQ(PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisiblen
739:                  by 2*stencil_width + 1n");
740:   }
741:   DAGetCorners(da,&xs,&ys,0,&nx,&ny,0);
742:   DAGetGhostCorners(da,&gxs,&gys,0,&gnx,&gny,0);
743:   PetscObjectGetComm((PetscObject)da,&comm);
744:   MPI_Comm_size(comm,&size);

746:   /* create the coloring */
747:   if (coloring) {
748:     if (ctype == IS_COLORING_GLOBAL) {
749:       PetscMalloc(nc*nx*ny*sizeof(int),&colors);
750:       ii = 0;
751:       for (j=ys; j<ys+ny; j++) {
752:         for (i=xs; i<xs+nx; i++) {
753:           for (k=0; k<nc; k++) {
754:             colors[ii++] = k + nc*((i % col) + col*(j % col));
755:           }
756:         }
757:       }
758:       ISColoringCreate(comm,nc*nx*ny,colors,coloring);
759:     } else if (ctype == IS_COLORING_LOCAL) {
760:       PetscMalloc(nc*gnx*gny*sizeof(int),&colors);
761:       ii = 0;
762:       for (j=gys; j<gys+gny; j++) {
763:         for (i=gxs; i<gxs+gnx; i++) {
764:           for (k=0; k<nc; k++) {
765:             colors[ii++] = k + nc*((i % col) + col*(j % col));
766:           }
767:         }
768:       }
769:       ISColoringCreate(comm,nc*gnx*gny,colors,coloring);
770:     } else SETERRQ1(1,"Unknown ISColoringType %d",ctype);
771:   }

773:   /*
774:       This code below is identical to the general case; should reorganize
775:     the code to have less duplications
776:   */
777:   /* Create the matrix */
778:   if (J) {
779:     int bs = nc,dims[2],starts[2];
780:     /* create empty Jacobian matrix */
781:     ierr    = MatCreate(comm,nc*nx*ny,nc*nx*ny,PETSC_DECIDE,PETSC_DECIDE,J);

783:     PetscMalloc(col*col*nc*nc*sizeof(Scalar),&values);
784:     PetscMemzero(values,col*col*nc*nc*sizeof(Scalar));
785:     PetscMalloc(nc*sizeof(int),&rows);
786:     PetscMalloc(col*col*nc*nc*sizeof(int),&cols);
787:     DAGetISLocalToGlobalMapping(da,&ltog);

789:     /* determine the matrix preallocation information */
790:     MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
791:     for (i=xs; i<xs+nx; i++) {

793:       pstart = PetscMax(-s,-i);
794:       pend   = PetscMin(s,m-i-1);

796:       for (j=ys; j<ys+ny; j++) {
797:         slot = i - gxs + gnx*(j - gys);

799:         lstart = PetscMax(-s,-j);
800:         lend   = PetscMin(s,n-j-1);

802:         cnt  = 0;
803:         for (k=0; k<nc; k++) {
804:           for (l=lstart; l<lend+1; l++) {
805:             for (p=pstart; p<pend+1; p++) {
806:               if ((st == DA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
807:                 cols[cnt++]  = k + nc*(slot + gnx*l + p);
808:               }
809:             }
810:           }
811:           rows[k] = k + nc*(slot);
812:         }
813:         MatPreallocateSetLocal(ltog,nc,rows,cnt,cols,dnz,onz);
814:       }
815:     }
816:     /* set matrix type and preallocation information */
817:     if (size > 1) {
818:       MatSetType(*J,MATMPIAIJ);
819:     } else {
820:       MatSetType(*J,MATSEQAIJ);
821:     }
822:     MatSeqAIJSetPreallocation(*J,0,dnz);
823:     MatSeqBAIJSetPreallocation(*J,bs,0,dnz);
824:     MatMPIAIJSetPreallocation(*J,0,dnz,0,onz);
825:     MatMPIBAIJSetPreallocation(*J,bs,0,dnz,0,onz);
826:     MatPreallocateFinalize(dnz,onz);
827:     MatSetLocalToGlobalMapping(*J,ltog);
828:     DAGetGhostCorners(da,&starts[0],&starts[1],PETSC_IGNORE,&dims[0],&dims[1],PETSC_IGNORE);
829:     MatSetStencil(*J,2,dims,starts,nc);

831:     /*
832:       For each node in the grid: we get the neighbors in the local (on processor ordering
833:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
834:     PETSc ordering.
835:     */
836:     for (i=xs; i<xs+nx; i++) {

838:       pstart = PetscMax(-s,-i);
839:       pend   = PetscMin(s,m-i-1);

841:       for (j=ys; j<ys+ny; j++) {
842:         slot = i - gxs + gnx*(j - gys);

844:         lstart = PetscMax(-s,-j);
845:         lend   = PetscMin(s,n-j-1);

847:         cnt  = 0;
848:         for (k=0; k<nc; k++) {
849:           for (l=lstart; l<lend+1; l++) {
850:             for (p=pstart; p<pend+1; p++) {
851:               if ((st == DA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
852:                 cols[cnt++]  = k + nc*(slot + gnx*l + p);
853:               }
854:             }
855:           }
856:           rows[k]      = k + nc*(slot);
857:         }
858:         MatSetValuesLocal(*J,nc,rows,cnt,cols,values,INSERT_VALUES);
859:       }
860:     }
861:     PetscFree(values);
862:     PetscFree(rows);
863:     PetscFree(cols);
864:     MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
865:     MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
866:   }
867:   return(0);
868: }