Actual source code: mpiaij.c

  1: /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

 3:  #include src/mat/impls/aij/mpi/mpiaij.h
 4:  #include src/vec/vecimpl.h
 5:  #include src/inline/spops.h

  7: /* 
  8:   Local utility routine that creates a mapping from the global column 
  9: number to the local number in the off-diagonal part of the local 
 10: storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
 11: a slightly higher hash table cost; without it, it is not scalable (each processor
 12: has an order N integer array, but access is fast).
 13: */
 16: int CreateColmap_MPIAIJ_Private(Mat mat)
 17: {
 18:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
 19:   int        n = aij->B->n,i,ierr;

 22: #if defined (PETSC_USE_CTABLE)
 23:   PetscTableCreate(n,&aij->colmap);
 24:   for (i=0; i<n; i++){
 25:     PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
 26:   }
 27: #else
 28:   PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
 29:   PetscLogObjectMemory(mat,mat->N*sizeof(int));
 30:   PetscMemzero(aij->colmap,mat->N*sizeof(int));
 31:   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
 32: #endif
 33:   return(0);
 34: }
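
A minimal lookup sketch (illustrative only; gcol and lcol are hypothetical names for a
global column index and its local off-diagonal counterpart) mirroring the pattern used
later in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ(); a negative lcol means the
global column has no entries in the off-diagonal block on this process:

#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&lcol);  /* table stores indices shifted by one */
  lcol--;
#else
  lcol = aij->colmap[gcol] - 1;              /* dense length-N array over global columns */
#endif
  if (lcol < 0) { /* gcol not (yet) present in the off-diagonal part */ }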

 36: #define CHUNKSIZE   15
 37: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
 38: { \
 39:  \
 40:     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
 41:     rmax = aimax[row]; nrow = ailen[row];  \
 42:     col1 = col - shift; \
 43:      \
 44:     low = 0; high = nrow; \
 45:     while (high-low > 5) { \
 46:       t = (low+high)/2; \
 47:       if (rp[t] > col) high = t; \
 48:       else             low  = t; \
 49:     } \
 50:       for (_i=low; _i<high; _i++) { \
 51:         if (rp[_i] > col1) break; \
 52:         if (rp[_i] == col1) { \
 53:           if (addv == ADD_VALUES) ap[_i] += value;   \
 54:           else                  ap[_i] = value; \
 55:           goto a_noinsert; \
 56:         } \
 57:       }  \
 58:       if (nonew == 1) goto a_noinsert; \
 59:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
 60:       if (nrow >= rmax) { \
 61:         /* there is no extra room in row, therefore enlarge */ \
 62:         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
 63:         PetscScalar *new_a; \
 64:  \
 65:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 66:  \
 67:         /* malloc new storage space */ \
 68:         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
 69:         PetscMalloc(len,&new_a); \
 70:         new_j   = (int*)(new_a + new_nz); \
 71:         new_i   = new_j + new_nz; \
 72:  \
 73:         /* copy over old data into new slots */ \
 74:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
 75:         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
 76:         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
 77:         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
 78:         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
 79:                                                            len*sizeof(int)); \
 80:         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar)); \
 81:         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
 82:                                                            len*sizeof(PetscScalar));  \
 83:         /* free up old matrix storage */ \
 84:  \
 85:         PetscFree(a->a);  \
 86:         if (!a->singlemalloc) { \
 87:            PetscFree(a->i); \
 88:            PetscFree(a->j); \
 89:         } \
 90:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
 91:         a->singlemalloc = PETSC_TRUE; \
 92:  \
 93:         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
 94:         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
 95:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
 96:         a->maxnz += CHUNKSIZE; \
 97:         a->reallocs++; \
 98:       } \
 99:       N = nrow++ - 1; a->nz++; \
100:       /* shift up all the later entries in this row */ \
101:       for (ii=N; ii>=_i; ii--) { \
102:         rp[ii+1] = rp[ii]; \
103:         ap[ii+1] = ap[ii]; \
104:       } \
105:       rp[_i] = col1;  \
106:       ap[_i] = value;  \
107:       a_noinsert: ; \
108:       ailen[row] = nrow; \
109: } 

111: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112: { \
113:  \
114:     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115:     rmax = bimax[row]; nrow = bilen[row];  \
116:     col1 = col - shift; \
117:      \
118:     low = 0; high = nrow; \
119:     while (high-low > 5) { \
120:       t = (low+high)/2; \
121:       if (rp[t] > col) high = t; \
122:       else             low  = t; \
123:     } \
124:        for (_i=low; _i<high; _i++) { \
125:         if (rp[_i] > col1) break; \
126:         if (rp[_i] == col1) { \
127:           if (addv == ADD_VALUES) ap[_i] += value;   \
128:           else                  ap[_i] = value; \
129:           goto b_noinsert; \
130:         } \
131:       }  \
132:       if (nonew == 1) goto b_noinsert; \
133:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
134:       if (nrow >= rmax) { \
135:         /* there is no extra room in row, therefore enlarge */ \
136:         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
137:         PetscScalar *new_a; \
138:  \
139:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
140:  \
141:         /* malloc new storage space */ \
142:         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
143:         PetscMalloc(len,&new_a); \
144:         new_j   = (int*)(new_a + new_nz); \
145:         new_i   = new_j + new_nz; \
146:  \
147:         /* copy over old data into new slots */ \
148:         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
149:         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
150:         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
151:         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
152:         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
153:                                                            len*sizeof(int)); \
154:         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar)); \
155:         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
156:                                                            len*sizeof(PetscScalar));  \
157:         /* free up old matrix storage */ \
158:  \
159:         PetscFree(b->a);  \
160:         if (!b->singlemalloc) { \
161:           PetscFree(b->i); \
162:           PetscFree(b->j); \
163:         } \
164:         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
165:         b->singlemalloc = PETSC_TRUE; \
166:  \
167:         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
168:         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
169:         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
170:         b->maxnz += CHUNKSIZE; \
171:         b->reallocs++; \
172:       } \
173:       N = nrow++ - 1; b->nz++; \
174:       /* shift up all the later entries in this row */ \
175:       for (ii=N; ii>=_i; ii--) { \
176:         rp[ii+1] = rp[ii]; \
177:         ap[ii+1] = ap[ii]; \
178:       } \
179:       rp[_i] = col1;  \
180:       ap[_i] = value;  \
181:       b_noinsert: ; \
182:       bilen[row] = nrow; \
183: }

187: int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
188: {
189:   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
190:   PetscScalar  value;
191:   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
192:   int          cstart = aij->cstart,cend = aij->cend,row,col;
193:   PetscTruth   roworiented = aij->roworiented;

195:   /* Some Variables required in the macro */
196:   Mat          A = aij->A;
197:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198:   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199:   PetscScalar  *aa = a->a;
200:   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201:   Mat          B = aij->B;
202:   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203:   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204:   PetscScalar  *ba = b->a;

206:   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207:   int          nonew = a->nonew,shift=0;
208:   PetscScalar  *ap;

211:   for (i=0; i<m; i++) {
212:     if (im[i] < 0) continue;
213: #if defined(PETSC_USE_BOPT_g)
214:     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215: #endif
216:     if (im[i] >= rstart && im[i] < rend) {
217:       row = im[i] - rstart;
218:       for (j=0; j<n; j++) {
219:         if (in[j] >= cstart && in[j] < cend){
220:           col = in[j] - cstart;
221:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222:           if (ignorezeroentries && value == 0.0) continue;
223:           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224:           /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
225:         } else if (in[j] < 0) continue;
226: #if defined(PETSC_USE_BOPT_g)
227:         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228: #endif
229:         else {
230:           if (mat->was_assembled) {
231:             if (!aij->colmap) {
232:               CreateColmap_MPIAIJ_Private(mat);
233:             }
234: #if defined (PETSC_USE_CTABLE)
235:             PetscTableFind(aij->colmap,in[j]+1,&col);
236:             col--;
237: #else
238:             col = aij->colmap[in[j]] - 1;
239: #endif
240:             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241:               DisAssemble_MPIAIJ(mat);
242:               col =  in[j];
243:               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244:               B = aij->B;
245:               b = (Mat_SeqAIJ*)B->data;
246:               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247:               ba = b->a;
248:             }
249:           } else col = in[j];
250:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251:           if (ignorezeroentries && value == 0.0) continue;
252:           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253:           /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
254:         }
255:       }
256:     } else {
257:       if (!aij->donotstash) {
258:         if (roworiented) {
259:           if (ignorezeroentries && v[i*n] == 0.0) continue;
260:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
261:         } else {
262:           if (ignorezeroentries && v[i] == 0.0) continue;
263:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
264:         }
265:       }
266:     }
267:   }
268:   return(0);
269: }
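
A caller-side sketch (not part of this file; the row/column indices are hypothetical):
values are passed with global indices, locally owned entries are routed to the diagonal
(aij->A) or off-diagonal (aij->B) block by the routine above, and entries for rows owned
by other processes are stashed until assembly:

  PetscScalar val  = 1.0;
  int         grow = 42, gcol = 7;          /* hypothetical global row/column indices */
  MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); /* ships the stashed off-process entries */
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);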

273: int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274: {
275:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276:   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
277:   int        cstart = aij->cstart,cend = aij->cend,row,col;

280:   for (i=0; i<m; i++) {
281:     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
282:     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
283:     if (idxm[i] >= rstart && idxm[i] < rend) {
284:       row = idxm[i] - rstart;
285:       for (j=0; j<n; j++) {
286:         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
287:         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
288:         if (idxn[j] >= cstart && idxn[j] < cend){
289:           col = idxn[j] - cstart;
290:           MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
291:         } else {
292:           if (!aij->colmap) {
293:             CreateColmap_MPIAIJ_Private(mat);
294:           }
295: #if defined (PETSC_USE_CTABLE)
296:           PetscTableFind(aij->colmap,idxn[j]+1,&col);
297:           col --;
298: #else
299:           col = aij->colmap[idxn[j]] - 1;
300: #endif
301:           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
302:           else {
303:             MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
304:           }
305:         }
306:       }
307:     } else {
308:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
309:     }
310:   }
311:   return(0);
312: }

316: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
317: {
318:   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
319:   int         ierr,nstash,reallocs;
320:   InsertMode  addv;

323:   if (aij->donotstash) {
324:     return(0);
325:   }

 327:   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
328:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
329:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
330:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
331:   }
332:   mat->insertmode = addv; /* in case this processor had no cache */

334:   MatStashScatterBegin_Private(&mat->stash,aij->rowners);
335:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
336:   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
337:   return(0);
338: }


343: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
344: {
345:   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
346:   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
347:   int         i,j,rstart,ncols,n,ierr,flg;
348:   int         *row,*col,other_disassembled;
349:   PetscScalar *val;
350:   InsertMode  addv = mat->insertmode;

353:   if (!aij->donotstash) {
354:     while (1) {
355:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
356:       if (!flg) break;

358:       for (i=0; i<n;) {
359:         /* Now identify the consecutive vals belonging to the same row */
360:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
361:         if (j < n) ncols = j-i;
362:         else       ncols = n-i;
363:         /* Now assemble all these values with a single function call */
364:         MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
365:         i = j;
366:       }
367:     }
368:     MatStashScatterEnd_Private(&mat->stash);
369:   }
370: 
371:   MatAssemblyBegin(aij->A,mode);
372:   MatAssemblyEnd(aij->A,mode);

 374:   /* determine if any processor has disassembled; if so we must
 375:      also disassemble ourselves, so that we may reassemble. */
 376:   /*
 377:      if the nonzero structure of submatrix B cannot change then we know that
 378:      no processor disassembled, thus we can skip this step
 379:   */
380:   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
381:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
382:     if (mat->was_assembled && !other_disassembled) {
383:       DisAssemble_MPIAIJ(mat);
384:     }
385:   }

387:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
388:     MatSetUpMultiply_MPIAIJ(mat);
389:   }
390:   MatAssemblyBegin(aij->B,mode);
391:   MatAssemblyEnd(aij->B,mode);

393:   if (aij->rowvalues) {
394:     PetscFree(aij->rowvalues);
395:     aij->rowvalues = 0;
396:   }

398:   /* used by MatAXPY() */
399:   a->xtoy = 0; b->xtoy = 0;
400:   a->XtoY = 0; b->XtoY = 0;

402:   return(0);
403: }

407: int MatZeroEntries_MPIAIJ(Mat A)
408: {
409:   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
410:   int        ierr;

413:   MatZeroEntries(l->A);
414:   MatZeroEntries(l->B);
415:   return(0);
416: }

420: int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
421: {
422:   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
423:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
424:   int            *nprocs,j,idx,nsends,row;
425:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
426:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
427:   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
428:   MPI_Comm       comm = A->comm;
429:   MPI_Request    *send_waits,*recv_waits;
430:   MPI_Status     recv_status,*send_status;
431:   IS             istmp;
432:   PetscTruth     found;

435:   ISGetLocalSize(is,&N);
436:   ISGetIndices(is,&rows);

438:   /*  first count number of contributors to each processor */
439:   PetscMalloc(2*size*sizeof(int),&nprocs);
440:   PetscMemzero(nprocs,2*size*sizeof(int));
441:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
442:   for (i=0; i<N; i++) {
443:     idx = rows[i];
444:     found = PETSC_FALSE;
445:     for (j=0; j<size; j++) {
446:       if (idx >= owners[j] && idx < owners[j+1]) {
447:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
448:       }
449:     }
450:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
451:   }
452:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

454:   /* inform other processors of number of messages and max length*/
455:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

457:   /* post receives:   */
458:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
459:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
460:   for (i=0; i<nrecvs; i++) {
461:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
462:   }

464:   /* do sends:
465:       1) starts[i] gives the starting index in svalues for stuff going to 
466:          the ith processor
467:   */
468:   PetscMalloc((N+1)*sizeof(int),&svalues);
469:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
470:   PetscMalloc((size+1)*sizeof(int),&starts);
471:   starts[0] = 0;
472:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
473:   for (i=0; i<N; i++) {
474:     svalues[starts[owner[i]]++] = rows[i];
475:   }
476:   ISRestoreIndices(is,&rows);

478:   starts[0] = 0;
479:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
480:   count = 0;
481:   for (i=0; i<size; i++) {
482:     if (nprocs[2*i+1]) {
483:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
484:     }
485:   }
486:   PetscFree(starts);

488:   base = owners[rank];

490:   /*  wait on receives */
491:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
492:   source = lens + nrecvs;
493:   count  = nrecvs; slen = 0;
494:   while (count) {
495:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
496:     /* unpack receives into our local space */
497:     MPI_Get_count(&recv_status,MPI_INT,&n);
498:     source[imdex]  = recv_status.MPI_SOURCE;
499:     lens[imdex]    = n;
500:     slen          += n;
501:     count--;
502:   }
503:   PetscFree(recv_waits);
504: 
505:   /* move the data into the send scatter */
506:   PetscMalloc((slen+1)*sizeof(int),&lrows);
507:   count = 0;
508:   for (i=0; i<nrecvs; i++) {
509:     values = rvalues + i*nmax;
510:     for (j=0; j<lens[i]; j++) {
511:       lrows[count++] = values[j] - base;
512:     }
513:   }
514:   PetscFree(rvalues);
515:   PetscFree(lens);
516:   PetscFree(owner);
517:   PetscFree(nprocs);
518: 
519:   /* actually zap the local rows */
520:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
521:   PetscLogObjectParent(A,istmp);

523:   /*
 524:         Zero the required rows. If the "diagonal block" of the matrix
 525:      is square and the user wishes to set the diagonal we use separate
 526:      code so that MatSetValues() is not called for each diagonal entry,
 527:      which would allocate new memory and thus trigger many mallocs, slowing things down.

529:        Contributed by: Mathew Knepley
530:   */
531:   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
532:   MatZeroRows(l->B,istmp,0);
533:   if (diag && (l->A->M == l->A->N)) {
534:     MatZeroRows(l->A,istmp,diag);
535:   } else if (diag) {
536:     MatZeroRows(l->A,istmp,0);
537:     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
538:       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
539: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
540:     }
541:     for (i = 0; i < slen; i++) {
542:       row  = lrows[i] + rstart;
543:       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
544:     }
545:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
546:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
547:   } else {
548:     MatZeroRows(l->A,istmp,0);
549:   }
550:   ISDestroy(istmp);
551:   PetscFree(lrows);

553:   /* wait on sends */
554:   if (nsends) {
555:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
556:     MPI_Waitall(nsends,send_waits,send_status);
557:     PetscFree(send_status);
558:   }
559:   PetscFree(send_waits);
560:   PetscFree(svalues);

562:   return(0);
563: }
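
A caller-side sketch of the routine above (assumes A is an assembled MPIAIJ matrix; the
row numbers are hypothetical). Rows are given as global indices, so rows owned by other
processes are forwarded by the send/receive logic above:

  int         rows[2] = {0,5};   /* hypothetical global row numbers */
  PetscScalar one     = 1.0;
  IS          is;
  ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);
  MatZeroRows(A,is,&one);        /* pass 0 instead of &one to leave the zeroed rows without a diagonal entry */
  ISDestroy(is);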

567: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
568: {
569:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
570:   int        ierr,nt;

573:   VecGetLocalSize(xx,&nt);
574:   if (nt != A->n) {
575:     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
576:   }
577:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
578:   (*a->A->ops->mult)(a->A,xx,yy);
579:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
580:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
581:   return(0);
582: }

586: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
587: {
588:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
589:   int        ierr;

592:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
593:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
594:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
595:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
596:   return(0);
597: }

601: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
602: {
603:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
604:   int        ierr;

607:   /* do nondiagonal part */
608:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
609:   /* send it on its way */
610:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
611:   /* do local part */
612:   (*a->A->ops->multtranspose)(a->A,xx,yy);
 613:   /* receive remote parts: note this assumes the values are not actually */
 614:   /* inserted in yy until the next line, which is true for my implementation */
 615:   /* but may not always be true. */
616:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
617:   return(0);
618: }

620: EXTERN_C_BEGIN
623: int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
624: {
625:   MPI_Comm comm;
626:   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
627:   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
628:   IS         Me,Notme;
629:   int        M,N,first,last,*notme,ntids,i, ierr;


633:   /* Easy test: symmetric diagonal block */
634:   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
635:   MatIsSymmetric(Adia,Bdia,f);
636:   if (!*f) return(0);
637:   PetscObjectGetComm((PetscObject)Amat,&comm);
638:   MPI_Comm_size(comm,&ntids);
639:   if (ntids==1) return(0);

641:   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
642:   MatGetSize(Amat,&M,&N);
643:   MatGetOwnershipRange(Amat,&first,&last);
644:   PetscMalloc((N-last+first)*sizeof(int),&notme);
645:   for (i=0; i<first; i++) notme[i] = i;
646:   for (i=last; i<M; i++) notme[i-last+first] = i;
647:   ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
648:   ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
649:   MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
650:   Aoff = Aoffs[0];
651:   MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
652:   Boff = Boffs[0];
653:   MatIsSymmetric(Aoff,Boff,f);
654:   MatDestroyMatrices(1,&Aoffs);
655:   MatDestroyMatrices(1,&Boffs);
656:   ISDestroy(Me);
657:   ISDestroy(Notme);

659:   return(0);
660: }
661: EXTERN_C_END

665: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
666: {
667:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
668:   int        ierr;

671:   /* do nondiagonal part */
672:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
673:   /* send it on its way */
674:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
675:   /* do local part */
676:   (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
 677:   /* receive remote parts: note this assumes the values are not actually */
 678:   /* inserted in zz until the next line, which is true for my implementation */
 679:   /* but may not always be true. */
680:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
681:   return(0);
682: }

684: /*
685:   This only works correctly for square matrices where the subblock A->A is the 
686:    diagonal block
687: */
690: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
691: {
692:   int        ierr;
693:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

696:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
697:   if (a->rstart != a->cstart || a->rend != a->cend) {
698:     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
699:   }
700:   MatGetDiagonal(a->A,v);
701:   return(0);
702: }

706: int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
707: {
708:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
709:   int        ierr;

712:   MatScale(aa,a->A);
713:   MatScale(aa,a->B);
714:   return(0);
715: }

719: int MatDestroy_MPIAIJ(Mat mat)
720: {
721:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
722:   int        ierr;

725: #if defined(PETSC_USE_LOG)
726:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
727: #endif
728:   MatStashDestroy_Private(&mat->stash);
729:   PetscFree(aij->rowners);
730:   MatDestroy(aij->A);
731:   MatDestroy(aij->B);
732: #if defined (PETSC_USE_CTABLE)
733:   if (aij->colmap) {PetscTableDelete(aij->colmap);}
734: #else
735:   if (aij->colmap) {PetscFree(aij->colmap);}
736: #endif
737:   if (aij->garray) {PetscFree(aij->garray);}
738:   if (aij->lvec)   {VecDestroy(aij->lvec);}
739:   if (aij->Mvctx)  {VecScatterDestroy(aij->Mvctx);}
740:   if (aij->rowvalues) {PetscFree(aij->rowvalues);}
741:   PetscFree(aij);
742:   return(0);
743: }

747: int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
748: {
749:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
750:   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
751:   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
752:   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
753:   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
754:   PetscScalar       *column_values;

757:   MPI_Comm_rank(mat->comm,&rank);
758:   MPI_Comm_size(mat->comm,&size);
759:   nz   = A->nz + B->nz;
760:   if (rank == 0) {
761:     header[0] = MAT_FILE_COOKIE;
762:     header[1] = mat->M;
763:     header[2] = mat->N;
764:     MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);
765:     PetscViewerBinaryGetDescriptor(viewer,&fd);
766:     PetscBinaryWrite(fd,header,4,PETSC_INT,1);
767:     /* get largest number of rows any processor has */
768:     rlen = mat->m;
769:     PetscMapGetGlobalRange(mat->rmap,&range);
770:     for (i=1; i<size; i++) {
771:       rlen = PetscMax(rlen,range[i+1] - range[i]);
772:     }
773:   } else {
774:     MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);
775:     rlen = mat->m;
776:   }

778:   /* load up the local row counts */
779:   PetscMalloc((rlen+1)*sizeof(int),&row_lengths);
780:   for (i=0; i<mat->m; i++) {
781:     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
782:   }

784:   /* store the row lengths to the file */
785:   if (rank == 0) {
786:     MPI_Status status;
787:     PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);
788:     for (i=1; i<size; i++) {
789:       rlen = range[i+1] - range[i];
790:       MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);
791:       PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);
792:     }
793:   } else {
794:     MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);
795:   }
796:   PetscFree(row_lengths);

798:   /* load up the local column indices */
 799:   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
800:   MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);
801:   PetscMalloc((nzmax+1)*sizeof(int),&column_indices);
802:   cnt  = 0;
803:   for (i=0; i<mat->m; i++) {
804:     for (j=B->i[i]; j<B->i[i+1]; j++) {
805:       if ( (col = garray[B->j[j]]) > cstart) break;
806:       column_indices[cnt++] = col;
807:     }
808:     for (k=A->i[i]; k<A->i[i+1]; k++) {
809:       column_indices[cnt++] = A->j[k] + cstart;
810:     }
811:     for (; j<B->i[i+1]; j++) {
812:       column_indices[cnt++] = garray[B->j[j]];
813:     }
814:   }
815:   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

817:   /* store the column indices to the file */
818:   if (rank == 0) {
819:     MPI_Status status;
820:     PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);
821:     for (i=1; i<size; i++) {
822:       MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
823:       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
824:       MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);
825:       PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);
826:     }
827:   } else {
828:     MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
829:     MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);
830:   }
831:   PetscFree(column_indices);

833:   /* load up the local column values */
834:   PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
835:   cnt  = 0;
836:   for (i=0; i<mat->m; i++) {
837:     for (j=B->i[i]; j<B->i[i+1]; j++) {
838:       if ( garray[B->j[j]] > cstart) break;
839:       column_values[cnt++] = B->a[j];
840:     }
841:     for (k=A->i[i]; k<A->i[i+1]; k++) {
842:       column_values[cnt++] = A->a[k];
843:     }
844:     for (; j<B->i[i+1]; j++) {
845:       column_values[cnt++] = B->a[j];
846:     }
847:   }
848:   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

850:   /* store the column values to the file */
851:   if (rank == 0) {
852:     MPI_Status status;
853:     PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);
854:     for (i=1; i<size; i++) {
855:       MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
856:       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
857:       MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
858:       PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);
859:     }
860:   } else {
861:     MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
862:     MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
863:   }
864:   PetscFree(column_values);
865:   return(0);
866: }

870: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
871: {
872:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
873:   int               ierr,rank = aij->rank,size = aij->size;
874:   PetscTruth        isdraw,isascii,flg,isbinary;
875:   PetscViewer       sviewer;
876:   PetscViewerFormat format;

879:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
880:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
881:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
882:   if (isascii) {
883:     PetscViewerGetFormat(viewer,&format);
884:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885:       MatInfo info;
886:       MPI_Comm_rank(mat->comm,&rank);
887:       MatGetInfo(mat,MAT_LOCAL,&info);
888:       PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
889:       if (flg) {
890:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
891:                                               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
892:       } else {
893:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
894:                     rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
895:       }
896:       MatGetInfo(aij->A,MAT_LOCAL,&info);
897:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
898:       MatGetInfo(aij->B,MAT_LOCAL,&info);
899:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
900:       PetscViewerFlush(viewer);
901:       VecScatterView(aij->Mvctx,viewer);
902:       return(0);
903:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904:       return(0);
905:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
906:       return(0);
907:     }
908:   } else if (isbinary) {
909:     if (size == 1) {
910:       PetscObjectSetName((PetscObject)aij->A,mat->name);
911:       MatView(aij->A,viewer);
912:     } else {
913:       MatView_MPIAIJ_Binary(mat,viewer);
914:     }
915:     return(0);
916:   } else if (isdraw) {
917:     PetscDraw  draw;
918:     PetscTruth isnull;
919:     PetscViewerDrawGetDraw(viewer,0,&draw);
920:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
921:   }

923:   if (size == 1) {
924:     PetscObjectSetName((PetscObject)aij->A,mat->name);
925:     MatView(aij->A,viewer);
926:   } else {
927:     /* assemble the entire matrix onto first processor. */
928:     Mat         A;
929:     Mat_SeqAIJ *Aloc;
930:     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
931:     PetscScalar *a;

933:     if (!rank) {
934:       MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
935:     } else {
936:       MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
937:     }
938:     PetscLogObjectParent(mat,A);

940:     /* copy over the A part */
941:     Aloc = (Mat_SeqAIJ*)aij->A->data;
942:     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943:     row = aij->rstart;
944:     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
945:     for (i=0; i<m; i++) {
946:       MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
947:       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
948:     }
949:     aj = Aloc->j;
950:     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

952:     /* copy over the B part */
953:     Aloc = (Mat_SeqAIJ*)aij->B->data;
954:     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955:     row  = aij->rstart;
956:     PetscMalloc((ai[m]+1)*sizeof(int),&cols);
957:     ct   = cols;
958:     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
959:     for (i=0; i<m; i++) {
960:       MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
961:       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
962:     }
963:     PetscFree(ct);
964:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
965:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
966:     /* 
 967:        Everyone has to participate in the call to draw the matrix since the graphics waits are
968:        synchronized across all processors that share the PetscDraw object
969:     */
970:     PetscViewerGetSingleton(viewer,&sviewer);
971:     if (!rank) {
972:       PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
973:       MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
974:     }
975:     PetscViewerRestoreSingleton(viewer,&sviewer);
976:     MatDestroy(A);
977:   }
978:   return(0);
979: }

983: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
984: {
985:   int        ierr;
986:   PetscTruth isascii,isdraw,issocket,isbinary;
987: 
989:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
990:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
991:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
992:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
993:   if (isascii || isdraw || isbinary || issocket) {
994:     MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
995:   } else {
996:     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
997:   }
998:   return(0);
999: }



1005: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1006: {
1007:   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1008:   int          ierr;
1009:   Vec          bb1;
1010:   PetscScalar  mone=-1.0;

1013:   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

1015:   VecDuplicate(bb,&bb1);

1017:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1018:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1019:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1020:       its--;
1021:     }
1022: 
1023:     while (its--) {
1024:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1025:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

1027:       /* update rhs: bb1 = bb - B*x */
1028:       VecScale(&mone,mat->lvec);
1029:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1031:       /* local sweep */
1032:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1033: 
1034:     }
1035:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1036:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1037:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1038:       its--;
1039:     }
1040:     while (its--) {
1041:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1042:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

1044:       /* update rhs: bb1 = bb - B*x */
1045:       VecScale(&mone,mat->lvec);
1046:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1048:       /* local sweep */
1049:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1050: 
1051:     }
1052:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1053:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1054:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1055:       its--;
1056:     }
1057:     while (its--) {
1058:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1059:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

1061:       /* update rhs: bb1 = bb - B*x */
1062:       VecScale(&mone,mat->lvec);
1063:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1065:       /* local sweep */
1066:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1067: 
1068:     }
1069:   } else {
1070:     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1071:   }

1073:   VecDestroy(bb1);
1074:   return(0);
1075: }
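
In effect each outer iteration above is a block Jacobi step across processes with (S)SOR
applied to the local diagonal block; written out for the SOR_LOCAL_SYMMETRIC_SWEEP branch
(notation only, not additional source):

  /* x_off = off-process part of xx gathered into mat->lvec by the scatter
     bb1   = bb - B*x_off                       (update of the local right-hand side)
     xx   <- SOR_SYMMETRIC_SWEEP(A,bb1,omega)   (lits sweeps on the local diagonal block) */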

1079: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1080: {
1081:   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1082:   Mat        A = mat->A,B = mat->B;
1083:   int        ierr;
1084:   PetscReal  isend[5],irecv[5];

1087:   info->block_size     = 1.0;
1088:   MatGetInfo(A,MAT_LOCAL,info);
1089:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1090:   isend[3] = info->memory;  isend[4] = info->mallocs;
1091:   MatGetInfo(B,MAT_LOCAL,info);
1092:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1093:   isend[3] += info->memory;  isend[4] += info->mallocs;
1094:   if (flag == MAT_LOCAL) {
1095:     info->nz_used      = isend[0];
1096:     info->nz_allocated = isend[1];
1097:     info->nz_unneeded  = isend[2];
1098:     info->memory       = isend[3];
1099:     info->mallocs      = isend[4];
1100:   } else if (flag == MAT_GLOBAL_MAX) {
1101:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1102:     info->nz_used      = irecv[0];
1103:     info->nz_allocated = irecv[1];
1104:     info->nz_unneeded  = irecv[2];
1105:     info->memory       = irecv[3];
1106:     info->mallocs      = irecv[4];
1107:   } else if (flag == MAT_GLOBAL_SUM) {
1108:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1109:     info->nz_used      = irecv[0];
1110:     info->nz_allocated = irecv[1];
1111:     info->nz_unneeded  = irecv[2];
1112:     info->memory       = irecv[3];
1113:     info->mallocs      = irecv[4];
1114:   }
1115:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1116:   info->fill_ratio_needed = 0;
1117:   info->factor_mallocs    = 0;
1118:   info->rows_global       = (double)matin->M;
1119:   info->columns_global    = (double)matin->N;
1120:   info->rows_local        = (double)matin->m;
1121:   info->columns_local     = (double)matin->N;

1123:   return(0);
1124: }

1128: int MatSetOption_MPIAIJ(Mat A,MatOption op)
1129: {
1130:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1131:   int        ierr;

1134:   switch (op) {
1135:   case MAT_NO_NEW_NONZERO_LOCATIONS:
1136:   case MAT_YES_NEW_NONZERO_LOCATIONS:
1137:   case MAT_COLUMNS_UNSORTED:
1138:   case MAT_COLUMNS_SORTED:
1139:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1140:   case MAT_KEEP_ZEROED_ROWS:
1141:   case MAT_NEW_NONZERO_LOCATION_ERR:
1142:   case MAT_USE_INODES:
1143:   case MAT_DO_NOT_USE_INODES:
1144:   case MAT_IGNORE_ZERO_ENTRIES:
1145:     MatSetOption(a->A,op);
1146:     MatSetOption(a->B,op);
1147:     break;
1148:   case MAT_ROW_ORIENTED:
1149:     a->roworiented = PETSC_TRUE;
1150:     MatSetOption(a->A,op);
1151:     MatSetOption(a->B,op);
1152:     break;
1153:   case MAT_ROWS_SORTED:
1154:   case MAT_ROWS_UNSORTED:
1155:   case MAT_YES_NEW_DIAGONALS:
1156:     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1157:     break;
1158:   case MAT_COLUMN_ORIENTED:
1159:     a->roworiented = PETSC_FALSE;
1160:     MatSetOption(a->A,op);
1161:     MatSetOption(a->B,op);
1162:     break;
1163:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1164:     a->donotstash = PETSC_TRUE;
1165:     break;
1166:   case MAT_NO_NEW_DIAGONALS:
1167:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1168:   default:
1169:     SETERRQ(PETSC_ERR_SUP,"unknown option");
1170:   }
1171:   return(0);
1172: }

1176: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1177: {
1178:   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1179:   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1180:   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1181:   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1182:   int          *cmap,*idx_p;

1185:   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1186:   mat->getrowactive = PETSC_TRUE;

1188:   if (!mat->rowvalues && (idx || v)) {
1189:     /*
1190:         allocate enough space to hold information from the longest row.
1191:     */
1192:     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1193:     int     max = 1,tmp;
1194:     for (i=0; i<matin->m; i++) {
1195:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1196:       if (max < tmp) { max = tmp; }
1197:     }
1198:     PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
1199:     mat->rowindices = (int*)(mat->rowvalues + max);
1200:   }

1202:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1203:   lrow = row - rstart;

1205:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1206:   if (!v)   {pvA = 0; pvB = 0;}
1207:   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1208:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1209:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1210:   nztot = nzA + nzB;

1212:   cmap  = mat->garray;
1213:   if (v  || idx) {
1214:     if (nztot) {
1215:       /* Sort by increasing column numbers, assuming A and B already sorted */
1216:       int imark = -1;
1217:       if (v) {
1218:         *v = v_p = mat->rowvalues;
1219:         for (i=0; i<nzB; i++) {
1220:           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1221:           else break;
1222:         }
1223:         imark = i;
1224:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1225:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1226:       }
1227:       if (idx) {
1228:         *idx = idx_p = mat->rowindices;
1229:         if (imark > -1) {
1230:           for (i=0; i<imark; i++) {
1231:             idx_p[i] = cmap[cworkB[i]];
1232:           }
1233:         } else {
1234:           for (i=0; i<nzB; i++) {
1235:             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1236:             else break;
1237:           }
1238:           imark = i;
1239:         }
1240:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1241:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1242:       }
1243:     } else {
1244:       if (idx) *idx = 0;
1245:       if (v)   *v   = 0;
1246:     }
1247:   }
1248:   *nz = nztot;
1249:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1250:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1251:   return(0);
1252: }

1256: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1257: {
1258:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

1261:   if (aij->getrowactive == PETSC_FALSE) {
1262:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1263:   }
1264:   aij->getrowactive = PETSC_FALSE;
1265:   return(0);
1266: }

1270: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1271: {
1272:   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1273:   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1274:   int          ierr,i,j,cstart = aij->cstart;
1275:   PetscReal    sum = 0.0;
1276:   PetscScalar  *v;

1279:   if (aij->size == 1) {
1280:      MatNorm(aij->A,type,norm);
1281:   } else {
1282:     if (type == NORM_FROBENIUS) {
1283:       v = amat->a;
1284:       for (i=0; i<amat->nz; i++) {
1285: #if defined(PETSC_USE_COMPLEX)
1286:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1287: #else
1288:         sum += (*v)*(*v); v++;
1289: #endif
1290:       }
1291:       v = bmat->a;
1292:       for (i=0; i<bmat->nz; i++) {
1293: #if defined(PETSC_USE_COMPLEX)
1294:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1295: #else
1296:         sum += (*v)*(*v); v++;
1297: #endif
1298:       }
1299:       MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1300:       *norm = sqrt(*norm);
1301:     } else if (type == NORM_1) { /* max column norm */
1302:       PetscReal *tmp,*tmp2;
1303:       int    *jj,*garray = aij->garray;
1304:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1305:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1306:       PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1307:       *norm = 0.0;
1308:       v = amat->a; jj = amat->j;
1309:       for (j=0; j<amat->nz; j++) {
1310:         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1311:       }
1312:       v = bmat->a; jj = bmat->j;
1313:       for (j=0; j<bmat->nz; j++) {
1314:         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1315:       }
1316:       MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1317:       for (j=0; j<mat->N; j++) {
1318:         if (tmp2[j] > *norm) *norm = tmp2[j];
1319:       }
1320:       PetscFree(tmp);
1321:       PetscFree(tmp2);
1322:     } else if (type == NORM_INFINITY) { /* max row norm */
1323:       PetscReal ntemp = 0.0;
1324:       for (j=0; j<aij->A->m; j++) {
1325:         v = amat->a + amat->i[j];
1326:         sum = 0.0;
1327:         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1328:           sum += PetscAbsScalar(*v); v++;
1329:         }
1330:         v = bmat->a + bmat->i[j];
1331:         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1332:           sum += PetscAbsScalar(*v); v++;
1333:         }
1334:         if (sum > ntemp) ntemp = sum;
1335:       }
1336:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1337:     } else {
1338:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1339:     }
1340:   }
1341:   return(0);
1342: }

1346: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1347: {
1348:   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1349:   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1350:   int          ierr;
1351:   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1352:   Mat          B;
1353:   PetscScalar  *array;

1356:   if (!matout && M != N) {
1357:     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1358:   }

1360:   MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);

1362:   /* copy over the A part */
1363:   Aloc = (Mat_SeqAIJ*)a->A->data;
1364:   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1365:   row = a->rstart;
1366:   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1367:   for (i=0; i<m; i++) {
1368:     MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1369:     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1370:   }
1371:   aj = Aloc->j;
1372:   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

1374:   /* copy over the B part */
1375:   Aloc = (Mat_SeqAIJ*)a->B->data;
1376:   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1377:   row  = a->rstart;
1378:   PetscMalloc((1+ai[m])*sizeof(int),&cols);
1379:   ct   = cols;
1380:   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1381:   for (i=0; i<m; i++) {
1382:     MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1383:     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1384:   }
1385:   PetscFree(ct);
1386:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1387:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1388:   if (matout) {
1389:     *matout = B;
1390:   } else {
1391:     MatHeaderCopy(A,B);
1392:   }
1393:   return(0);
1394: }

1398: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1399: {
1400:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1401:   Mat        a = aij->A,b = aij->B;
1402:   int        ierr,s1,s2,s3;

1405:   MatGetLocalSize(mat,&s2,&s3);
1406:   if (rr) {
1407:     VecGetLocalSize(rr,&s1);
1408:     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1409:     /* Overlap communication with computation. */
1410:     VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1411:   }
1412:   if (ll) {
1413:     VecGetLocalSize(ll,&s1);
1414:     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1415:     (*b->ops->diagonalscale)(b,ll,0);
1416:   }
1417:   /* scale  the diagonal block */
1418:   (*a->ops->diagonalscale)(a,ll,rr);

1420:   if (rr) {
1421:     /* Do a scatter end and then right scale the off-diagonal block */
1422:     VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1423:     (*b->ops->diagonalscale)(b,0,aij->lvec);
1424:   }
1425: 
1426:   return(0);
1427: }


1432: int MatPrintHelp_MPIAIJ(Mat A)
1433: {
1434:   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1435:   int        ierr;

1438:   if (!a->rank) {
1439:     MatPrintHelp_SeqAIJ(a->A);
1440:   }
1441:   return(0);
1442: }

1446: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1447: {
1449:   *bs = 1;
1450:   return(0);
1451: }
1454: int MatSetUnfactored_MPIAIJ(Mat A)
1455: {
1456:   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1457:   int        ierr;

1460:   MatSetUnfactored(a->A);
1461:   return(0);
1462: }

1466: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1467: {
1468:   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1469:   Mat        a,b,c,d;
1470:   PetscTruth flg;
1471:   int        ierr;

1474:   a = matA->A; b = matA->B;
1475:   c = matB->A; d = matB->B;

1477:   MatEqual(a,c,&flg);
1478:   if (flg == PETSC_TRUE) {
1479:     MatEqual(b,d,&flg);
1480:   }
1481:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1482:   return(0);
1483: }

1487: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1488: {
1489:   int        ierr;
1490:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1491:   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

1494:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1495:   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1496:     /* because of the column compression in the off-processor part of the matrix a->B,
1497:        the number of columns in a->B and b->B may be different, hence we cannot call
1498:        MatCopy() directly on the two parts. If need be, a copy more efficient than
1499:        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1500:        and then copying the submatrices */
1501:     MatCopy_Basic(A,B,str);
1502:   } else {
1503:     MatCopy(a->A,b->A,str);
1504:     MatCopy(a->B,b->B,str);
1505:   }
1506:   return(0);
1507: }

1511: int MatSetUpPreallocation_MPIAIJ(Mat A)
1512: {
1513:   int        ierr;

1516:    MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1517:   return(0);
1518: }

1520:  #include petscblaslapack.h
1523: int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1524: {
1525:   int        ierr,one=1,i;
1526:   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1527:   Mat_SeqAIJ *x,*y;

1530:   if (str == SAME_NONZERO_PATTERN) {
1531:     x = (Mat_SeqAIJ *)xx->A->data;
1532:     y = (Mat_SeqAIJ *)yy->A->data;
1533:     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1534:     x = (Mat_SeqAIJ *)xx->B->data;
1535:     y = (Mat_SeqAIJ *)yy->B->data;
1536:     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1537:   } else if (str == SUBSET_NONZERO_PATTERN) {
1538:     MatAXPY_SeqAIJ(a,xx->A,yy->A,str);

1540:     x = (Mat_SeqAIJ *)xx->B->data;
1541:     y = (Mat_SeqAIJ *)yy->B->data;
1542:     if (y->xtoy && y->XtoY != xx->B) {
1543:       PetscFree(y->xtoy);
1544:       MatDestroy(y->XtoY);
1545:     }
1546:     if (!y->xtoy) { /* get xtoy */
1547:       MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
1548:       y->XtoY = xx->B;
1549:     }
1550:     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1551:   } else {
1552:     MatAXPY_Basic(a,X,Y,str);
1553:   }
1554:   return(0);
1555: }

1557: /* -------------------------------------------------------------------*/
1558: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1559:        MatGetRow_MPIAIJ,
1560:        MatRestoreRow_MPIAIJ,
1561:        MatMult_MPIAIJ,
1562: /* 4*/ MatMultAdd_MPIAIJ,
1563:        MatMultTranspose_MPIAIJ,
1564:        MatMultTransposeAdd_MPIAIJ,
1565:        0,
1566:        0,
1567:        0,
1568: /*10*/ 0,
1569:        0,
1570:        0,
1571:        MatRelax_MPIAIJ,
1572:        MatTranspose_MPIAIJ,
1573: /*15*/ MatGetInfo_MPIAIJ,
1574:        MatEqual_MPIAIJ,
1575:        MatGetDiagonal_MPIAIJ,
1576:        MatDiagonalScale_MPIAIJ,
1577:        MatNorm_MPIAIJ,
1578: /*20*/ MatAssemblyBegin_MPIAIJ,
1579:        MatAssemblyEnd_MPIAIJ,
1580:        0,
1581:        MatSetOption_MPIAIJ,
1582:        MatZeroEntries_MPIAIJ,
1583: /*25*/ MatZeroRows_MPIAIJ,
1584: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1585:        MatLUFactorSymbolic_MPIAIJ_TFS,
1586: #else
1587:        0,
1588: #endif
1589:        0,
1590:        0,
1591:        0,
1592: /*30*/ MatSetUpPreallocation_MPIAIJ,
1593:        0,
1594:        0,
1595:        0,
1596:        0,
1597: /*35*/ MatDuplicate_MPIAIJ,
1598:        0,
1599:        0,
1600:        0,
1601:        0,
1602: /*40*/ MatAXPY_MPIAIJ,
1603:        MatGetSubMatrices_MPIAIJ,
1604:        MatIncreaseOverlap_MPIAIJ,
1605:        MatGetValues_MPIAIJ,
1606:        MatCopy_MPIAIJ,
1607: /*45*/ MatPrintHelp_MPIAIJ,
1608:        MatScale_MPIAIJ,
1609:        0,
1610:        0,
1611:        0,
1612: /*50*/ MatGetBlockSize_MPIAIJ,
1613:        0,
1614:        0,
1615:        0,
1616:        0,
1617: /*55*/ MatFDColoringCreate_MPIAIJ,
1618:        0,
1619:        MatSetUnfactored_MPIAIJ,
1620:        0,
1621:        0,
1622: /*60*/ MatGetSubMatrix_MPIAIJ,
1623:        MatDestroy_MPIAIJ,
1624:        MatView_MPIAIJ,
1625:        MatGetPetscMaps_Petsc,
1626:        0,
1627: /*65*/ 0,
1628:        0,
1629:        0,
1630:        0,
1631:        0,
1632: /*70*/ 0,
1633:        0,
1634:        MatSetColoring_MPIAIJ,
1635:        MatSetValuesAdic_MPIAIJ,
1636:        MatSetValuesAdifor_MPIAIJ,
1637: /*75*/ 0,
1638:        0,
1639:        0,
1640:        0,
1641:        0,
1642: /*80*/ 0,
1643:        0,
1644:        0,
1645:        0,
1646:        0,
1647: /*85*/ MatLoad_MPIAIJ};

1649: /* ----------------------------------------------------------------------------------------*/

1651: EXTERN_C_BEGIN
1654: int MatStoreValues_MPIAIJ(Mat mat)
1655: {
1656:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1657:   int        ierr;

1660:   MatStoreValues(aij->A);
1661:   MatStoreValues(aij->B);
1662:   return(0);
1663: }
1664: EXTERN_C_END

1666: EXTERN_C_BEGIN
1669: int MatRetrieveValues_MPIAIJ(Mat mat)
1670: {
1671:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1672:   int        ierr;

1675:   MatRetrieveValues(aij->A);
1676:   MatRetrieveValues(aij->B);
1677:   return(0);
1678: }
1679: EXTERN_C_END
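
/*
   Usage sketch (added for illustration; not part of the library source).
   The two routines above are attached to the matrix as "MatStoreValues_C" and
   "MatRetrieveValues_C" and simply forward to the diagonal (A) and
   off-diagonal (B) SeqAIJ blocks.  A typical caller-side pattern, assuming the
   public MatStoreValues()/MatRetrieveValues() wrappers and the
   MAT_NO_NEW_NONZERO_LOCATIONS option name of this release (error checking
   omitted):

     MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);  // freeze the nonzero pattern (assumed option name)
     MatStoreValues(A);                             // save the numerical values
     MatZeroEntries(A);                             // ...refill and reassemble A here...
     MatRetrieveValues(A);                          // restore the saved values
*/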

1681:  #include petscpc.h
1682: EXTERN_C_BEGIN
1685: int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1686: {
1687:   Mat_MPIAIJ   *b;
1688:   int          ierr,i;

1691:   B->preallocated = PETSC_TRUE;
1692:   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1693:   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1694:   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1695:   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1696:   if (d_nnz) {
1697:     for (i=0; i<B->m; i++) {
1698:       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1699:     }
1700:   }
1701:   if (o_nnz) {
1702:     for (i=0; i<B->m; i++) {
1703:       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1704:     }
1705:   }
1706:   b = (Mat_MPIAIJ*)B->data;

1708:   MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);
1709:   PetscLogObjectParent(B,b->A);
1710:   MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);
1711:   PetscLogObjectParent(B,b->B);

1713:   return(0);
1714: }
1715: EXTERN_C_END

1717: /*MC
1718:    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

1720:    Options Database Keys:
1721: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

1723:   Level: beginner

1725: .seealso: MatCreateMPIAIJ
1726: M*/

1728: EXTERN_C_BEGIN
1731: int MatCreate_MPIAIJ(Mat B)
1732: {
1733:   Mat_MPIAIJ *b;
1734:   int        ierr,i,size;

1737:   MPI_Comm_size(B->comm,&size);

1739:   PetscNew(Mat_MPIAIJ,&b);
1740:   B->data         = (void*)b;
1741:   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1742:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1743:   B->factor       = 0;
1744:   B->assembled    = PETSC_FALSE;
1745:   B->mapping      = 0;

1747:   B->insertmode      = NOT_SET_VALUES;
1748:   b->size            = size;
1749:   MPI_Comm_rank(B->comm,&b->rank);

1751:   PetscSplitOwnership(B->comm,&B->m,&B->M);
1752:   PetscSplitOwnership(B->comm,&B->n,&B->N);

1754:   /* the information in the maps duplicates the information computed below, eventually 
1755:      we should remove the duplicate information that is not contained in the maps */
1756:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1757:   PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);

1759:   /* build local table of row and column ownerships */
1760:   PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1761:   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1762:   b->cowners = b->rowners + b->size + 2;
1763:   MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1764:   b->rowners[0] = 0;
1765:   for (i=2; i<=b->size; i++) {
1766:     b->rowners[i] += b->rowners[i-1];
1767:   }
1768:   b->rstart = b->rowners[b->rank];
1769:   b->rend   = b->rowners[b->rank+1];
1770:   MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1771:   b->cowners[0] = 0;
1772:   for (i=2; i<=b->size; i++) {
1773:     b->cowners[i] += b->cowners[i-1];
1774:   }
1775:   b->cstart = b->cowners[b->rank];
1776:   b->cend   = b->cowners[b->rank+1];

1778:   /* build cache for off-processor entries formed during assembly */
1779:   MatStashCreate_Private(B->comm,1,&B->stash);
1780:   b->donotstash  = PETSC_FALSE;
1781:   b->colmap      = 0;
1782:   b->garray      = 0;
1783:   b->roworiented = PETSC_TRUE;

1785:   /* stuff used for matrix vector multiply */
1786:   b->lvec      = PETSC_NULL;
1787:   b->Mvctx     = PETSC_NULL;

1789:   /* stuff for MatGetRow() */
1790:   b->rowindices   = 0;
1791:   b->rowvalues    = 0;
1792:   b->getrowactive = PETSC_FALSE;

1794:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1795:                                      "MatStoreValues_MPIAIJ",
1796:                                      MatStoreValues_MPIAIJ);
1797:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1798:                                      "MatRetrieveValues_MPIAIJ",
1799:                                      MatRetrieveValues_MPIAIJ);
1800:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1801:                                      "MatGetDiagonalBlock_MPIAIJ",
1802:                                      MatGetDiagonalBlock_MPIAIJ);
1803:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1804:                                      "MatIsSymmetric_MPIAIJ",
1805:                                      MatIsSymmetric_MPIAIJ);
1806:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1807:                                      "MatMPIAIJSetPreallocation_MPIAIJ",
1808:                                      MatMPIAIJSetPreallocation_MPIAIJ);
1809:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1810:                                      "MatDiagonalScaleLocal_MPIAIJ",
1811:                                      MatDiagonalScaleLocal_MPIAIJ);
1812:   return(0);
1813: }
1814: EXTERN_C_END
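
/*
   Usage sketch (added for illustration; not part of the library source).
   MatCreate_MPIAIJ() only sets up the parallel layout (row/column ownership,
   the stash, scatter place-holders); the actual storage is allocated later by
   MatMPIAIJSetPreallocation().  A minimal creation sequence (error checking
   omitted):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,&A);
     MatSetType(A,MATMPIAIJ);                       // runs MatCreate_MPIAIJ()
     MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);
*/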

1816: /*MC
1817:    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

1819:    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1820:    and MATMPIAIJ otherwise.

1822:    Options Database Keys:
1823: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

1825:   Level: beginner

1827: .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1828: M*/

1830: EXTERN_C_BEGIN
1833: int MatCreate_AIJ(Mat A) {
1834:   int ierr,size;

1837:   PetscObjectChangeTypeName((PetscObject)A,MATAIJ);
1838:   MPI_Comm_size(A->comm,&size);
1839:   if (size == 1) {
1840:     MatSetType(A,MATSEQAIJ);
1841:   } else {
1842:     MatSetType(A,MATMPIAIJ);
1843:   }
1844:   return(0);
1845: }
1846: EXTERN_C_END
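
/*
   Usage sketch (added for illustration; not part of the library source).
   MatCreate_AIJ() is only a dispatcher: on a one-process communicator the
   matrix becomes MATSEQAIJ, otherwise MATMPIAIJ.  The following therefore
   works unchanged in serial and in parallel (error checking omitted):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,&A);
     MatSetType(A,MATAIJ);        // or -mat_type aij with MatSetFromOptions(A)
*/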

1850: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1851: {
1852:   Mat        mat;
1853:   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1854:   int        ierr;

1857:   *newmat       = 0;
1858:   MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1859:   MatSetType(mat,MATMPIAIJ);
1860:   a    = (Mat_MPIAIJ*)mat->data;
1861:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1862:   mat->factor       = matin->factor;
1863:   mat->assembled    = PETSC_TRUE;
1864:   mat->insertmode   = NOT_SET_VALUES;
1865:   mat->preallocated = PETSC_TRUE;

1867:   a->rstart       = oldmat->rstart;
1868:   a->rend         = oldmat->rend;
1869:   a->cstart       = oldmat->cstart;
1870:   a->cend         = oldmat->cend;
1871:   a->size         = oldmat->size;
1872:   a->rank         = oldmat->rank;
1873:   a->donotstash   = oldmat->donotstash;
1874:   a->roworiented  = oldmat->roworiented;
1875:   a->rowindices   = 0;
1876:   a->rowvalues    = 0;
1877:   a->getrowactive = PETSC_FALSE;

1879:   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1880:   MatStashCreate_Private(matin->comm,1,&mat->stash);
1881:   if (oldmat->colmap) {
1882: #if defined (PETSC_USE_CTABLE)
1883:     PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1884: #else
1885:     PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1886:     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1887:     PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1888: #endif
1889:   } else a->colmap = 0;
1890:   if (oldmat->garray) {
1891:     int len;
1892:     len  = oldmat->B->n;
1893:     PetscMalloc((len+1)*sizeof(int),&a->garray);
1894:     PetscLogObjectMemory(mat,len*sizeof(int));
1895:     if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1896:   } else a->garray = 0;
1897: 
1898:    VecDuplicate(oldmat->lvec,&a->lvec);
1899:   PetscLogObjectParent(mat,a->lvec);
1900:    VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1901:   PetscLogObjectParent(mat,a->Mvctx);
1902:    MatDuplicate(oldmat->A,cpvalues,&a->A);
1903:   PetscLogObjectParent(mat,a->A);
1904:    MatDuplicate(oldmat->B,cpvalues,&a->B);
1905:   PetscLogObjectParent(mat,a->B);
1906:   PetscFListDuplicate(matin->qlist,&mat->qlist);
1907:   *newmat = mat;
1908:   return(0);
1909: }
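
/*
   Usage sketch (added for illustration; not part of the library source).
   MatDuplicate_MPIAIJ() copies the parallel layout, colmap/garray, the vector
   scatter, and duplicates the two SeqAIJ blocks.  Caller side, assuming the
   MatDuplicateOption values of this release (error checking omitted):

     Mat B;
     MatDuplicate(A,MAT_COPY_VALUES,&B);    // same layout, pattern and values as A

   Passing MAT_DO_NOT_COPY_VALUES instead keeps the nonzero pattern but leaves
   the values zeroed.
*/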

1911:  #include petscsys.h

1915: int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1916: {
1917:   Mat          A;
1918:   PetscScalar  *vals,*svals;
1919:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1920:   MPI_Status   status;
1921:   int          i,nz,ierr,j,rstart,rend,fd;
1922:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1923:   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1924:   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

1927:   MPI_Comm_size(comm,&size);
1928:   MPI_Comm_rank(comm,&rank);
1929:   if (!rank) {
1930:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1931:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1932:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1933:     if (header[3] < 0) {
1934:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1935:     }
1936:   }

1938:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1939:   M = header[1]; N = header[2];
1940:   /* determine ownership of all rows */
1941:   m = M/size + ((M % size) > rank);
1942:   PetscMalloc((size+2)*sizeof(int),&rowners);
1943:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1944:   rowners[0] = 0;
1945:   for (i=2; i<=size; i++) {
1946:     rowners[i] += rowners[i-1];
1947:   }
1948:   rstart = rowners[rank];
1949:   rend   = rowners[rank+1];

1951:   /* distribute row lengths to all processors */
1952:   PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1953:   offlens = ourlens + (rend-rstart);
1954:   if (!rank) {
1955:     PetscMalloc(M*sizeof(int),&rowlengths);
1956:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1957:     PetscMalloc(size*sizeof(int),&sndcounts);
1958:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1959:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1960:     PetscFree(sndcounts);
1961:   } else {
1962:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1963:   }

1965:   if (!rank) {
1966:     /* calculate the number of nonzeros on each processor */
1967:     PetscMalloc(size*sizeof(int),&procsnz);
1968:     PetscMemzero(procsnz,size*sizeof(int));
1969:     for (i=0; i<size; i++) {
1970:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1971:         procsnz[i] += rowlengths[j];
1972:       }
1973:     }
1974:     PetscFree(rowlengths);

1976:     /* determine max buffer needed and allocate it */
1977:     maxnz = 0;
1978:     for (i=0; i<size; i++) {
1979:       maxnz = PetscMax(maxnz,procsnz[i]);
1980:     }
1981:     PetscMalloc(maxnz*sizeof(int),&cols);

1983:     /* read in my part of the matrix column indices  */
1984:     nz   = procsnz[0];
1985:     PetscMalloc(nz*sizeof(int),&mycols);
1986:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1988:     /* read in everyone else's part and ship it off */
1989:     for (i=1; i<size; i++) {
1990:       nz   = procsnz[i];
1991:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1992:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1993:     }
1994:     PetscFree(cols);
1995:   } else {
1996:     /* determine buffer space needed for message */
1997:     nz = 0;
1998:     for (i=0; i<m; i++) {
1999:       nz += ourlens[i];
2000:     }
2001:     PetscMalloc((nz+1)*sizeof(int),&mycols);

2003:     /* receive message of column indices */
2004:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
2005:     MPI_Get_count(&status,MPI_INT,&maxnz);
2006:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2007:   }

2009:   /* determine column ownership if matrix is not square */
2010:   if (N != M) {
2011:     n      = N/size + ((N % size) > rank);
2012:     MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
2013:     cstart = cend - n;
2014:   } else {
2015:     cstart = rstart;
2016:     cend   = rend;
2017:     n      = cend - cstart;
2018:   }

2020:   /* loop over local rows, determining number of off diagonal entries */
2021:   PetscMemzero(offlens,m*sizeof(int));
2022:   jj = 0;
2023:   for (i=0; i<m; i++) {
2024:     for (j=0; j<ourlens[i]; j++) {
2025:       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2026:       jj++;
2027:     }
2028:   }

2030:   /* create our matrix */
2031:   for (i=0; i<m; i++) {
2032:     ourlens[i] -= offlens[i];
2033:   }
2034:   MatCreate(comm,m,n,M,N,&A);
2035:   MatSetType(A,type);
2036:   MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);

2038:   MatSetOption(A,MAT_COLUMNS_SORTED);
2039:   for (i=0; i<m; i++) {
2040:     ourlens[i] += offlens[i];
2041:   }

2043:   if (!rank) {
2044:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

2046:     /* read in my part of the matrix numerical values  */
2047:     nz   = procsnz[0];
2048:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2049: 
2050:     /* insert into matrix */
2051:     jj      = rstart;
2052:     smycols = mycols;
2053:     svals   = vals;
2054:     for (i=0; i<m; i++) {
2055:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2056:       smycols += ourlens[i];
2057:       svals   += ourlens[i];
2058:       jj++;
2059:     }

2061:     /* read in other processors and ship out */
2062:     for (i=1; i<size; i++) {
2063:       nz   = procsnz[i];
2064:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2065:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2066:     }
2067:     PetscFree(procsnz);
2068:   } else {
2069:     /* receive numeric values */
2070:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

2072:     /* receive message of values */
2073:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2074:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2075:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

2077:     /* insert into matrix */
2078:     jj      = rstart;
2079:     smycols = mycols;
2080:     svals   = vals;
2081:     for (i=0; i<m; i++) {
2082:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2083:       smycols += ourlens[i];
2084:       svals   += ourlens[i];
2085:       jj++;
2086:     }
2087:   }
2088:   PetscFree(ourlens);
2089:   PetscFree(vals);
2090:   PetscFree(mycols);
2091:   PetscFree(rowners);

2093:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2094:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2095:   *newmat = A;
2096:   return(0);
2097: }
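
/*
   Usage sketch (added for illustration; not part of the library source).
   MatLoad_MPIAIJ() expects the PETSc binary format: a header (cookie, M, N,
   total nonzeros), then the row lengths, the column indices and the values;
   process 0 reads the file and ships each piece to its owner.  Caller side,
   assuming the binary-viewer read flag of this release, PETSC_BINARY_RDONLY
   (error checking omitted):

     Mat         A;
     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);
     MatLoad(viewer,MATMPIAIJ,&A);
     PetscViewerDestroy(viewer);
*/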

2101: /*
2102:     Not great since it makes two copies of the submatrix: first a SeqAIJ
2103:   on each process, and then the end result by concatenating the local matrices.
2104:   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2105: */
2106: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2107: {
2108:   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2109:   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2110:   Mat          *local,M,Mreuse;
2111:   PetscScalar  *vwork,*aa;
2112:   MPI_Comm     comm = mat->comm;
2113:   Mat_SeqAIJ   *aij;


2117:   MPI_Comm_rank(comm,&rank);
2118:   MPI_Comm_size(comm,&size);

2120:   if (call ==  MAT_REUSE_MATRIX) {
2121:     PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2122:     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2123:     local = &Mreuse;
2124:     MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2125:   } else {
2126:     MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2127:     Mreuse = *local;
2128:     PetscFree(local);
2129:   }

2131:   /* 
2132:       m - number of local rows
2133:       n - number of columns (same on all processors)
2134:       rstart - first row in new global matrix generated
2135:   */
2136:   MatGetSize(Mreuse,&m,&n);
2137:   if (call == MAT_INITIAL_MATRIX) {
2138:     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2139:     ii  = aij->i;
2140:     jj  = aij->j;

2142:     /*
2143:         Determine the number of non-zeros in the diagonal and off-diagonal 
2144:         portions of the matrix in order to do correct preallocation
2145:     */

2147:     /* first get start and end of "diagonal" columns */
2148:     if (csize == PETSC_DECIDE) {
2149:       ISGetSize(isrow,&mglobal);
2150:       if (mglobal == n) { /* square matrix */
2151:         nlocal = m;
2152:       } else {
2153:         nlocal = n/size + ((n % size) > rank);
2154:       }
2155:     } else {
2156:       nlocal = csize;
2157:     }
2158:     MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
2159:     rstart = rend - nlocal;
2160:     if (rank == size - 1 && rend != n) {
2161:       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2162:     }

2164:     /* next, compute all the lengths */
2165:     PetscMalloc((2*m+1)*sizeof(int),&dlens);
2166:     olens = dlens + m;
2167:     for (i=0; i<m; i++) {
2168:       jend = ii[i+1] - ii[i];
2169:       olen = 0;
2170:       dlen = 0;
2171:       for (j=0; j<jend; j++) {
2172:         if (*jj < rstart || *jj >= rend) olen++;
2173:         else dlen++;
2174:         jj++;
2175:       }
2176:       olens[i] = olen;
2177:       dlens[i] = dlen;
2178:     }
2179:     MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);
2180:     PetscFree(dlens);
2181:   } else {
2182:     int ml,nl;

2184:     M = *newmat;
2185:     MatGetLocalSize(M,&ml,&nl);
2186:     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2187:     MatZeroEntries(M);
2188:     /*
2189:          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2190:        rather than the slower MatSetValues().
2191:     */
2192:     M->was_assembled = PETSC_TRUE;
2193:     M->assembled     = PETSC_FALSE;
2194:   }
2195:   MatGetOwnershipRange(M,&rstart,&rend);
2196:   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2197:   ii  = aij->i;
2198:   jj  = aij->j;
2199:   aa  = aij->a;
2200:   for (i=0; i<m; i++) {
2201:     row   = rstart + i;
2202:     nz    = ii[i+1] - ii[i];
2203:     cwork = jj;     jj += nz;
2204:     vwork = aa;     aa += nz;
2205:     MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2206:   }

2208:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2209:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2210:   *newmat = M;

2212:   /* save submatrix used in processor for next request */
2213:   if (call ==  MAT_INITIAL_MATRIX) {
2214:     PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2215:     PetscObjectDereference((PetscObject)Mreuse);
2216:   }

2218:   return(0);
2219: }
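
/*
   Usage sketch (added for illustration; not part of the library source).
   MatGetSubMatrix_MPIAIJ() first extracts a sequential submatrix on every
   process with MatGetSubMatrices() and then concatenates these into a new
   parallel matrix.  Caller side, with hypothetical index sets selecting the
   locally owned rows and all columns of an assembled MPIAIJ matrix 'mat'
   (error checking omitted):

     IS  isrow,iscol;
     Mat sub;
     int rstart,rend,M,N;
     MatGetSize(mat,&M,&N);
     MatGetOwnershipRange(mat,&rstart,&rend);
     ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);
     ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);
     MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);

   A later call with MAT_REUSE_MATRIX and the same index sets refills 'sub'
   using the "SubMatrix" object composed onto it above.
*/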

2223: /*@C
2224:    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2225:    (the default parallel PETSc format).  For good matrix assembly performance
2226:    the user should preallocate the matrix storage by setting the parameters 
2227:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2228:    performance can be increased by more than a factor of 50.

2230:    Collective on MPI_Comm

2232:    Input Parameters:
2233: +  A - the matrix 
2234: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2235:            (same value is used for all local rows)
2236: .  d_nnz - array containing the number of nonzeros in the various rows of the 
2237:            DIAGONAL portion of the local submatrix (possibly different for each row)
2238:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
2239:            The size of this array is equal to the number of local rows, i.e. 'm'. 
2240:            You must leave room for the diagonal entry even if it is zero.
2241: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2242:            submatrix (same value is used for all local rows).
2243: -  o_nnz - array containing the number of nonzeros in the various rows of the
2244:            OFF-DIAGONAL portion of the local submatrix (possibly different for
2245:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
2246:            structure. The size of this array is equal to the number 
2247:            of local rows, i.e. 'm'. 

2249:    The AIJ format (also called the Yale sparse matrix format or
2250:    compressed row storage) is fully compatible with standard Fortran 77
2251:    storage.  That is, the stored row and column indices can begin at
2252:    either one (as in Fortran) or zero.  See the users manual for details.

2254:    The user MUST specify either the local or global matrix dimensions
2255:    (possibly both).

2257:    The parallel matrix is partitioned such that the first m0 rows belong to 
2258:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
2259:    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.

2261:    The DIAGONAL portion of the local submatrix of a processor can be defined 
2262:    as the submatrix which is obtained by extracting the part corresponding 
2263:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
2264:    first row that belongs to the processor, and r2 is the last row belonging 
2265:    to this processor. This is a square mxm matrix. The remaining portion 
2266:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

2268:    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

2270:    By default, this format uses inodes (identical nodes) when possible.
2271:    We search for consecutive rows with the same nonzero structure, thereby
2272:    reusing matrix information to achieve increased efficiency.

2274:    Options Database Keys:
2275: +  -mat_aij_no_inode  - Do not use inodes
2276: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2277: -  -mat_aij_oneindex - Internally use indexing starting at 1
2278:         rather than 0.  Note that when calling MatSetValues(),
2279:         the user still MUST index entries starting at 0!

2281:    Example usage:
2282:   
2283:    Consider the following 8x8 matrix with 34 non-zero values, that is 
2284:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2285:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
2286:    as follows:

2288: .vb
2289:             1  2  0  |  0  3  0  |  0  4
2290:     Proc0   0  5  6  |  7  0  0  |  8  0
2291:             9  0 10  | 11  0  0  | 12  0
2292:     -------------------------------------
2293:            13  0 14  | 15 16 17  |  0  0
2294:     Proc1   0 18  0  | 19 20 21  |  0  0 
2295:             0  0  0  | 22 23  0  | 24  0
2296:     -------------------------------------
2297:     Proc2  25 26 27  |  0  0 28  | 29  0
2298:            30  0  0  | 31 32 33  |  0 34
2299: .ve

2301:    This can be represented as a collection of submatrices as:

2303: .vb
2304:       A B C
2305:       D E F
2306:       G H I
2307: .ve

2309:    Where the submatrices A,B,C are owned by proc0, D,E,F are
2310:    owned by proc1, G,H,I are owned by proc2.

2312:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2313:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2314:    The 'M','N' parameters are 8,8, and have the same values on all procs.

2316:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2317:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2318:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2319:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2320:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2321:    matrix, and [DF] as another SeqAIJ matrix.

2323:    When d_nz, o_nz parameters are specified, d_nz storage elements are
2324:    allocated for every row of the local diagonal submatrix, and o_nz
2325:    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2326:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros 
2327:    per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 
2328:    In this case, the values of d_nz,o_nz are:
2329: .vb
2330:      proc0 : d_nz = 2, o_nz = 2
2331:      proc1 : d_nz = 3, o_nz = 2
2332:      proc2 : d_nz = 1, o_nz = 4
2333: .ve
2334:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2335:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2336:    for proc2, i.e. we are using 12+15+10=37 storage locations to store 
2337:    34 values.

2339:    When d_nnz, o_nnz parameters are specified, the storage is specified
2340:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2341:    In the above case the values for d_nnz,o_nnz are:
2342: .vb
2343:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2344:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2345:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2346: .ve
2347:    Here the space allocated is the sum of all the above values, i.e. 34, and
2348:    hence the preallocation is perfect.

2350:    Level: intermediate

2352: .keywords: matrix, aij, compressed row, sparse, parallel

2354: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2355: @*/
2356: int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2357: {
2358:   int ierr,(*f)(Mat,int,const int[],int,const int[]);

2361:   PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);
2362:   if (f) {
2363:     (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
2364:   }
2365:   return(0);
2366: }
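
/*
   Usage sketch (added for illustration; not part of the library source).
   Per-row preallocation for the 8x8 example in the manual page above, as
   issued by proc1 (local rows 3-5); the other processes pass their own m, n,
   d_nnz and o_nnz.  Error checking is omitted.

     Mat A;
     int d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);

   With exact d_nnz/o_nnz no additional mallocs occur during MatSetValues()
   and MatAssemblyEnd(), which is where the large assembly speedup quoted
   above comes from.
*/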

2370: /*@C
2371:    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2372:    (the default parallel PETSc format).  For good matrix assembly performance
2373:    the user should preallocate the matrix storage by setting the parameters 
2374:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2375:    performance can be increased by more than a factor of 50.

2377:    Collective on MPI_Comm

2379:    Input Parameters:
2380: +  comm - MPI communicator
2381: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2382:            This value should be the same as the local size used in creating the 
2383:            y vector for the matrix-vector product y = Ax.
2384: .  n - This value should be the same as the local size used in creating the 
2385:        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2386:        calculated if N is given) For square matrices n is almost always m.
2387: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2388: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2389: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2390:            (same value is used for all local rows)
2391: .  d_nnz - array containing the number of nonzeros in the various rows of the 
2392:            DIAGONAL portion of the local submatrix (possibly different for each row)
2393:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
2394:            The size of this array is equal to the number of local rows, i.e. 'm'. 
2395:            You must leave room for the diagonal entry even if it is zero.
2396: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2397:            submatrix (same value is used for all local rows).
2398: -  o_nnz - array containing the number of nonzeros in the various rows of the
2399:            OFF-DIAGONAL portion of the local submatrix (possibly different for
2400:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
2401:            structure. The size of this array is equal to the number 
2402:            of local rows, i.e. 'm'. 

2404:    Output Parameter:
2405: .  A - the matrix 

2407:    Notes:
2408:    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2409:    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2410:    storage requirements for this matrix.

2412:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
2413:    processor then it must be used on all processors that share the object for 
2414:    that argument.

2416:    The AIJ format (also called the Yale sparse matrix format or
2417:    compressed row storage) is fully compatible with standard Fortran 77
2418:    storage.  That is, the stored row and column indices can begin at
2419:    either one (as in Fortran) or zero.  See the users manual for details.

2421:    The user MUST specify either the local or global matrix dimensions
2422:    (possibly both).

2424:    The parallel matrix is partitioned such that the first m0 rows belong to 
2425:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
2426:    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.

2428:    The DIAGONAL portion of the local submatrix of a processor can be defined 
2429:    as the submatrix which is obtained by extracting the part corresponding 
2430:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
2431:    first row that belongs to the processor, and r2 is the last row belonging 
2432:    to this processor. This is a square mxm matrix. The remaining portion 
2433:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

2435:    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

2437:    When calling this routine with a single process communicator, a matrix of
2438:    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2439:    type of communicator, use the construction mechanism:
2440:      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

2442:    By default, this format uses inodes (identical nodes) when possible.
2443:    We search for consecutive rows with the same nonzero structure, thereby
2444:    reusing matrix information to achieve increased efficiency.

2446:    Options Database Keys:
2447: +  -mat_aij_no_inode  - Do not use inodes
2448: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2449: -  -mat_aij_oneindex - Internally use indexing starting at 1
2450:         rather than 0.  Note that when calling MatSetValues(),
2451:         the user still MUST index entries starting at 0!


2454:    Example usage:
2455:   
2456:    Consider the following 8x8 matrix with 34 non-zero values, that is 
2457:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2458:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
2459:    as follows:

2461: .vb
2462:             1  2  0  |  0  3  0  |  0  4
2463:     Proc0   0  5  6  |  7  0  0  |  8  0
2464:             9  0 10  | 11  0  0  | 12  0
2465:     -------------------------------------
2466:            13  0 14  | 15 16 17  |  0  0
2467:     Proc1   0 18  0  | 19 20 21  |  0  0 
2468:             0  0  0  | 22 23  0  | 24  0
2469:     -------------------------------------
2470:     Proc2  25 26 27  |  0  0 28  | 29  0
2471:            30  0  0  | 31 32 33  |  0 34
2472: .ve

2474:    This can be represented as a collection of submatrices as:

2476: .vb
2477:       A B C
2478:       D E F
2479:       G H I
2480: .ve

2482:    Where the submatrices A,B,C are owned by proc0, D,E,F are
2483:    owned by proc1, G,H,I are owned by proc2.

2485:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2486:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2487:    The 'M','N' parameters are 8,8, and have the same values on all procs.

2489:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2490:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2491:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2492:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2493:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2494:    matrix, and [DF] as another SeqAIJ matrix.

2496:    When d_nz, o_nz parameters are specified, d_nz storage elements are
2497:    allocated for every row of the local diagonal submatrix, and o_nz
2498:    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2499:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros 
2500:    per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 
2501:    In this case, the values of d_nz,o_nz are:
2502: .vb
2503:      proc0 : d_nz = 2, o_nz = 2
2504:      proc1 : d_nz = 3, o_nz = 2
2505:      proc2 : d_nz = 1, o_nz = 4
2506: .ve
2507:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2508:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2509:    for proc2, i.e. we are using 12+15+10=37 storage locations to store 
2510:    34 values.

2512:    When d_nnz, o_nnz parameters are specified, the storage is specified
2513:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2514:    In the above case the values for d_nnz,o_nnz are:
2515: .vb
2516:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2517:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2518:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2519: .ve
2520:    Here the space allocated is the sum of all the above values, i.e. 34, and
2521:    hence the preallocation is perfect.

2523:    Level: intermediate

2525: .keywords: matrix, aij, compressed row, sparse, parallel

2527: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2528: @*/
2529: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2530: {
2531:   int ierr,size;

2534:   MatCreate(comm,m,n,M,N,A);
2535:   MPI_Comm_size(comm,&size);
2536:   if (size > 1) {
2537:     MatSetType(*A,MATMPIAIJ);
2538:     MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2539:   } else {
2540:     MatSetType(*A,MATSEQAIJ);
2541:     MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2542:   }
2543:   return(0);
2544: }
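
/*
   Usage sketch (added for illustration; not part of the library source).
   MatCreateMPIAIJ() is just MatCreate() + MatSetType() + preallocation, with a
   SeqAIJ shortcut on one process.  Creating the 8x8 example from the manual
   page above with a single-number preallocation that covers the worst row on
   any process (error checking omitted):

     Mat A;
     MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,
                     3,PETSC_NULL,4,PETSC_NULL,&A);
     // ...MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as usual...
     MatDestroy(A);
*/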

2548: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2549: {
2550:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2552:   *Ad     = a->A;
2553:   *Ao     = a->B;
2554:   *colmap = a->garray;
2555:   return(0);
2556: }
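
/*
   Usage sketch (added for illustration; not part of the library source).
   MatMPIAIJGetSeqAIJ() exposes the two local SeqAIJ blocks and the garray that
   maps local column indices of the off-diagonal block back to global columns
   (error checking omitted):

     Mat Ad,Ao;
     int *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     // column j of Ao corresponds to global column colmap[j]

   The returned matrices are the internal blocks, not copies, so they must not
   be destroyed by the caller.
*/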

2560: int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2561: {
2562:   int        ierr,i;
2563:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

2566:   if (coloring->ctype == IS_COLORING_LOCAL) {
2567:     ISColoringValue *allcolors,*colors;
2568:     ISColoring      ocoloring;

2570:     /* set coloring for diagonal portion */
2571:     MatSetColoring_SeqAIJ(a->A,coloring);

2573:     /* set coloring for off-diagonal portion */
2574:     ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2575:     PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2576:     for (i=0; i<a->B->n; i++) {
2577:       colors[i] = allcolors[a->garray[i]];
2578:     }
2579:     PetscFree(allcolors);
2580:     ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2581:     MatSetColoring_SeqAIJ(a->B,ocoloring);
2582:     ISColoringDestroy(ocoloring);
2583:   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2584:     ISColoringValue *colors;
2585:     int             *larray;
2586:     ISColoring      ocoloring;

2588:     /* set coloring for diagonal portion */
2589:     PetscMalloc((a->A->n+1)*sizeof(int),&larray);
2590:     for (i=0; i<a->A->n; i++) {
2591:       larray[i] = i + a->cstart;
2592:     }
2593:     ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2594:     PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);
2595:     for (i=0; i<a->A->n; i++) {
2596:       colors[i] = coloring->colors[larray[i]];
2597:     }
2598:     PetscFree(larray);
2599:     ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2600:     MatSetColoring_SeqAIJ(a->A,ocoloring);
2601:     ISColoringDestroy(ocoloring);

2603:     /* set coloring for off-diagonal portion */
2604:     PetscMalloc((a->B->n+1)*sizeof(int),&larray);
2605:     ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2606:     PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2607:     for (i=0; i<a->B->n; i++) {
2608:       colors[i] = coloring->colors[larray[i]];
2609:     }
2610:     PetscFree(larray);
2611:     ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2612:     MatSetColoring_SeqAIJ(a->B,ocoloring);
2613:     ISColoringDestroy(ocoloring);
2614:   } else {
2615:     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2616:   }

2618:   return(0);
2619: }

2623: int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2624: {
2625:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2626:   int        ierr;

2629:   MatSetValuesAdic_SeqAIJ(a->A,advalues);
2630:   MatSetValuesAdic_SeqAIJ(a->B,advalues);
2631:   return(0);
2632: }

2636: int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2637: {
2638:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2639:   int        ierr;

2642:   MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2643:   MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2644:   return(0);
2645: }

2649: /*@C
2650:       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2651:                  matrices from each processor

2653:     Collective on MPI_Comm

2655:    Input Parameters:
2656: +    comm - the communicator the parallel matrix will live on
2657: -    inmat - the input sequential matrices

2659:    Output Parameter:
2660: .    outmat - the parallel matrix generated

2662:     Level: advanced

2664:    Notes: The number of columns of the input sequential matrix on EACH
2665:       processor MUST be the same.

2667: @*/
2668: int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2669: {
2670:   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2671:   PetscScalar *values;
2672:   PetscMap    columnmap,rowmap;

2675: 
2676:   MatGetSize(inmat,&m,&n);

2678:   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2679:   PetscMapCreate(comm,&columnmap);
2680:   PetscMapSetSize(columnmap,n);
2681:   PetscMapSetType(columnmap,MAP_MPI);
2682:   PetscMapGetLocalSize(columnmap,&n);
2683:   PetscMapDestroy(columnmap);

2685:   PetscMapCreate(comm,&rowmap);
2686:   PetscMapSetLocalSize(rowmap,m);
2687:   PetscMapSetType(rowmap,MAP_MPI);
2688:   PetscMapGetLocalRange(rowmap,&rstart,0);
2689:   PetscMapDestroy(rowmap);

2691:   MatPreallocateInitialize(comm,m,n,dnz,onz);
2692:   for (i=0;i<m;i++) {
2693:     MatGetRow(inmat,i,&nnz,&indx,&values);
2694:     MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
2695:     MatRestoreRow(inmat,i,&nnz,&indx,&values);
2696:   }
2697:   MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);
2698:   MatPreallocateFinalize(dnz,onz);

2700:   for (i=0;i<m;i++) {
2701:     MatGetRow(inmat,i,&nnz,&indx,&values);
2702:     I    = i + rstart;
2703:     MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);
2704:     MatRestoreRow(inmat,i,&nnz,&indx,&values);
2705:   }
2706:   MatDestroy(inmat);
2707:   MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
2708:   MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);

2710:   return(0);
2711: }
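
/*
   Usage sketch (added for illustration; not part of the library source).
   Each process contributes a sequential matrix; MatMerge() stacks them by rows
   into one parallel matrix, preallocating with MatPreallocateInitialize() and
   destroying the input matrix when done.  Here 'mlocal' and 'n' are
   placeholders for the local row count and the column count (which must be
   identical on every process); error checking omitted.

     Mat seq,par;
     MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,n,5,PETSC_NULL,&seq);
     // ...MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() on seq...
     MatMerge(PETSC_COMM_WORLD,seq,&par);   // seq is destroyed inside MatMerge()
*/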

2715: int MatFileSplit(Mat A,char *outfile)
2716: {
2717:   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2718:   PetscViewer out;
2719:   char        *name;
2720:   Mat         B;
2721:   PetscScalar *values;

2724: 
2725:   MatGetLocalSize(A,&m,0);
2726:   MatGetSize(A,0,&N);
2727:   MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);
2728:   MatGetOwnershipRange(A,&rstart,0);
2729:   for (i=0;i<m;i++) {
2730:     MatGetRow(A,i+rstart,&nnz,&indx,&values);
2731:     MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
2732:     MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
2733:   }
2734:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2735:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2737:   MPI_Comm_rank(A->comm,&rank);
2738:   PetscStrlen(outfile,&len);
2739:   PetscMalloc((len+5)*sizeof(char),&name);
2740:   sprintf(name,"%s.%d",outfile,rank);
2741:   PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);
2742:   PetscFree(name);
2743:   MatView(B,out);
2744:   PetscViewerDestroy(out);
2745:   MatDestroy(B);
2746:   return(0);
2747: }
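
/*
   Usage sketch (added for illustration; not part of the library source).
   MatFileSplit() writes the locally owned rows of A as a sequential matrix to
   one binary file per process, named <outfile>.<rank> (error checking
   omitted):

     MatFileSplit(A,"amat");   // rank 0 writes amat.0, rank 1 writes amat.1, ...
*/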