Actual source code: mpiaij.c

  1: /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

 3:  #include "src/mat/impls/aij/mpi/mpiaij.h"
 4:  #include "src/vec/vecimpl.h"
 5:  #include "src/inline/spops.h"

  7: EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
  8: EXTERN int DisAssemble_MPIAIJ(Mat);
  9: EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
 10: EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
 11: EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
 12: EXTERN int MatPrintHelp_SeqAIJ(Mat);
 13: EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);

 15: /* 
 16:   Local utility routine that creates a mapping from the global column 
 17: number to the local number in the off-diagonal part of the local 
 18: storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at 
 19: a slightly higher hash table cost; without it, it is not scalable (each processor
 20: has an order-N integer array) but it is fast to access.
 21: */
 22: int CreateColmap_MPIAIJ_Private(Mat mat)
 23: {
 24:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
 25:   int        n = aij->B->n,i,ierr;

 28: #if defined (PETSC_USE_CTABLE)
 29:   PetscTableCreate(n,&aij->colmap);
 30:   for (i=0; i<n; i++){
 31:     PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
 32:   }
 33: #else
 34:   PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
 35:   PetscLogObjectMemory(mat,mat->N*sizeof(int));
 36:   PetscMemzero(aij->colmap,mat->N*sizeof(int));
 37:   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
 38: #endif
 39:   return(0);
 40: }
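The colmap built above is what later routines consult to translate a global column
number into a local column of the off-diagonal block; a minimal sketch of that lookup
(mirroring MatSetValues_MPIAIJ() below, with a hypothetical global column index gcol
and the aij/col variables of its callers) looks like:

#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&col);   /* keys and values are stored shifted by one */
  col--;                                     /* col < 0 means gcol is not (yet) present in B */
#else
  col = aij->colmap[gcol] - 1;               /* a zero entry means gcol is not present in B */
#endif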

 42: #define CHUNKSIZE   15
 43: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
 44: { \
 45:  \
 46:     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
 47:     rmax = aimax[row]; nrow = ailen[row];  \
 48:     col1 = col - shift; \
 49:      \
 50:     low = 0; high = nrow; \
 51:     while (high-low > 5) { \
 52:       t = (low+high)/2; \
 53:       if (rp[t] > col) high = t; \
 54:       else             low  = t; \
 55:     } \
 56:       for (_i=low; _i<high; _i++) { \
 57:         if (rp[_i] > col1) break; \
 58:         if (rp[_i] == col1) { \
 59:           if (addv == ADD_VALUES) ap[_i] += value;   \
 60:           else                  ap[_i] = value; \
 61:           goto a_noinsert; \
 62:         } \
 63:       }  \
 64:       if (nonew == 1) goto a_noinsert; \
 65:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
 66:       if (nrow >= rmax) { \
 67:         /* there is no extra room in row, therefore enlarge */ \
 68:         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
 69:         PetscScalar *new_a; \
 70:  \
 71:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 72:  \
 73:         /* malloc new storage space */ \
 74:         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
 75:         ierr    = PetscMalloc(len,&new_a); \
 76:         new_j   = (int*)(new_a + new_nz); \
 77:         new_i   = new_j + new_nz; \
 78:  \
 79:         /* copy over old data into new slots */ \
 80:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
 81:         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
 82:         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
 83:         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
 84:         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
 85:                                                            len*sizeof(int)); \
 86:         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar)); \
 87:         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
 88:                                                            len*sizeof(PetscScalar));  \
 89:         /* free up old matrix storage */ \
 90:  \
 91:         PetscFree(a->a);  \
 92:         if (!a->singlemalloc) { \
 93:            PetscFree(a->i); \
 94:            PetscFree(a->j); \
 95:         } \
 96:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
 97:         a->singlemalloc = PETSC_TRUE; \
 98:  \
 99:         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
100:         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
101:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
102:         a->maxnz += CHUNKSIZE; \
103:         a->reallocs++; \
104:       } \
105:       N = nrow++ - 1; a->nz++; \
106:       /* shift up all the later entries in this row */ \
107:       for (ii=N; ii>=_i; ii--) { \
108:         rp[ii+1] = rp[ii]; \
109:         ap[ii+1] = ap[ii]; \
110:       } \
111:       rp[_i] = col1;  \
112:       ap[_i] = value;  \
113:       a_noinsert: ; \
114:       ailen[row] = nrow; \
115: } 

117: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
118: { \
119:  \
120:     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
121:     rmax = bimax[row]; nrow = bilen[row];  \
122:     col1 = col - shift; \
123:      \
124:     low = 0; high = nrow; \
125:     while (high-low > 5) { \
126:       t = (low+high)/2; \
127:       if (rp[t] > col) high = t; \
128:       else             low  = t; \
129:     } \
130:       for (_i=low; _i<high; _i++) { \
131:         if (rp[_i] > col1) break; \
132:         if (rp[_i] == col1) { \
133:           if (addv == ADD_VALUES) ap[_i] += value;   \
134:           else                  ap[_i] = value; \
135:           goto b_noinsert; \
136:         } \
137:       }  \
138:       if (nonew == 1) goto b_noinsert; \
139:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
140:       if (nrow >= rmax) { \
141:         /* there is no extra room in row, therefore enlarge */ \
142:         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
143:         PetscScalar *new_a; \
144:  \
145:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
146:  \
147:         /* malloc new storage space */ \
148:         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
149:         ierr    = PetscMalloc(len,&new_a); \
150:         new_j   = (int*)(new_a + new_nz); \
151:         new_i   = new_j + new_nz; \
152:  \
153:         /* copy over old data into new slots */ \
154:         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
155:         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
156:         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
157:         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
158:         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
159:                                                            len*sizeof(int)); \
160:         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar)); \
161:         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
162:                                                            len*sizeof(PetscScalar));  \
163:         /* free up old matrix storage */ \
164:  \
165:         PetscFree(b->a);  \
166:         if (!b->singlemalloc) { \
167:           PetscFree(b->i); \
168:           PetscFree(b->j); \
169:         } \
170:         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
171:         b->singlemalloc = PETSC_TRUE; \
172:  \
173:         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
174:         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
175:         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
176:         b->maxnz += CHUNKSIZE; \
177:         b->reallocs++; \
178:       } \
179:       N = nrow++ - 1; b->nz++; \
180:       /* shift up all the later entries in this row */ \
181:       for (ii=N; ii>=_i; ii--) { \
182:         rp[ii+1] = rp[ii]; \
183:         ap[ii+1] = ap[ii]; \
184:       } \
185:       rp[_i] = col1;  \
186:       ap[_i] = value;  \
187:       b_noinsert: ; \
188:       bilen[row] = nrow; \
189: }

191: int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
192: {
193:   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
194:   PetscScalar  value;
195:   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
196:   int          cstart = aij->cstart,cend = aij->cend,row,col;
197:   PetscTruth   roworiented = aij->roworiented;

199:   /* Some Variables required in the macro */
200:   Mat          A = aij->A;
201:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
202:   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
203:   PetscScalar  *aa = a->a;
204:   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
205:   Mat          B = aij->B;
206:   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
207:   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
208:   PetscScalar  *ba = b->a;

210:   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
211:   int          nonew = a->nonew,shift = a->indexshift;
212:   PetscScalar  *ap;

215:   for (i=0; i<m; i++) {
216:     if (im[i] < 0) continue;
217: #if defined(PETSC_USE_BOPT_g)
218:     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
219: #endif
220:     if (im[i] >= rstart && im[i] < rend) {
221:       row = im[i] - rstart;
222:       for (j=0; j<n; j++) {
223:         if (in[j] >= cstart && in[j] < cend){
224:           col = in[j] - cstart;
225:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226:           if (ignorezeroentries && value == 0.0) continue;
227:           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
228:           /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
229:         } else if (in[j] < 0) continue;
230: #if defined(PETSC_USE_BOPT_g)
231:         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
232: #endif
233:         else {
234:           if (mat->was_assembled) {
235:             if (!aij->colmap) {
236:               CreateColmap_MPIAIJ_Private(mat);
237:             }
238: #if defined (PETSC_USE_CTABLE)
239:             PetscTableFind(aij->colmap,in[j]+1,&col);
240:             col--;
241: #else
242:             col = aij->colmap[in[j]] - 1;
243: #endif
244:             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
245:               DisAssemble_MPIAIJ(mat);
246:               col =  in[j];
247:               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
248:               B = aij->B;
249:               b = (Mat_SeqAIJ*)B->data;
250:               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
251:               ba = b->a;
252:             }
253:           } else col = in[j];
254:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
255:           if (ignorezeroentries && value == 0.0) continue;
256:           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
257:           /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
258:         }
259:       }
260:     } else {
261:       if (!aij->donotstash) {
262:         if (roworiented) {
263:           if (ignorezeroentries && v[i*n] == 0.0) continue;
264:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
265:         } else {
266:           if (ignorezeroentries && v[i] == 0.0) continue;
267:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
268:         }
269:       }
270:     }
271:   }
272:   return(0);
273: }
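For context, a minimal caller-side sketch of how this routine is normally reached
(not part of mpiaij.c; assumes a hypothetical assembled-size MPIAIJ matrix A and the
standard MatGetOwnershipRange()/MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() calls):

  int         ierr,rstart,rend,row,col;
  PetscScalar v = 1.0;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    col  = row;                                 /* falls in the diagonal block aij->A */
    ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* values destined for rows owned by other processes are cached in mat->stash
     above and communicated during assembly */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);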

275: int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
276: {
277:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
278:   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
279:   int        cstart = aij->cstart,cend = aij->cend,row,col;

282:   for (i=0; i<m; i++) {
283:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
284:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
285:     if (idxm[i] >= rstart && idxm[i] < rend) {
286:       row = idxm[i] - rstart;
287:       for (j=0; j<n; j++) {
288:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
289:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
290:         if (idxn[j] >= cstart && idxn[j] < cend){
291:           col = idxn[j] - cstart;
292:           MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
293:         } else {
294:           if (!aij->colmap) {
295:             CreateColmap_MPIAIJ_Private(mat);
296:           }
297: #if defined (PETSC_USE_CTABLE)
298:           PetscTableFind(aij->colmap,idxn[j]+1,&col);
299:           col --;
300: #else
301:           col = aij->colmap[idxn[j]] - 1;
302: #endif
303:           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
304:           else {
305:             MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
306:           }
307:         }
308:       }
309:     } else {
310:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
311:     }
312:   }
313:   return(0);
314: }

316: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
317: {
318:   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
319:   int         ierr,nstash,reallocs;
320:   InsertMode  addv;

323:   if (aij->donotstash) {
324:     return(0);
325:   }

 327:   /* make sure all processors are in the same mode: either INSERT_VALUES or ADD_VALUES */
328:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
329:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
330:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
331:   }
332:   mat->insertmode = addv; /* in case this processor had no cache */

334:   MatStashScatterBegin_Private(&mat->stash,aij->rowners);
335:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 336:   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
337:   return(0);
338: }


341: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
342: {
343:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
344:   int         i,j,rstart,ncols,n,ierr,flg;
345:   int         *row,*col,other_disassembled;
346:   PetscScalar *val;
347:   InsertMode  addv = mat->insertmode;
348: #if defined(PETSC_HAVE_SUPERLUDIST) 
349:   PetscTruth  flag;
350: #endif

353:   if (!aij->donotstash) {
354:     while (1) {
355:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
356:       if (!flg) break;

358:       for (i=0; i<n;) {
359:         /* Now identify the consecutive vals belonging to the same row */
360:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
361:         if (j < n) ncols = j-i;
362:         else       ncols = n-i;
363:         /* Now assemble all these values with a single function call */
364:         MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
365:         i = j;
366:       }
367:     }
368:     MatStashScatterEnd_Private(&mat->stash);
369:   }
370: 
371:   MatAssemblyBegin(aij->A,mode);
372:   MatAssemblyEnd(aij->A,mode);

374:   /* determine if any processor has disassembled, if so we must 
 375:      also disassemble ourselves, in order that we may reassemble. */
376:   /*
377:      if nonzero structure of submatrix B cannot change then we know that
378:      no processor disassembled thus we can skip this stuff
379:   */
380:   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
381:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
382:     if (mat->was_assembled && !other_disassembled) {
383:       DisAssemble_MPIAIJ(mat);
384:     }
385:   }

387:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
388:     MatSetUpMultiply_MPIAIJ(mat);
389:   }
390:   MatAssemblyBegin(aij->B,mode);
391:   MatAssemblyEnd(aij->B,mode);

393:   if (aij->rowvalues) {
394:     PetscFree(aij->rowvalues);
395:     aij->rowvalues = 0;
396:   }
397: #if defined(PETSC_HAVE_SUPERLUDIST) 
398:   PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);
399:   if (flag) { MatUseSuperLU_DIST_MPIAIJ(mat); }
400: #endif 
401:   return(0);
402: }

404: int MatZeroEntries_MPIAIJ(Mat A)
405: {
406:   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
407:   int        ierr;

410:   MatZeroEntries(l->A);
411:   MatZeroEntries(l->B);
412:   return(0);
413: }

415: int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
416: {
417:   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
418:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
419:   int            *procs,*nprocs,j,idx,nsends,*work,row;
420:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
421:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
422:   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
423:   MPI_Comm       comm = A->comm;
424:   MPI_Request    *send_waits,*recv_waits;
425:   MPI_Status     recv_status,*send_status;
426:   IS             istmp;
427:   PetscTruth     found;

430:   ISGetLocalSize(is,&N);
431:   ISGetIndices(is,&rows);

433:   /*  first count number of contributors to each processor */
434:   PetscMalloc(2*size*sizeof(int),&nprocs);
435:   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));
436:   procs  = nprocs + size;
437:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
438:   for (i=0; i<N; i++) {
439:     idx = rows[i];
440:     found = PETSC_FALSE;
441:     for (j=0; j<size; j++) {
442:       if (idx >= owners[j] && idx < owners[j+1]) {
443:         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
444:       }
445:     }
446:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
447:   }
448:   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

450:   /* inform other processors of number of messages and max length*/
451:   PetscMalloc(2*size*sizeof(int),&work);
452:   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
453:   nrecvs = work[size+rank];
454:   nmax   = work[rank];
455:   ierr   = PetscFree(work);

457:   /* post receives:   */
458:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
459:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
460:   for (i=0; i<nrecvs; i++) {
461:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
462:   }

464:   /* do sends:
465:       1) starts[i] gives the starting index in svalues for stuff going to 
466:          the ith processor
467:   */
468:   PetscMalloc((N+1)*sizeof(int),&svalues);
469:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
470:   PetscMalloc((size+1)*sizeof(int),&starts);
471:   starts[0] = 0;
472:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
473:   for (i=0; i<N; i++) {
474:     svalues[starts[owner[i]]++] = rows[i];
475:   }
476:   ISRestoreIndices(is,&rows);

478:   starts[0] = 0;
479:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
480:   count = 0;
481:   for (i=0; i<size; i++) {
482:     if (procs[i]) {
483:       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
484:     }
485:   }
486:   PetscFree(starts);

488:   base = owners[rank];

490:   /*  wait on receives */
491:   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
492:   source = lens + nrecvs;
493:   count  = nrecvs; slen = 0;
494:   while (count) {
495:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
496:     /* unpack receives into our local space */
497:     MPI_Get_count(&recv_status,MPI_INT,&n);
498:     source[imdex]  = recv_status.MPI_SOURCE;
499:     lens[imdex]    = n;
500:     slen          += n;
501:     count--;
502:   }
503:   PetscFree(recv_waits);
504: 
505:   /* move the data into the send scatter */
506:   PetscMalloc((slen+1)*sizeof(int),&lrows);
507:   count = 0;
508:   for (i=0; i<nrecvs; i++) {
509:     values = rvalues + i*nmax;
510:     for (j=0; j<lens[i]; j++) {
511:       lrows[count++] = values[j] - base;
512:     }
513:   }
514:   PetscFree(rvalues);
515:   PetscFree(lens);
516:   PetscFree(owner);
517:   PetscFree(nprocs);
518: 
519:   /* actually zap the local rows */
520:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
521:   PetscLogObjectParent(A,istmp);

523:   /*
524:         Zero the required rows. If the "diagonal block" of the matrix
 525:      is square and the user wishes to set the diagonal we use separate
526:      code so that MatSetValues() is not called for each diagonal allocating
527:      new memory, thus calling lots of mallocs and slowing things down.

529:        Contributed by: Mathew Knepley
530:   */
531:   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
532:   MatZeroRows(l->B,istmp,0);
533:   if (diag && (l->A->M == l->A->N)) {
534:     ierr      = MatZeroRows(l->A,istmp,diag);
535:   } else if (diag) {
536:     MatZeroRows(l->A,istmp,0);
537:     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
 538:       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
539: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
540:     }
541:     for (i = 0; i < slen; i++) {
542:       row  = lrows[i] + rstart;
543:       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
544:     }
545:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
546:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
547:   } else {
548:     MatZeroRows(l->A,istmp,0);
549:   }
550:   ISDestroy(istmp);
551:   PetscFree(lrows);

553:   /* wait on sends */
554:   if (nsends) {
555:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
556:     MPI_Waitall(nsends,send_waits,send_status);
557:     PetscFree(send_status);
558:   }
559:   PetscFree(send_waits);
560:   PetscFree(svalues);

562:   return(0);
563: }
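A short usage sketch of the interface implemented above (not part of mpiaij.c; the row
numbers are hypothetical, and the IS may name rows owned by any process since they are
routed to their owners by the sends/receives above):

  IS          is;
  int         ierr,rows[2] = {0,5};             /* global row numbers to zero */
  PetscScalar diag = 1.0;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);  /* pass PETSC_NULL instead of &diag to leave the rows entirely zero */
  ierr = ISDestroy(is);CHKERRQ(ierr);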

565: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
566: {
567:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
568:   int        ierr,nt;

571:   VecGetLocalSize(xx,&nt);
572:   if (nt != A->n) {
573:     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
574:   }
575:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
576:   (*a->A->ops->mult)(a->A,xx,yy);
577:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
578:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
579:   return(0);
580: }
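/*
   Editorial note (not part of mpiaij.c): each process stores its rows as a sequential
   "diagonal" block a->A (the columns this process owns) plus a column-compressed
   "off-diagonal" block a->B, and a->lvec receives the off-process entries of the input
   vector gathered by the a->Mvctx scatter.  MatMult above therefore computes, per process,

       yy = (a->A)*xx + (a->B)*lvec

   with the scatter that fills lvec overlapped against the local product; MatMultAdd and
   the transpose routines below follow the same pattern.
*/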

582: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
583: {
584:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
585:   int        ierr;

588:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
589:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
590:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
591:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
592:   return(0);
593: }

595: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
596: {
597:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
598:   int        ierr;

601:   /* do nondiagonal part */
602:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
603:   /* send it on its way */
604:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
605:   /* do local part */
606:   (*a->A->ops->multtranspose)(a->A,xx,yy);
607:   /* receive remote parts: note this assumes the values are not actually */
608:   /* inserted in yy until the next line, which is true for my implementation*/
 609:   /* but may not always be true. */
610:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
611:   return(0);
612: }

614: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
615: {
616:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
617:   int        ierr;

620:   /* do nondiagonal part */
621:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
622:   /* send it on its way */
623:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
624:   /* do local part */
625:   (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
626:   /* receive remote parts: note this assumes the values are not actually */
 627:   /* inserted in zz until the next line, which is true for my implementation*/
 628:   /* but may not always be true. */
629:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
630:   return(0);
631: }

633: /*
634:   This only works correctly for square matrices where the subblock A->A is the 
635:    diagonal block
636: */
637: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
638: {
639:   int        ierr;
640:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

643:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
644:   if (a->rstart != a->cstart || a->rend != a->cend) {
645:     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
646:   }
647:   MatGetDiagonal(a->A,v);
648:   return(0);
649: }

651: int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
652: {
653:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
654:   int        ierr;

657:   MatScale(aa,a->A);
658:   MatScale(aa,a->B);
659:   return(0);
660: }

662: int MatDestroy_MPIAIJ(Mat mat)
663: {
664:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
665:   int        ierr;

668: #if defined(PETSC_USE_LOG)
669:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
670: #endif
671:   MatStashDestroy_Private(&mat->stash);
672:   PetscFree(aij->rowners);
673:   MatDestroy(aij->A);
674:   MatDestroy(aij->B);
675: #if defined (PETSC_USE_CTABLE)
676:   if (aij->colmap) {PetscTableDelete(aij->colmap);}
677: #else
678:   if (aij->colmap) {PetscFree(aij->colmap);}
679: #endif
680:   if (aij->garray) {PetscFree(aij->garray);}
681:   if (aij->lvec)   {VecDestroy(aij->lvec);}
682:   if (aij->Mvctx)  {VecScatterDestroy(aij->Mvctx);}
683:   if (aij->rowvalues) {PetscFree(aij->rowvalues);}
684:   PetscFree(aij);
685:   return(0);
686: }

688: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
689: {
690:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
691:   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
692:   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
693:   PetscTruth        isdraw,isascii,flg;
694:   PetscViewer       sviewer;
695:   PetscViewerFormat format;

698:   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
699:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
700:   if (isascii) {
701:     PetscViewerGetFormat(viewer,&format);
702:     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
703:       MatInfo info;
704:       MPI_Comm_rank(mat->comm,&rank);
705:       MatGetInfo(mat,MAT_LOCAL,&info);
706:       PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
707:       if (flg) {
 708:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
709:                                               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
710:       } else {
 711:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
712:                     rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
713:       }
714:       MatGetInfo(aij->A,MAT_LOCAL,&info);
 715:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
716:       MatGetInfo(aij->B,MAT_LOCAL,&info);
 717:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
718:       PetscViewerFlush(viewer);
719:       VecScatterView(aij->Mvctx,viewer);
720:       return(0);
721:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
722:       return(0);
723:     }
724:   } else if (isdraw) {
725:     PetscDraw       draw;
726:     PetscTruth isnull;
727:     PetscViewerDrawGetDraw(viewer,0,&draw);
728:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
729:   }

731:   if (size == 1) {
732:     PetscObjectSetName((PetscObject)aij->A,mat->name);
733:     MatView(aij->A,viewer);
734:   } else {
735:     /* assemble the entire matrix onto first processor. */
736:     Mat         A;
737:     Mat_SeqAIJ *Aloc;
738:     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
739:     PetscScalar *a;

741:     if (!rank) {
742:       MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
743:     } else {
744:       MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
745:     }
746:     PetscLogObjectParent(mat,A);

748:     /* copy over the A part */
749:     Aloc = (Mat_SeqAIJ*)aij->A->data;
750:     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
751:     row = aij->rstart;
752:     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
753:     for (i=0; i<m; i++) {
754:       MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
755:       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
756:     }
757:     aj = Aloc->j;
758:     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

760:     /* copy over the B part */
761:     Aloc = (Mat_SeqAIJ*)aij->B->data;
762:     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
763:     row  = aij->rstart;
764:     PetscMalloc((ai[m]+1)*sizeof(int),&cols);
765:     ct   = cols;
766:     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
767:     for (i=0; i<m; i++) {
768:       MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
769:       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
770:     }
771:     PetscFree(ct);
772:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
773:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
774:     /* 
775:        Everyone has to call to draw the matrix since the graphics waits are
776:        synchronized across all processors that share the PetscDraw object
777:     */
778:     PetscViewerGetSingleton(viewer,&sviewer);
779:     if (!rank) {
780:       PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
781:       MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
782:     }
783:     PetscViewerRestoreSingleton(viewer,&sviewer);
784:     MatDestroy(A);
785:   }
786:   return(0);
787: }

789: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
790: {
791:   int        ierr;
792:   PetscTruth isascii,isdraw,issocket,isbinary;
793: 
795:   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
796:   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
797:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
798:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
799:   if (isascii || isdraw || isbinary || issocket) {
800:     MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
801:   } else {
802:     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
803:   }
804:   return(0);
805: }



809: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
810: {
811:   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
812:   int          ierr;
813:   Vec          bb1;
814:   PetscScalar  mone=-1.0;

817:   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

819:   VecDuplicate(bb,&bb1);

821:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
822:     if (flag & SOR_ZERO_INITIAL_GUESS) {
823:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
824:       its--;
825:     }
826: 
827:     while (its--) {
828:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
829:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

831:       /* update rhs: bb1 = bb - B*x */
832:       VecScale(&mone,mat->lvec);
833:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

835:       /* local sweep */
836:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
837: 
838:     }
839:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
840:     if (flag & SOR_ZERO_INITIAL_GUESS) {
841:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
842:       its--;
843:     }
844:     while (its--) {
845:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
846:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

848:       /* update rhs: bb1 = bb - B*x */
849:       VecScale(&mone,mat->lvec);
850:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

852:       /* local sweep */
853:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
854: 
855:     }
856:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
857:     if (flag & SOR_ZERO_INITIAL_GUESS) {
858:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
859:       its--;
860:     }
861:     while (its--) {
862:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
863:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

865:       /* update rhs: bb1 = bb - B*x */
866:       VecScale(&mone,mat->lvec);
867:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

869:       /* local sweep */
870:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
871: 
872:     }
873:   } else {
874:     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
875:   }

877:   VecDestroy(bb1);
878:   return(0);
879: }

881: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
882: {
883:   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
884:   Mat        A = mat->A,B = mat->B;
885:   int        ierr;
886:   PetscReal  isend[5],irecv[5];

889:   info->block_size     = 1.0;
890:   MatGetInfo(A,MAT_LOCAL,info);
891:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
892:   isend[3] = info->memory;  isend[4] = info->mallocs;
893:   MatGetInfo(B,MAT_LOCAL,info);
894:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
895:   isend[3] += info->memory;  isend[4] += info->mallocs;
896:   if (flag == MAT_LOCAL) {
897:     info->nz_used      = isend[0];
898:     info->nz_allocated = isend[1];
899:     info->nz_unneeded  = isend[2];
900:     info->memory       = isend[3];
901:     info->mallocs      = isend[4];
902:   } else if (flag == MAT_GLOBAL_MAX) {
903:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
904:     info->nz_used      = irecv[0];
905:     info->nz_allocated = irecv[1];
906:     info->nz_unneeded  = irecv[2];
907:     info->memory       = irecv[3];
908:     info->mallocs      = irecv[4];
909:   } else if (flag == MAT_GLOBAL_SUM) {
910:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
911:     info->nz_used      = irecv[0];
912:     info->nz_allocated = irecv[1];
913:     info->nz_unneeded  = irecv[2];
914:     info->memory       = irecv[3];
915:     info->mallocs      = irecv[4];
916:   }
917:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
918:   info->fill_ratio_needed = 0;
919:   info->factor_mallocs    = 0;
920:   info->rows_global       = (double)matin->M;
921:   info->columns_global    = (double)matin->N;
922:   info->rows_local        = (double)matin->m;
923:   info->columns_local     = (double)matin->N;

925:   return(0);
926: }

928: int MatSetOption_MPIAIJ(Mat A,MatOption op)
929: {
930:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
931:   int        ierr;

934:   switch (op) {
935:   case MAT_NO_NEW_NONZERO_LOCATIONS:
936:   case MAT_YES_NEW_NONZERO_LOCATIONS:
937:   case MAT_COLUMNS_UNSORTED:
938:   case MAT_COLUMNS_SORTED:
939:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
940:   case MAT_KEEP_ZEROED_ROWS:
941:   case MAT_NEW_NONZERO_LOCATION_ERR:
942:   case MAT_USE_INODES:
943:   case MAT_DO_NOT_USE_INODES:
944:   case MAT_IGNORE_ZERO_ENTRIES:
945:     MatSetOption(a->A,op);
946:     MatSetOption(a->B,op);
947:     break;
948:   case MAT_ROW_ORIENTED:
949:     a->roworiented = PETSC_TRUE;
950:     MatSetOption(a->A,op);
951:     MatSetOption(a->B,op);
952:     break;
953:   case MAT_ROWS_SORTED:
954:   case MAT_ROWS_UNSORTED:
955:   case MAT_YES_NEW_DIAGONALS:
956:   case MAT_USE_SINGLE_PRECISION_SOLVES:
 957:     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
958:     break;
959:   case MAT_COLUMN_ORIENTED:
960:     a->roworiented = PETSC_FALSE;
961:     MatSetOption(a->A,op);
962:     MatSetOption(a->B,op);
963:     break;
964:   case MAT_IGNORE_OFF_PROC_ENTRIES:
965:     a->donotstash = PETSC_TRUE;
966:     break;
967:   case MAT_NO_NEW_DIAGONALS:
968:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
969:   default:
970:     SETERRQ(PETSC_ERR_SUP,"unknown option");
971:   }
972:   return(0);
973: }

975: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
976: {
977:   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
978:   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
979:   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
980:   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
981:   int          *cmap,*idx_p;

984:   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
985:   mat->getrowactive = PETSC_TRUE;

987:   if (!mat->rowvalues && (idx || v)) {
988:     /*
989:         allocate enough space to hold information from the longest row.
990:     */
991:     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
992:     int     max = 1,tmp;
993:     for (i=0; i<matin->m; i++) {
994:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
995:       if (max < tmp) { max = tmp; }
996:     }
997:     PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
998:     mat->rowindices = (int*)(mat->rowvalues + max);
999:   }

1001:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1002:   lrow = row - rstart;

1004:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1005:   if (!v)   {pvA = 0; pvB = 0;}
1006:   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1007:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1008:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1009:   nztot = nzA + nzB;

1011:   cmap  = mat->garray;
1012:   if (v  || idx) {
1013:     if (nztot) {
1014:       /* Sort by increasing column numbers, assuming A and B already sorted */
1015:       int imark = -1;
1016:       if (v) {
1017:         *v = v_p = mat->rowvalues;
1018:         for (i=0; i<nzB; i++) {
1019:           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1020:           else break;
1021:         }
1022:         imark = i;
1023:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1024:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1025:       }
1026:       if (idx) {
1027:         *idx = idx_p = mat->rowindices;
1028:         if (imark > -1) {
1029:           for (i=0; i<imark; i++) {
1030:             idx_p[i] = cmap[cworkB[i]];
1031:           }
1032:         } else {
1033:           for (i=0; i<nzB; i++) {
1034:             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1035:             else break;
1036:           }
1037:           imark = i;
1038:         }
1039:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1040:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1041:       }
1042:     } else {
1043:       if (idx) *idx = 0;
1044:       if (v)   *v   = 0;
1045:     }
1046:   }
1047:   *nz = nztot;
1048:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1049:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1050:   return(0);
1051: }
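A minimal usage sketch of the row access implemented above (not part of mpiaij.c;
assumes a hypothetical global row index row owned by this process, per the
"Only local rows" restriction):

  int          ierr,nz,*cols;
  PetscScalar  *vals;

  ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  /* cols[]/vals[] come back merged from aij->A and aij->B in increasing global
     column order, using the rowindices/rowvalues workspace allocated above */
  ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);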

1053: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1054: {
1055:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

1058:   if (aij->getrowactive == PETSC_FALSE) {
1059:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1060:   }
1061:   aij->getrowactive = PETSC_FALSE;
1062:   return(0);
1063: }

1065: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1066: {
1067:   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1068:   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1069:   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1070:   PetscReal    sum = 0.0;
1071:   PetscScalar  *v;

1074:   if (aij->size == 1) {
1075:      MatNorm(aij->A,type,norm);
1076:   } else {
1077:     if (type == NORM_FROBENIUS) {
1078:       v = amat->a;
1079:       for (i=0; i<amat->nz; i++) {
1080: #if defined(PETSC_USE_COMPLEX)
1081:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1082: #else
1083:         sum += (*v)*(*v); v++;
1084: #endif
1085:       }
1086:       v = bmat->a;
1087:       for (i=0; i<bmat->nz; i++) {
1088: #if defined(PETSC_USE_COMPLEX)
1089:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1090: #else
1091:         sum += (*v)*(*v); v++;
1092: #endif
1093:       }
1094:       MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1095:       *norm = sqrt(*norm);
1096:     } else if (type == NORM_1) { /* max column norm */
1097:       PetscReal *tmp,*tmp2;
1098:       int    *jj,*garray = aij->garray;
1099:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1100:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1101:       PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1102:       *norm = 0.0;
1103:       v = amat->a; jj = amat->j;
1104:       for (j=0; j<amat->nz; j++) {
1105:         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1106:       }
1107:       v = bmat->a; jj = bmat->j;
1108:       for (j=0; j<bmat->nz; j++) {
1109:         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1110:       }
1111:       MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1112:       for (j=0; j<mat->N; j++) {
1113:         if (tmp2[j] > *norm) *norm = tmp2[j];
1114:       }
1115:       PetscFree(tmp);
1116:       PetscFree(tmp2);
1117:     } else if (type == NORM_INFINITY) { /* max row norm */
1118:       PetscReal ntemp = 0.0;
1119:       for (j=0; j<aij->A->m; j++) {
1120:         v = amat->a + amat->i[j] + shift;
1121:         sum = 0.0;
1122:         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1123:           sum += PetscAbsScalar(*v); v++;
1124:         }
1125:         v = bmat->a + bmat->i[j] + shift;
1126:         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1127:           sum += PetscAbsScalar(*v); v++;
1128:         }
1129:         if (sum > ntemp) ntemp = sum;
1130:       }
1131:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1132:     } else {
1133:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1134:     }
1135:   }
1136:   return(0);
1137: }

1139: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1140: {
1141:   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1142:   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1143:   int          ierr,shift = Aloc->indexshift;
1144:   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1145:   Mat          B;
1146:   PetscScalar  *array;

1149:   if (!matout && M != N) {
1150:     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1151:   }

1153:   MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);

1155:   /* copy over the A part */
1156:   Aloc = (Mat_SeqAIJ*)a->A->data;
1157:   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1158:   row = a->rstart;
1159:   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1160:   for (i=0; i<m; i++) {
1161:     MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1162:     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1163:   }
1164:   aj = Aloc->j;
1165:   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

1167:   /* copy over the B part */
1168:   Aloc = (Mat_SeqAIJ*)a->B->data;
1169:   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1170:   row  = a->rstart;
1171:   PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);
1172:   ct   = cols;
1173:   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1174:   for (i=0; i<m; i++) {
1175:     MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1176:     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1177:   }
1178:   PetscFree(ct);
1179:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1180:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1181:   if (matout) {
1182:     *matout = B;
1183:   } else {
1184:     MatHeaderCopy(A,B);
1185:   }
1186:   return(0);
1187: }

1189: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1190: {
1191:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1192:   Mat        a = aij->A,b = aij->B;
1193:   int        ierr,s1,s2,s3;

1196:   MatGetLocalSize(mat,&s2,&s3);
1197:   if (rr) {
1198:     VecGetLocalSize(rr,&s1);
1199:     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1200:     /* Overlap communication with computation. */
1201:     VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1202:   }
1203:   if (ll) {
1204:     VecGetLocalSize(ll,&s1);
1205:     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1206:     (*b->ops->diagonalscale)(b,ll,0);
1207:   }
1208:   /* scale  the diagonal block */
1209:   (*a->ops->diagonalscale)(a,ll,rr);

1211:   if (rr) {
1212:     /* Do a scatter end and then right scale the off-diagonal block */
1213:     VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1214:     (*b->ops->diagonalscale)(b,0,aij->lvec);
1215:   }
1216: 
1217:   return(0);
1218: }


1221: int MatPrintHelp_MPIAIJ(Mat A)
1222: {
1223:   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1224:   int        ierr;

1227:   if (!a->rank) {
1228:     MatPrintHelp_SeqAIJ(a->A);
1229:   }
1230:   return(0);
1231: }

1233: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1234: {
1236:   *bs = 1;
1237:   return(0);
1238: }
1239: int MatSetUnfactored_MPIAIJ(Mat A)
1240: {
1241:   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1242:   int        ierr;

1245:   MatSetUnfactored(a->A);
1246:   return(0);
1247: }

1249: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1250: {
1251:   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1252:   Mat        a,b,c,d;
1253:   PetscTruth flg;
1254:   int        ierr;

1257:   PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1258:   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1259:   a = matA->A; b = matA->B;
1260:   c = matB->A; d = matB->B;

1262:   MatEqual(a,c,&flg);
1263:   if (flg == PETSC_TRUE) {
1264:     MatEqual(b,d,&flg);
1265:   }
1266:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1267:   return(0);
1268: }

1270: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1271: {
1272:   int        ierr;
1273:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1274:   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1275:   PetscTruth flg;

1278:   PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1279:   if (str != SAME_NONZERO_PATTERN || !flg) {
1280:     /* because of the column compression in the off-processor part of the matrix a->B,
1281:        the number of columns in a->B and b->B may be different, hence we cannot call
1282:        the MatCopy() directly on the two parts. If need be, we can provide a more 
1283:        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1284:        then copying the submatrices */
1285:     MatCopy_Basic(A,B,str);
1286:   } else {
1287:     MatCopy(a->A,b->A,str);
1288:     MatCopy(a->B,b->B,str);
1289:   }
1290:   return(0);
1291: }

1293: int MatSetUpPreallocation_MPIAIJ(Mat A)
1294: {
1295:   int        ierr;

1298:    MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1299:   return(0);
1300: }

1302: EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1303: EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1304: EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1305: EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1306: EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1307: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1308: EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1309: #endif

1311:  #include "petscblaslapack.h"

1313: int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1314: {
1315:   int        ierr,one = 1;
1316:   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1317:   Mat_SeqAIJ *x,*y;

1320:   if (str == SAME_NONZERO_PATTERN) {
1321:     x  = (Mat_SeqAIJ *)xx->A->data;
1322:     y  = (Mat_SeqAIJ *)yy->A->data;
1323:     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1324:     x  = (Mat_SeqAIJ *)xx->B->data;
1325:     y  = (Mat_SeqAIJ *)yy->B->data;
1326:     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1327:   } else {
1328:     MatAXPY_Basic(a,X,Y,str);
1329:   }
1330:   return(0);
1331: }

1333: /* -------------------------------------------------------------------*/
1334: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1335:        MatGetRow_MPIAIJ,
1336:        MatRestoreRow_MPIAIJ,
1337:        MatMult_MPIAIJ,
1338:        MatMultAdd_MPIAIJ,
1339:        MatMultTranspose_MPIAIJ,
1340:        MatMultTransposeAdd_MPIAIJ,
1341:        0,
1342:        0,
1343:        0,
1344:        0,
1345:        0,
1346:        0,
1347:        MatRelax_MPIAIJ,
1348:        MatTranspose_MPIAIJ,
1349:        MatGetInfo_MPIAIJ,
1350:        MatEqual_MPIAIJ,
1351:        MatGetDiagonal_MPIAIJ,
1352:        MatDiagonalScale_MPIAIJ,
1353:        MatNorm_MPIAIJ,
1354:        MatAssemblyBegin_MPIAIJ,
1355:        MatAssemblyEnd_MPIAIJ,
1356:        0,
1357:        MatSetOption_MPIAIJ,
1358:        MatZeroEntries_MPIAIJ,
1359:        MatZeroRows_MPIAIJ,
1360: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1361:        MatLUFactorSymbolic_MPIAIJ_TFS,
1362: #else
1363:        0,
1364: #endif
1365:        0,
1366:        0,
1367:        0,
1368:        MatSetUpPreallocation_MPIAIJ,
1369:        0,
1370:        0,
1371:        0,
1372:        0,
1373:        MatDuplicate_MPIAIJ,
1374:        0,
1375:        0,
1376:        0,
1377:        0,
1378:        MatAXPY_MPIAIJ,
1379:        MatGetSubMatrices_MPIAIJ,
1380:        MatIncreaseOverlap_MPIAIJ,
1381:        MatGetValues_MPIAIJ,
1382:        MatCopy_MPIAIJ,
1383:        MatPrintHelp_MPIAIJ,
1384:        MatScale_MPIAIJ,
1385:        0,
1386:        0,
1387:        0,
1388:        MatGetBlockSize_MPIAIJ,
1389:        0,
1390:        0,
1391:        0,
1392:        0,
1393:        MatFDColoringCreate_MPIAIJ,
1394:        0,
1395:        MatSetUnfactored_MPIAIJ,
1396:        0,
1397:        0,
1398:        MatGetSubMatrix_MPIAIJ,
1399:        MatDestroy_MPIAIJ,
1400:        MatView_MPIAIJ,
1401:        MatGetPetscMaps_Petsc,
1402:        0,
1403:        0,
1404:        0,
1405:        0,
1406:        0,
1407:        0,
1408:        0,
1409:        0,
1410:        MatSetColoring_MPIAIJ,
1411:        MatSetValuesAdic_MPIAIJ,
1412:        MatSetValuesAdifor_MPIAIJ
1413: };

1415: /* ----------------------------------------------------------------------------------------*/

1417: EXTERN_C_BEGIN
1418: int MatStoreValues_MPIAIJ(Mat mat)
1419: {
1420:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1421:   int        ierr;

1424:   MatStoreValues(aij->A);
1425:   MatStoreValues(aij->B);
1426:   return(0);
1427: }
1428: EXTERN_C_END

1430: EXTERN_C_BEGIN
1431: int MatRetrieveValues_MPIAIJ(Mat mat)
1432: {
1433:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1434:   int        ierr;

1437:   MatRetrieveValues(aij->A);
1438:   MatRetrieveValues(aij->B);
1439:   return(0);
1440: }
1441: EXTERN_C_END

1443:  #include "petscpc.h"
1444: EXTERN_C_BEGIN
1445: EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1446: EXTERN_C_END

1448: EXTERN_C_BEGIN
1449: int MatCreate_MPIAIJ(Mat B)
1450: {
1451:   Mat_MPIAIJ *b;
1452:   int        ierr,i,size;
1453: #if defined(PETSC_HAVE_SUPERLUDIST) 
1454:   PetscTruth flg;
1455: #endif 


1459:   MPI_Comm_size(B->comm,&size);

1461:   ierr            = PetscNew(Mat_MPIAIJ,&b);
1462:   B->data         = (void*)b;
1463:   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));
1464:   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1465:   B->factor       = 0;
1466:   B->assembled    = PETSC_FALSE;
1467:   B->mapping      = 0;

1469:   B->insertmode      = NOT_SET_VALUES;
1470:   b->size            = size;
1471:   MPI_Comm_rank(B->comm,&b->rank);

1473:   PetscSplitOwnership(B->comm,&B->m,&B->M);
1474:   PetscSplitOwnership(B->comm,&B->n,&B->N);

1476:   /* the information in the maps duplicates the information computed below, eventually 
1477:      we should remove the duplicate information that is not contained in the maps */
1478:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1479:   PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);

1481:   /* build local table of row and column ownerships */
1482:   PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1483:   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1484:   b->cowners = b->rowners + b->size + 2;
1485:   MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1486:   b->rowners[0] = 0;
1487:   for (i=2; i<=b->size; i++) {
1488:     b->rowners[i] += b->rowners[i-1];
1489:   }
1490:   b->rstart = b->rowners[b->rank];
1491:   b->rend   = b->rowners[b->rank+1];
1492:   MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1493:   b->cowners[0] = 0;
1494:   for (i=2; i<=b->size; i++) {
1495:     b->cowners[i] += b->cowners[i-1];
1496:   }
1497:   b->cstart = b->cowners[b->rank];
1498:   b->cend   = b->cowners[b->rank+1];

1500:   /* build cache for off array entries formed */
1501:   MatStashCreate_Private(B->comm,1,&B->stash);
1502:   b->donotstash  = PETSC_FALSE;
1503:   b->colmap      = 0;
1504:   b->garray      = 0;
1505:   b->roworiented = PETSC_TRUE;

1507:   /* stuff used for matrix vector multiply */
1508:   b->lvec      = PETSC_NULL;
1509:   b->Mvctx     = PETSC_NULL;

1511:   /* stuff for MatGetRow() */
1512:   b->rowindices   = 0;
1513:   b->rowvalues    = 0;
1514:   b->getrowactive = PETSC_FALSE;

1516: #if defined(PETSC_HAVE_SUPERLUDIST) 
1517:   PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);
1518:   if (flg) { MatUseSuperLU_DIST_MPIAIJ(B); }
1519: #endif

1521:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1522:                                      "MatStoreValues_MPIAIJ",
1523:                                      MatStoreValues_MPIAIJ);
1524:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1525:                                      "MatRetrieveValues_MPIAIJ",
1526:                                      MatRetrieveValues_MPIAIJ);
1527:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1528:                                      "MatGetDiagonalBlock_MPIAIJ",
1529:                                      MatGetDiagonalBlock_MPIAIJ);

1531:   return(0);
1532: }
1533: EXTERN_C_END

1535: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1536: {
1537:   Mat        mat;
1538:   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1539:   int        ierr;

1542:   *newmat       = 0;
1543:   MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1544:   MatSetType(mat,MATMPIAIJ);
1545:   a    = (Mat_MPIAIJ*)mat->data;
1546:   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1547:   mat->factor       = matin->factor;
1548:   mat->assembled    = PETSC_TRUE;
1549:   mat->insertmode   = NOT_SET_VALUES;
1550:   mat->preallocated = PETSC_TRUE;

1552:   a->rstart       = oldmat->rstart;
1553:   a->rend         = oldmat->rend;
1554:   a->cstart       = oldmat->cstart;
1555:   a->cend         = oldmat->cend;
1556:   a->size         = oldmat->size;
1557:   a->rank         = oldmat->rank;
1558:   a->donotstash   = oldmat->donotstash;
1559:   a->roworiented  = oldmat->roworiented;
1560:   a->rowindices   = 0;
1561:   a->rowvalues    = 0;
1562:   a->getrowactive = PETSC_FALSE;

1564:   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1565:   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);
1566:   if (oldmat->colmap) {
1567: #if defined (PETSC_USE_CTABLE)
1568:     PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1569: #else
1570:     PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1571:     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1572:     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1573: #endif
1574:   } else a->colmap = 0;
1575:   if (oldmat->garray) {
1576:     int len;
1577:     len  = oldmat->B->n;
1578:     PetscMalloc((len+1)*sizeof(int),&a->garray);
1579:     PetscLogObjectMemory(mat,len*sizeof(int));
1580:     if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1581:   } else a->garray = 0;
1582: 
1583:    VecDuplicate(oldmat->lvec,&a->lvec);
1584:   PetscLogObjectParent(mat,a->lvec);
1585:    VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1586:   PetscLogObjectParent(mat,a->Mvctx);
1587:    MatDuplicate(oldmat->A,cpvalues,&a->A);
1588:   PetscLogObjectParent(mat,a->A);
1589:    MatDuplicate(oldmat->B,cpvalues,&a->B);
1590:   PetscLogObjectParent(mat,a->B);
1591:   PetscFListDuplicate(matin->qlist,&mat->qlist);
1592:   *newmat = mat;
1593:   return(0);
1594: }
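/*
   Illustrative usage sketch (not part of the original mpiaij.c), assuming the 2.1-era
   MatDuplicate() calling sequence and MatDuplicateOption values: duplicate a parallel
   AIJ matrix either with or without copying its numerical values.
*/
static int DuplicateExample(Mat A)
{
  Mat B,C;

  MatDuplicate(A,MAT_COPY_VALUES,&B);        /* copy nonzero structure and values */
  MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&C); /* copy nonzero structure only       */
  MatDestroy(B);
  MatDestroy(C);
  return(0);
}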

1596:  #include petscsys.h

1598: EXTERN_C_BEGIN
1599: int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1600: {
1601:   Mat          A;
1602:   PetscScalar  *vals,*svals;
1603:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1604:   MPI_Status   status;
1605:   int          i,nz,ierr,j,rstart,rend,fd;
1606:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1607:   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1608:   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

1611:   MPI_Comm_size(comm,&size);
1612:   MPI_Comm_rank(comm,&rank);
1613:   if (!rank) {
1614:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1615:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1616:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1617:     if (header[3] < 0) {
1618:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1619:     }
1620:   }

1622:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1623:   M = header[1]; N = header[2];
1624:   /* determine ownership of all rows */
1625:   m = M/size + ((M % size) > rank);
1626:   PetscMalloc((size+2)*sizeof(int),&rowners);
1627:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1628:   rowners[0] = 0;
1629:   for (i=2; i<=size; i++) {
1630:     rowners[i] += rowners[i-1];
1631:   }
1632:   rstart = rowners[rank];
1633:   rend   = rowners[rank+1];

1635:   /* distribute row lengths to all processors */
1636:   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1637:   offlens = ourlens + (rend-rstart);
1638:   if (!rank) {
1639:     PetscMalloc(M*sizeof(int),&rowlengths);
1640:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1641:     PetscMalloc(size*sizeof(int),&sndcounts);
1642:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1643:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1644:     PetscFree(sndcounts);
1645:   } else {
1646:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1647:   }

1649:   if (!rank) {
1650:     /* calculate the number of nonzeros on each processor */
1651:     PetscMalloc(size*sizeof(int),&procsnz);
1652:     PetscMemzero(procsnz,size*sizeof(int));
1653:     for (i=0; i<size; i++) {
1654:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1655:         procsnz[i] += rowlengths[j];
1656:       }
1657:     }
1658:     PetscFree(rowlengths);

1660:     /* determine max buffer needed and allocate it */
1661:     maxnz = 0;
1662:     for (i=0; i<size; i++) {
1663:       maxnz = PetscMax(maxnz,procsnz[i]);
1664:     }
1665:     PetscMalloc(maxnz*sizeof(int),&cols);

1667:     /* read in my part of the matrix column indices  */
1668:     nz   = procsnz[0];
1669:     PetscMalloc(nz*sizeof(int),&mycols);
1670:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1672:     /* read in everyone else's and ship it off */
1673:     for (i=1; i<size; i++) {
1674:       nz   = procsnz[i];
1675:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1676:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1677:     }
1678:     PetscFree(cols);
1679:   } else {
1680:     /* determine buffer space needed for message */
1681:     nz = 0;
1682:     for (i=0; i<m; i++) {
1683:       nz += ourlens[i];
1684:     }
1685:     PetscMalloc((nz+1)*sizeof(int),&mycols);

1687:     /* receive message of column indices*/
1688:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1689:     MPI_Get_count(&status,MPI_INT,&maxnz);
1690:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1691:   }

1693:   /* determine column ownership if matrix is not square */
1694:   if (N != M) {
1695:     n      = N/size + ((N % size) > rank);
1696:     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
1697:     cstart = cend - n;
1698:   } else {
1699:     cstart = rstart;
1700:     cend   = rend;
1701:     n      = cend - cstart;
1702:   }

1704:   /* loop over local rows, determining number of off-diagonal entries */
1705:   PetscMemzero(offlens,m*sizeof(int));
1706:   jj = 0;
1707:   for (i=0; i<m; i++) {
1708:     for (j=0; j<ourlens[i]; j++) {
1709:       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1710:       jj++;
1711:     }
1712:   }

1714:   /* create our matrix */
1715:   for (i=0; i<m; i++) {
1716:     ourlens[i] -= offlens[i];
1717:   }
1718:   MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);
1719:   A = *newmat;
1720:   MatSetOption(A,MAT_COLUMNS_SORTED);
1721:   for (i=0; i<m; i++) {
1722:     ourlens[i] += offlens[i];
1723:   }

1725:   if (!rank) {
1726:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1728:     /* read in my part of the matrix numerical values  */
1729:     nz   = procsnz[0];
1730:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1731: 
1732:     /* insert into matrix */
1733:     jj      = rstart;
1734:     smycols = mycols;
1735:     svals   = vals;
1736:     for (i=0; i<m; i++) {
1737:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1738:       smycols += ourlens[i];
1739:       svals   += ourlens[i];
1740:       jj++;
1741:     }

1743:     /* read in other processors and ship out */
1744:     for (i=1; i<size; i++) {
1745:       nz   = procsnz[i];
1746:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1747:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1748:     }
1749:     PetscFree(procsnz);
1750:   } else {
1751:     /* receive numeric values */
1752:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

1754:     /* receive message of values*/
1755:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1756:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1757:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1759:     /* insert into matrix */
1760:     jj      = rstart;
1761:     smycols = mycols;
1762:     svals   = vals;
1763:     for (i=0; i<m; i++) {
1764:       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1765:       smycols += ourlens[i];
1766:       svals   += ourlens[i];
1767:       jj++;
1768:     }
1769:   }
1770:   PetscFree(ourlens);
1771:   PetscFree(vals);
1772:   PetscFree(mycols);
1773:   PetscFree(rowners);

1775:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1776:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1777:   return(0);
1778: }
1779: EXTERN_C_END
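/*
   Illustrative aside (not part of the original mpiaij.c): MatLoad_MPIAIJ() above reads
   the standard PETSc binary matrix layout -- a 4-int header (cookie, M, N, total number
   of nonzeros), then the M row lengths, then all column indices, then all numerical
   values.  A minimal usage sketch, assuming the 2.1-era PetscViewerBinaryOpen() and
   MatLoad() calling sequences and the PETSC_BINARY_RDONLY open mode:
*/
static int LoadExample(MPI_Comm comm,const char *file,Mat *A)
{
  PetscViewer viewer;

  PetscViewerBinaryOpen(comm,file,PETSC_BINARY_RDONLY,&viewer);
  MatLoad(viewer,MATMPIAIJ,A);     /* dispatches to MatLoad_MPIAIJ() for this type */
  PetscViewerDestroy(viewer);
  return(0);
}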

1781: /*
1782:     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
1783:   on each process, and then the final result obtained by concatenating these
1784:   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1785: */
1786: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1787: {
1788:   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1789:   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1790:   Mat          *local,M,Mreuse;
1791:   PetscScalar  *vwork,*aa;
1792:   MPI_Comm     comm = mat->comm;
1793:   Mat_SeqAIJ   *aij;


1797:   MPI_Comm_rank(comm,&rank);
1798:   MPI_Comm_size(comm,&size);

1800:   if (call ==  MAT_REUSE_MATRIX) {
1801:     PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
1802:     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1803:     local = &Mreuse;
1804:     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
1805:   } else {
1806:     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
1807:     Mreuse = *local;
1808:     ierr   = PetscFree(local);
1809:   }

1811:   /* 
1812:       m - number of local rows
1813:       n - number of columns (same on all processors)
1814:       rstart - first row in new global matrix generated
1815:   */
1816:   MatGetSize(Mreuse,&m,&n);
1817:   if (call == MAT_INITIAL_MATRIX) {
1818:     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1819:     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1820:     ii  = aij->i;
1821:     jj  = aij->j;

1823:     /*
1824:         Determine the number of non-zeros in the diagonal and off-diagonal 
1825:         portions of the matrix in order to do correct preallocation
1826:     */

1828:     /* first get start and end of "diagonal" columns */
1829:     if (csize == PETSC_DECIDE) {
1830:       nlocal = n/size + ((n % size) > rank);
1831:     } else {
1832:       nlocal = csize;
1833:     }
1834:     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
1835:     rstart = rend - nlocal;
1836:     if (rank == size - 1 && rend != n) {
1837:       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1838:     }

1840:     /* next, compute all the lengths */
1841:     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);
1842:     olens = dlens + m;
1843:     for (i=0; i<m; i++) {
1844:       jend = ii[i+1] - ii[i];
1845:       olen = 0;
1846:       dlen = 0;
1847:       for (j=0; j<jend; j++) {
1848:         if (*jj < rstart || *jj >= rend) olen++;
1849:         else dlen++;
1850:         jj++;
1851:       }
1852:       olens[i] = olen;
1853:       dlens[i] = dlen;
1854:     }
1855:     MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);
1856:     PetscFree(dlens);
1857:   } else {
1858:     int ml,nl;

1860:     M = *newmat;
1861:     MatGetLocalSize(M,&ml,&nl);
1862:     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1863:     MatZeroEntries(M);
1864:     /*
1865:          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1866:        rather than the slower MatSetValues().
1867:     */
1868:     M->was_assembled = PETSC_TRUE;
1869:     M->assembled     = PETSC_FALSE;
1870:   }
1871:   MatGetOwnershipRange(M,&rstart,&rend);
1872:   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1873:   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1874:   ii  = aij->i;
1875:   jj  = aij->j;
1876:   aa  = aij->a;
1877:   for (i=0; i<m; i++) {
1878:     row   = rstart + i;
1879:     nz    = ii[i+1] - ii[i];
1880:     cwork = jj;     jj += nz;
1881:     vwork = aa;     aa += nz;
1882:     MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
1883:   }

1885:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
1886:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
1887:   *newmat = M;

1889:   /* save submatrix used in processor for next request */
1890:   if (call ==  MAT_INITIAL_MATRIX) {
1891:     PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
1892:     PetscObjectDereference((PetscObject)Mreuse);
1893:   }

1895:   return(0);
1896: }
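/*
   Illustrative usage sketch (not part of the original mpiaij.c), assuming the 2.1-era
   ISCreateStride() and MatGetSubMatrix() calling sequences: each process lists the rows
   and columns it wants; csize = PETSC_DECIDE lets PETSc choose the new column layout.
   The stride index sets below are only an example choice.
*/
static int GetSubMatrixExample(Mat A,int first_row,int nrows,int ncols,Mat *B)
{
  IS isrow,iscol;

  ISCreateStride(PETSC_COMM_SELF,nrows,first_row,1,&isrow); /* rows this process wants    */
  ISCreateStride(PETSC_COMM_SELF,ncols,0,1,&iscol);         /* columns this process wants */
  MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,B);
  ISDestroy(isrow);
  ISDestroy(iscol);
  return(0);
}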

1898: /*@C
1899:    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
1900:    (the default parallel PETSc format).  For good matrix assembly performance
1901:    the user should preallocate the matrix storage by setting the parameters 
1902:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1903:    performance can be increased by more than a factor of 50.

1905:    Collective on MPI_Comm

1907:    Input Parameters:
1908: +  A - the matrix 
1909: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1910:            (same value is used for all local rows)
1911: .  d_nnz - array containing the number of nonzeros in the various rows of the 
1912:            DIAGONAL portion of the local submatrix (possibly different for each row)
1913:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
1914:            The size of this array is equal to the number of local rows, i.e., 'm'. 
1915:            You must leave room for the diagonal entry even if it is zero.
1916: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1917:            submatrix (same value is used for all local rows).
1918: -  o_nnz - array containing the number of nonzeros in the various rows of the
1919:            OFF-DIAGONAL portion of the local submatrix (possibly different for
1920:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
1921:            structure. The size of this array is equal to the number 
1922:            of local rows, i.e., 'm'. 

1924:    The AIJ format (also called the Yale sparse matrix format or
1925:    compressed row storage), is fully compatible with standard Fortran 77
1926:    storage.  That is, the stored row and column indices can begin at
1927:    either one (as in Fortran) or zero.  See the users manual for details.

1929:    The user MUST specify either the local or global matrix dimensions
1930:    (possibly both).

1932:    The parallel matrix is partitioned such that the first m0 rows belong to 
1933:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
1934:    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

1936:    The DIAGONAL portion of the local submatrix of a processor can be defined 
1937:    as the submatrix which is obtained by extracting the part corresponding 
1938:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
1939:    first row that belongs to the processor, and r2 is the last row belonging 
1940:    to this processor. This is a square mxm matrix. The remaining portion 
1941:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

1943:    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

1945:    By default, this format uses inodes (identical nodes) when possible.
1946:    We search for consecutive rows with the same nonzero structure, thereby
1947:    reusing matrix information to achieve increased efficiency.

1949:    Options Database Keys:
1950: +  -mat_aij_no_inode  - Do not use inodes
1951: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1952: -  -mat_aij_oneindex - Internally use indexing starting at 1
1953:         rather than 0.  Note that when calling MatSetValues(),
1954:         the user still MUST index entries starting at 0!

1956:    Example usage:
1957:   
1958:    Consider the following 8x8 matrix with 34 nonzero values that is 
1959:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1960:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
1961:    as follows:

1963: .vb
1964:             1  2  0  |  0  3  0  |  0  4
1965:     Proc0   0  5  6  |  7  0  0  |  8  0
1966:             9  0 10  | 11  0  0  | 12  0
1967:     -------------------------------------
1968:            13  0 14  | 15 16 17  |  0  0
1969:     Proc1   0 18  0  | 19 20 21  |  0  0 
1970:             0  0  0  | 22 23  0  | 24  0
1971:     -------------------------------------
1972:     Proc2  25 26 27  |  0  0 28  | 29  0
1973:            30  0  0  | 31 32 33  |  0 34
1974: .ve

1976:    This can be represented as a collection of submatrices as:

1978: .vb
1979:       A B C
1980:       D E F
1981:       G H I
1982: .ve

1984:    Here the submatrices A,B,C are owned by proc0, D,E,F are
1985:    owned by proc1, and G,H,I are owned by proc2.

1987:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1988:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1989:    The 'M','N' parameters are 8,8, and have the same values on all procs.

1991:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1992:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1993:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1994:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1995:    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
1996:    matrix and [DF] as another SeqAIJ matrix.

1998:    When d_nz, o_nz parameters are specified, d_nz storage elements are
1999:    allocated for every row of the local diagonal submatrix, and o_nz
2000:    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2001:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 
2002:    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 
2003:    In this case, the values of d_nz,o_nz are:
2004: .vb
2005:      proc0 : d_nz = 2, o_nz = 2
2006:      proc1 : d_nz = 3, o_nz = 2
2007:      proc2 : d_nz = 1, o_nz = 4
2008: .ve
2009:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2010:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2011:    for proc2, i.e., we are using 12+15+10=37 storage locations to store 
2012:    34 values.

2014:    When d_nnz, o_nnz parameters are specified, the storage is specified
2015:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2016:    In the above case the values for d_nnz,o_nnz are:
2017: .vb
2018:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2019:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2020:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2021: .ve
2022:    Here the space allocated is the sum of all the above values, i.e., 34, and
2023:    hence the preallocation is exact.

2025:    Level: intermediate

2027: .keywords: matrix, aij, compressed row, sparse, parallel

2029: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2030: @*/
2031: int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2032: {
2033:   Mat_MPIAIJ   *b;
2034:   int          ierr,i;
2035:   PetscTruth   flg2;

2038:   PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);
2039:   if (!flg2) return(0);
2040:   B->preallocated = PETSC_TRUE;
2041:   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2042:   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2043:   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2044:   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2045:   if (d_nnz) {
2046:     for (i=0; i<B->m; i++) {
2047:       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2048:     }
2049:   }
2050:   if (o_nnz) {
2051:     for (i=0; i<B->m; i++) {
2052:       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2053:     }
2054:   }
2055:   b = (Mat_MPIAIJ*)B->data;

2057:   MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);
2058:   PetscLogObjectParent(B,b->A);
2059:   MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);
2060:   PetscLogObjectParent(B,b->B);

2062:   return(0);
2063: }
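/*
   Illustrative usage sketch (not part of the original mpiaij.c): create a matrix, set
   its type to MATMPIAIJ, and preallocate it.  The constant nonzero counts below are
   placeholders; when the per-row counts are known, pass d_nnz/o_nnz arrays as described
   in the manual page above instead.
*/
static int PreallocationExample(MPI_Comm comm,int m,int n,int M,int N,Mat *A)
{
  MatCreate(comm,m,n,M,N,A);
  MatSetType(*A,MATMPIAIJ);
  MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL); /* 5 per row in the diagonal block, 2 in the off-diagonal block (guesses) */
  return(0);
}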

2065: /*@C
2066:    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2067:    (the default parallel PETSc format).  For good matrix assembly performance
2068:    the user should preallocate the matrix storage by setting the parameters 
2069:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2070:    performance can be increased by more than a factor of 50.

2072:    Collective on MPI_Comm

2074:    Input Parameters:
2075: +  comm - MPI communicator
2076: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2077:            This value should be the same as the local size used in creating the 
2078:            y vector for the matrix-vector product y = Ax.
2079: .  n - This value should be the same as the local size used in creating the 
2080:        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2081:        calculated if N is given). For square matrices n is almost always m.
2082: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2083: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2084: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2085:            (same value is used for all local rows)
2086: .  d_nnz - array containing the number of nonzeros in the various rows of the 
2087:            DIAGONAL portion of the local submatrix (possibly different for each row)
2088:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
2089:            The size of this array is equal to the number of local rows, i.e., 'm'. 
2090:            You must leave room for the diagonal entry even if it is zero.
2091: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2092:            submatrix (same value is used for all local rows).
2093: -  o_nnz - array containing the number of nonzeros in the various rows of the
2094:            OFF-DIAGONAL portion of the local submatrix (possibly different for
2095:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
2096:            structure. The size of this array is equal to the number 
2097:            of local rows, i.e., 'm'. 

2099:    Output Parameter:
2100: .  A - the matrix 

2102:    Notes:
2103:    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2104:    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2105:    storage requirements for this matrix.

2107:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
2108:    processor then it must be used on all processors that share the object for 
2109:    that argument.

2111:    The AIJ format (also called the Yale sparse matrix format or
2112:    compressed row storage), is fully compatible with standard Fortran 77
2113:    storage.  That is, the stored row and column indices can begin at
2114:    either one (as in Fortran) or zero.  See the users manual for details.

2116:    The user MUST specify either the local or global matrix dimensions
2117:    (possibly both).

2119:    The parallel matrix is partitioned such that the first m0 rows belong to 
2120:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
2121:    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

2123:    The DIAGONAL portion of the local submatrix of a processor can be defined 
2124:    as the submatrix which is obtained by extracting the part corresponding 
2125:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
2126:    first row that belongs to the processor, and r2 is the last row belonging 
2127:    to this processor. This is a square mxm matrix. The remaining portion 
2128:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

2130:    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

2132:    By default, this format uses inodes (identical nodes) when possible.
2133:    We search for consecutive rows with the same nonzero structure, thereby
2134:    reusing matrix information to achieve increased efficiency.

2136:    Options Database Keys:
2137: +  -mat_aij_no_inode  - Do not use inodes
2138: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2139: -  -mat_aij_oneindex - Internally use indexing starting at 1
2140:         rather than 0.  Note that when calling MatSetValues(),
2141:         the user still MUST index entries starting at 0!


2144:    Example usage:
2145:   
2146:    Consider the following 8x8 matrix with 34 nonzero values that is 
2147:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2148:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
2149:    as follows:

2151: .vb
2152:             1  2  0  |  0  3  0  |  0  4
2153:     Proc0   0  5  6  |  7  0  0  |  8  0
2154:             9  0 10  | 11  0  0  | 12  0
2155:     -------------------------------------
2156:            13  0 14  | 15 16 17  |  0  0
2157:     Proc1   0 18  0  | 19 20 21  |  0  0 
2158:             0  0  0  | 22 23  0  | 24  0
2159:     -------------------------------------
2160:     Proc2  25 26 27  |  0  0 28  | 29  0
2161:            30  0  0  | 31 32 33  |  0 34
2162: .ve

2164:    This can be represented as a collection of submatrices as:

2166: .vb
2167:       A B C
2168:       D E F
2169:       G H I
2170: .ve

2172:    Here the submatrices A,B,C are owned by proc0, D,E,F are
2173:    owned by proc1, and G,H,I are owned by proc2.

2175:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2176:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2177:    The 'M','N' parameters are 8,8, and have the same values on all procs.

2179:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2180:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2181:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2182:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2183:    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2184:    matrix and [DF] as another SeqAIJ matrix.

2186:    When d_nz, o_nz parameters are specified, d_nz storage elements are
2187:    allocated for every row of the local diagonal submatrix, and o_nz
2188:    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2189:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 
2190:    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 
2191:    In this case, the values of d_nz,o_nz are:
2192: .vb
2193:      proc0 : d_nz = 2, o_nz = 2
2194:      proc1 : d_nz = 3, o_nz = 2
2195:      proc2 : d_nz = 1, o_nz = 4
2196: .ve
2197:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2198:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2199:    for proc2, i.e., we are using 12+15+10=37 storage locations to store 
2200:    34 values.

2202:    When d_nnz, o_nnz parameters are specified, the storage is specified
2203:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2204:    In the above case the values for d_nnz,o_nnz are:
2205: .vb
2206:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2207:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2208:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2209: .ve
2210:    Here the space allocated is the sum of all the above values, i.e., 34, and
2211:    hence the preallocation is exact.

2213:    Level: intermediate

2215: .keywords: matrix, aij, compressed row, sparse, parallel

2217: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2218: @*/
2219: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2220: {
2221:   int ierr,size;

2224:   MatCreate(comm,m,n,M,N,A);
2225:   MPI_Comm_size(comm,&size);
2226:   if (size > 1) {
2227:     MatSetType(*A,MATMPIAIJ);
2228:     MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2229:   } else {
2230:     MatSetType(*A,MATSEQAIJ);
2231:     MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2232:   }
2233:   return(0);
2234: }
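/*
   Illustrative usage sketch (not part of the original mpiaij.c): the call proc0 would
   make for the 8x8 example in the manual page above, using exact per-row preallocation
   for its 3 local rows of an 8x8 global matrix.
*/
static int CreateExampleProc0(MPI_Comm comm,Mat *A)
{
  int d_nnz[3] = {2,2,2}; /* nonzeros per local row in the DIAGONAL block [A]      */
  int o_nnz[3] = {2,2,2}; /* nonzeros per local row in the OFF-DIAGONAL block [BC] */

  MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,A);
  return(0);
}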

2236: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2237: {
2238:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2240:   *Ad     = a->A;
2241:   *Ao     = a->B;
2242:   *colmap = a->garray;
2243:   return(0);
2244: }
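/*
   Illustrative usage sketch (not part of the original mpiaij.c): retrieve the two
   sequential matrices that hold the local part of a MATMPIAIJ matrix.  Ad is the
   DIAGONAL block, Ao the OFF-DIAGONAL block, and colmap[j] is the global column of
   local column j of Ao.  The returned objects are borrowed and should not be
   destroyed by the caller.
*/
static int GetSeqAIJExample(Mat A)
{
  Mat Ad,Ao;
  int *colmap;

  MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
  return(0);
}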

2246: int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2247: {
2248:   int        ierr;
2249:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

2252:   if (coloring->ctype == IS_COLORING_LOCAL) {
2253:     int        *allcolors,*colors,i;
2254:     ISColoring ocoloring;

2256:     /* set coloring for diagonal portion */
2257:     MatSetColoring_SeqAIJ(a->A,coloring);

2259:     /* set coloring for off-diagonal portion */
2260:     ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2261:     PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2262:     for (i=0; i<a->B->n; i++) {
2263:       colors[i] = allcolors[a->garray[i]];
2264:     }
2265:     PetscFree(allcolors);
2266:     ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2267:     MatSetColoring_SeqAIJ(a->B,ocoloring);
2268:     ISColoringDestroy(ocoloring);
2269:   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2270:     int        *colors,i,*larray;
2271:     ISColoring ocoloring;

2273:     /* set coloring for diagonal portion */
2274:     PetscMalloc((a->A->n+1)*sizeof(int),&larray);
2275:     for (i=0; i<a->A->n; i++) {
2276:       larray[i] = i + a->cstart;
2277:     }
2278:     ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2279:     PetscMalloc((a->A->n+1)*sizeof(int),&colors);
2280:     for (i=0; i<a->A->n; i++) {
2281:       colors[i] = coloring->colors[larray[i]];
2282:     }
2283:     PetscFree(larray);
2284:     ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);
2285:     MatSetColoring_SeqAIJ(a->A,ocoloring);
2286:     ISColoringDestroy(ocoloring);

2288:     /* set coloring for off-diagonal portion */
2289:     PetscMalloc((a->B->n+1)*sizeof(int),&larray);
2290:     ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2291:     PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2292:     for (i=0; i<a->B->n; i++) {
2293:       colors[i] = coloring->colors[larray[i]];
2294:     }
2295:     PetscFree(larray);
2296:     ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2297:     MatSetColoring_SeqAIJ(a->B,ocoloring);
2298:     ISColoringDestroy(ocoloring);
2299:   } else {
2300:     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2301:   }

2303:   return(0);
2304: }

2306: int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2307: {
2308:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2309:   int        ierr;

2312:   MatSetValuesAdic_SeqAIJ(a->A,advalues);
2313:   MatSetValuesAdic_SeqAIJ(a->B,advalues);
2314:   return(0);
2315: }

2317: int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2318: {
2319:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2320:   int        ierr;

2323:   MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2324:   MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2325:   return(0);
2326: }