Actual source code: mpiaij.c

 2:  #include "src/mat/impls/aij/mpi/mpiaij.h"
 3:  #include "src/inline/spops.h"

  5: /* 
  6:   Local utility routine that creates a mapping from the global column 
  7: number to the local number in the off-diagonal part of the local 
  8: storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at 
  9: a slightly higher hash-table cost; without it, it is not scalable (each processor
 10: holds an order-N integer array) but access is fast.
 11: */
 14: PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
 15: {
 16:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
 18:   PetscInt       n = aij->B->n,i;

 21: #if defined (PETSC_USE_CTABLE)
 22:   PetscTableCreate(n,&aij->colmap);
 23:   for (i=0; i<n; i++){
 24:     PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
 25:   }
 26: #else
 27:   PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);
 28:   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
 29:   PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));
 30:   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
 31: #endif
 32:   return(0);
 33: }
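
/*
   Illustrative sketch (not part of the original file): how the colmap built above is
   consulted later to translate a global column number into a local off-diagonal column.
   The +1/-1 offsets let 0 act as the "not present" marker in both variants, so a miss
   comes back as -1.  The helper name below is made up for illustration.
*/
#if 0
PetscInt LookupOffDiagColumn(Mat_MPIAIJ *aij,PetscInt gcol)
{
  PetscInt lcol;
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&lcol); /* returns 0 when gcol is absent */
  lcol--;
#else
  lcol = aij->colmap[gcol] - 1;             /* array was zeroed, so absent columns give -1 */
#endif
  return lcol;
}
#endif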

 35: #define CHUNKSIZE   15
 36: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
 37: { \
 38:  \
 39:     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
 40:     rmax = aimax[row]; nrow = ailen[row];  \
 41:     col1 = col - shift; \
 42:      \
 43:     low = 0; high = nrow; \
 44:     while (high-low > 5) { \
 45:       t = (low+high)/2; \
 46:       if (rp[t] > col) high = t; \
 47:       else             low  = t; \
 48:     } \
 49:       for (_i=low; _i<high; _i++) { \
 50:         if (rp[_i] > col1) break; \
 51:         if (rp[_i] == col1) { \
 52:           if (addv == ADD_VALUES) ap[_i] += value;   \
 53:           else                  ap[_i] = value; \
 54:           goto a_noinsert; \
 55:         } \
 56:       }  \
 57:       if (nonew == 1) goto a_noinsert; \
 58:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
 59:       if (nrow >= rmax) { \
 60:         /* there is no extra room in row, therefore enlarge */ \
 61:         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
 62:         PetscScalar *new_a; \
 63:  \
 64:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 65:  \
 66:         /* malloc new storage space */ \
 67:         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
 68:         PetscMalloc(len,&new_a); \
 69:         new_j   = (PetscInt*)(new_a + new_nz); \
 70:         new_i   = new_j + new_nz; \
 71:  \
 72:         /* copy over old data into new slots */ \
 73:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
 74:         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
 75:         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt)); \
 76:         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
 77:         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
 78:                                                            len*sizeof(PetscInt)); \
 79:         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar)); \
 80:         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
 81:                                                            len*sizeof(PetscScalar));  \
 82:         /* free up old matrix storage */ \
 83:  \
 84:         PetscFree(a->a);  \
 85:         if (!a->singlemalloc) { \
 86:            PetscFree(a->i); \
 87:            PetscFree(a->j); \
 88:         } \
 89:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
 90:         a->singlemalloc = PETSC_TRUE; \
 91:  \
 92:         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
 93:         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
 94:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
 95:         a->maxnz += CHUNKSIZE; \
 96:         a->reallocs++; \
 97:       } \
 98:       N = nrow++ - 1; a->nz++; \
 99:       /* shift up all the later entries in this row */ \
100:       for (ii=N; ii>=_i; ii--) { \
101:         rp[ii+1] = rp[ii]; \
102:         ap[ii+1] = ap[ii]; \
103:       } \
104:       rp[_i] = col1;  \
105:       ap[_i] = value;  \
106:       a_noinsert: ; \
107:       ailen[row] = nrow; \
108: } 

110: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111: { \
112:  \
113:     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114:     rmax = bimax[row]; nrow = bilen[row];  \
115:     col1 = col - shift; \
116:      \
117:     low = 0; high = nrow; \
118:     while (high-low > 5) { \
119:       t = (low+high)/2; \
120:       if (rp[t] > col) high = t; \
121:       else             low  = t; \
122:     } \
123:        for (_i=low; _i<high; _i++) { \
124:         if (rp[_i] > col1) break; \
125:         if (rp[_i] == col1) { \
126:           if (addv == ADD_VALUES) ap[_i] += value;   \
127:           else                  ap[_i] = value; \
128:           goto b_noinsert; \
129:         } \
130:       }  \
131:       if (nonew == 1) goto b_noinsert; \
132:       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133:       if (nrow >= rmax) { \
134:         /* there is no extra room in row, therefore enlarge */ \
135:         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136:         PetscScalar *new_a; \
137:  \
138:         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
139:  \
140:         /* malloc new storage space */ \
141:         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
142:         PetscMalloc(len,&new_a); \
143:         new_j   = (PetscInt*)(new_a + new_nz); \
144:         new_i   = new_j + new_nz; \
145:  \
146:         /* copy over old data into new slots */ \
147:         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148:         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149:         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt)); \
150:         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151:         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152:                                                            len*sizeof(PetscInt)); \
153:         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar)); \
154:         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155:                                                            len*sizeof(PetscScalar));  \
156:         /* free up old matrix storage */ \
157:  \
158:         PetscFree(b->a);  \
159:         if (!b->singlemalloc) { \
160:           PetscFree(b->i); \
161:           PetscFree(b->j); \
162:         } \
163:         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164:         b->singlemalloc = PETSC_TRUE; \
165:  \
166:         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167:         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168:         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
169:         b->maxnz += CHUNKSIZE; \
170:         b->reallocs++; \
171:       } \
172:       N = nrow++ - 1; b->nz++; \
173:       /* shift up all the later entries in this row */ \
174:       for (ii=N; ii>=_i; ii--) { \
175:         rp[ii+1] = rp[ii]; \
176:         ap[ii+1] = ap[ii]; \
177:       } \
178:       rp[_i] = col1;  \
179:       ap[_i] = value;  \
180:       b_noinsert: ; \
181:       bilen[row] = nrow; \
182: }
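
/*
   Note on the two macros above: each inserts a single value into one row of a local
   SeqAIJ block (A = diagonal part, B = off-diagonal part).  A short binary search
   narrows the window when the row holds more than 5 entries, then a linear scan locates
   the column.  An existing entry is added to or overwritten according to addv; otherwise
   a new nonzero is inserted, and when the row is full the i/j/a arrays are reallocated
   with CHUNKSIZE extra slots for that row.  The nonew flag governs new nonzeros:
   1 drops them silently, -1 always errors, and -2 errors only when a reallocation
   would be required.
*/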

186: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
187: {
188:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
189:   PetscScalar    value;
191:   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
192:   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
193:   PetscTruth     roworiented = aij->roworiented;

 195:   /* Some variables required in the macros */
196:   Mat            A = aij->A;
197:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
198:   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199:   PetscScalar    *aa = a->a;
200:   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201:   Mat            B = aij->B;
202:   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
203:   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204:   PetscScalar    *ba = b->a;

206:   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207:   PetscInt       nonew = a->nonew,shift=0;
208:   PetscScalar    *ap;

211:   for (i=0; i<m; i++) {
212:     if (im[i] < 0) continue;
213: #if defined(PETSC_USE_BOPT_g)
214:     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
215: #endif
216:     if (im[i] >= rstart && im[i] < rend) {
217:       row = im[i] - rstart;
218:       for (j=0; j<n; j++) {
219:         if (in[j] >= cstart && in[j] < cend){
220:           col = in[j] - cstart;
221:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222:           if (ignorezeroentries && value == 0.0) continue;
223:           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224:           /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
225:         } else if (in[j] < 0) continue;
226: #if defined(PETSC_USE_BOPT_g)
227:         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
228: #endif
229:         else {
230:           if (mat->was_assembled) {
231:             if (!aij->colmap) {
232:               CreateColmap_MPIAIJ_Private(mat);
233:             }
234: #if defined (PETSC_USE_CTABLE)
235:             PetscTableFind(aij->colmap,in[j]+1,&col);
236:             col--;
237: #else
238:             col = aij->colmap[in[j]] - 1;
239: #endif
240:             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241:               DisAssemble_MPIAIJ(mat);
242:               col =  in[j];
243:               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244:               B = aij->B;
245:               b = (Mat_SeqAIJ*)B->data;
246:               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247:               ba = b->a;
248:             }
249:           } else col = in[j];
250:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251:           if (ignorezeroentries && value == 0.0) continue;
252:           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253:           /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
254:         }
255:       }
256:     } else {
257:       if (!aij->donotstash) {
258:         if (roworiented) {
259:           if (ignorezeroentries && v[i*n] == 0.0) continue;
260:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
261:         } else {
262:           if (ignorezeroentries && v[i] == 0.0) continue;
263:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
264:         }
265:       }
266:     }
267:   }
268:   return(0);
269: }
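
/*
   Illustrative usage sketch (not part of the original file): rows owned by this process
   are routed directly into aij->A or aij->B above, while rows owned elsewhere are stashed
   and shipped during assembly.  The matrix mat and the indices rstart and N below are
   assumed to exist only for illustration.
*/
#if 0
{
  PetscInt    row = rstart,cols[2];
  PetscScalar vals[2] = {1.0,-1.0};
  cols[0] = row; cols[1] = (row+1) % N;       /* one diagonal-block column, one possibly off-process */
  MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);   /* starts the stash scatter */
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);     /* drains the stash and assembles A and B */
}
#endif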

273: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
274: {
275:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
277:   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
278:   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

281:   for (i=0; i<m; i++) {
282:     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
283:     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
284:     if (idxm[i] >= rstart && idxm[i] < rend) {
285:       row = idxm[i] - rstart;
286:       for (j=0; j<n; j++) {
287:         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
288:         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
289:         if (idxn[j] >= cstart && idxn[j] < cend){
290:           col = idxn[j] - cstart;
291:           MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
292:         } else {
293:           if (!aij->colmap) {
294:             CreateColmap_MPIAIJ_Private(mat);
295:           }
296: #if defined (PETSC_USE_CTABLE)
297:           PetscTableFind(aij->colmap,idxn[j]+1,&col);
298:           col --;
299: #else
300:           col = aij->colmap[idxn[j]] - 1;
301: #endif
302:           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303:           else {
304:             MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
305:           }
306:         }
307:       }
308:     } else {
309:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310:     }
311:   }
312:   return(0);
313: }

317: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318: {
319:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
321:   PetscInt       nstash,reallocs;
322:   InsertMode     addv;

325:   if (aij->donotstash) {
326:     return(0);
327:   }

 329:   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
330:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
331:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333:   }
334:   mat->insertmode = addv; /* in case this processor had no cache */

336:   MatStashScatterBegin_Private(&mat->stash,aij->rowners);
337:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
338:   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
339:   return(0);
340: }


345: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346: {
347:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
348:   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
350:   PetscMPIInt    n;
351:   PetscInt       i,j,rstart,ncols,flg;
352:   PetscInt       *row,*col,other_disassembled;
353:   PetscScalar    *val;
354:   InsertMode     addv = mat->insertmode;

357:   if (!aij->donotstash) {
358:     while (1) {
359:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
360:       if (!flg) break;

362:       for (i=0; i<n;) {
 363:         /* Now identify the consecutive values belonging to the same row */
364:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
365:         if (j < n) ncols = j-i;
366:         else       ncols = n-i;
367:         /* Now assemble all these values with a single function call */
368:         MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
369:         i = j;
370:       }
371:     }
372:     MatStashScatterEnd_Private(&mat->stash);
373:   }
374: 
375:   MatAssemblyBegin(aij->A,mode);
376:   MatAssemblyEnd(aij->A,mode);

 378:   /* determine if any processor has disassembled; if so we must 
 379:      also disassemble ourselves, in order that we may reassemble. */
380:   /*
381:      if nonzero structure of submatrix B cannot change then we know that
382:      no processor disassembled thus we can skip this stuff
383:   */
384:   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
385:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
386:     if (mat->was_assembled && !other_disassembled) {
387:       DisAssemble_MPIAIJ(mat);
388:       /* reaccess the b because aij->B was changed */
389:       b    = (Mat_SeqAIJ *)aij->B->data;
390:     }
391:   }

393:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394:     MatSetUpMultiply_MPIAIJ(mat);
395:   }
396:   MatAssemblyBegin(aij->B,mode);
397:   MatAssemblyEnd(aij->B,mode);

399:   if (aij->rowvalues) {
400:     PetscFree(aij->rowvalues);
401:     aij->rowvalues = 0;
402:   }

404:   /* used by MatAXPY() */
405:   a->xtoy = 0; b->xtoy = 0;
406:   a->XtoY = 0; b->XtoY = 0;

408:   return(0);
409: }
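
/*
   Note on the assembly above: stashed off-processor rows are drained first and fed back
   through MatSetValues_MPIAIJ().  The MPI_PROD reduction of was_assembled drops to zero
   as soon as any processor has disassembled its off-diagonal block, so every processor
   disassembles as well and the compressed column map is rebuilt consistently during the
   final assembly.
*/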

413: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
414: {
415:   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;

419:   MatZeroEntries(l->A);
420:   MatZeroEntries(l->B);
421:   return(0);
422: }

426: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
427: {
428:   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
430:   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
431:   PetscInt       i,N,*rows,*owners = l->rowners;
432:   PetscInt       *nprocs,j,idx,nsends,row;
433:   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
434:   PetscInt       *rvalues,count,base,slen,*source;
435:   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
436:   MPI_Comm       comm = A->comm;
437:   MPI_Request    *send_waits,*recv_waits;
438:   MPI_Status     recv_status,*send_status;
439:   IS             istmp;
440:   PetscTruth     found;

443:   ISGetLocalSize(is,&N);
444:   ISGetIndices(is,&rows);

446:   /*  first count number of contributors to each processor */
447:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
448:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
449:   PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
450:   for (i=0; i<N; i++) {
451:     idx = rows[i];
452:     found = PETSC_FALSE;
453:     for (j=0; j<size; j++) {
454:       if (idx >= owners[j] && idx < owners[j+1]) {
455:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
456:       }
457:     }
458:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
459:   }
460:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

462:   /* inform other processors of number of messages and max length*/
463:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

465:   /* post receives:   */
466:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
467:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
468:   for (i=0; i<nrecvs; i++) {
469:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
470:   }

472:   /* do sends:
473:       1) starts[i] gives the starting index in svalues for stuff going to 
474:          the ith processor
475:   */
476:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
477:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
478:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
479:   starts[0] = 0;
480:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
481:   for (i=0; i<N; i++) {
482:     svalues[starts[owner[i]]++] = rows[i];
483:   }
484:   ISRestoreIndices(is,&rows);

486:   starts[0] = 0;
487:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
488:   count = 0;
489:   for (i=0; i<size; i++) {
490:     if (nprocs[2*i+1]) {
491:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
492:     }
493:   }
494:   PetscFree(starts);

496:   base = owners[rank];

498:   /*  wait on receives */
499:   PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
500:   source = lens + nrecvs;
501:   count  = nrecvs; slen = 0;
502:   while (count) {
503:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
504:     /* unpack receives into our local space */
505:     MPI_Get_count(&recv_status,MPIU_INT,&n);
506:     source[imdex]  = recv_status.MPI_SOURCE;
507:     lens[imdex]    = n;
508:     slen          += n;
509:     count--;
510:   }
511:   PetscFree(recv_waits);
512: 
513:   /* move the data into the send scatter */
514:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
515:   count = 0;
516:   for (i=0; i<nrecvs; i++) {
517:     values = rvalues + i*nmax;
518:     for (j=0; j<lens[i]; j++) {
519:       lrows[count++] = values[j] - base;
520:     }
521:   }
522:   PetscFree(rvalues);
523:   PetscFree(lens);
524:   PetscFree(owner);
525:   PetscFree(nprocs);
526: 
527:   /* actually zap the local rows */
528:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
529:   PetscLogObjectParent(A,istmp);

 531:   /*
 532:         Zero the required rows. If the "diagonal block" of the matrix
 533:      is square and the user wishes to set the diagonal, we use separate
 534:      code so that MatSetValues() is not called for each diagonal entry,
 535:      which would allocate new memory, trigger many mallocs and slow things down.

 537:        Contributed by: Matthew Knepley
 538:   */
 539:   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
540:   MatZeroRows(l->B,istmp,0);
541:   if (diag && (l->A->M == l->A->N)) {
542:     MatZeroRows(l->A,istmp,diag);
543:   } else if (diag) {
544:     MatZeroRows(l->A,istmp,0);
545:     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
546:       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
547: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
548:     }
549:     for (i = 0; i < slen; i++) {
550:       row  = lrows[i] + rstart;
551:       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
552:     }
553:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
554:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
555:   } else {
556:     MatZeroRows(l->A,istmp,0);
557:   }
558:   ISDestroy(istmp);
559:   PetscFree(lrows);

561:   /* wait on sends */
562:   if (nsends) {
563:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
564:     MPI_Waitall(nsends,send_waits,send_status);
565:     PetscFree(send_status);
566:   }
567:   PetscFree(send_waits);
568:   PetscFree(svalues);

570:   return(0);
571: }
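
/*
   Illustrative usage sketch (not part of the original file): each process may list any
   global rows, owned or not; the routine above routes them to their owning processes
   before zeroing.  The Mat A is the one being modified; the row numbers and diagonal
   value below are made up.
*/
#if 0
{
  IS          rows;
  PetscInt    r[2] = {0,5};      /* global row numbers, need not be locally owned */
  PetscScalar diag = 1.0;        /* value to place on the diagonal of each zeroed row */
  ISCreateGeneral(PETSC_COMM_SELF,2,r,&rows);
  MatZeroRows(A,rows,&diag);     /* pass 0 instead of &diag to leave the diagonal entries zero */
  ISDestroy(rows);
}
#endif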

575: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
576: {
577:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
579:   PetscInt       nt;

582:   VecGetLocalSize(xx,&nt);
583:   if (nt != A->n) {
584:     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
585:   }
586:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
587:   (*a->A->ops->mult)(a->A,xx,yy);
588:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
589:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
590:   return(0);
591: }
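
/*
   Note on MatMult above: yy = A*xx is formed as (diagonal block)*xx plus
   (off-diagonal block)*lvec, where lvec holds the ghost values of xx gathered by the
   Mvctx scatter.  The scatter is started before the local multiply so that
   communication overlaps with the on-process computation.
*/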

595: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
596: {
597:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

601:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
602:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
603:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
604:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
605:   return(0);
606: }

610: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
611: {
612:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

616:   /* do nondiagonal part */
617:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
618:   /* send it on its way */
619:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
620:   /* do local part */
621:   (*a->A->ops->multtranspose)(a->A,xx,yy);
622:   /* receive remote parts: note this assumes the values are not actually */
 623:   /* inserted in yy until the next line, which is true for this implementation */
 624:   /* but may not always be true. */
625:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
626:   return(0);
627: }

 632: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
633: {
634:   MPI_Comm       comm;
635:   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
636:   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
637:   IS             Me,Notme;
639:   PetscInt       M,N,first,last,*notme,i;
640:   PetscMPIInt    size;


644:   /* Easy test: symmetric diagonal block */
645:   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
646:   MatIsTranspose(Adia,Bdia,tol,f);
647:   if (!*f) return(0);
648:   PetscObjectGetComm((PetscObject)Amat,&comm);
649:   MPI_Comm_size(comm,&size);
650:   if (size == 1) return(0);

652:   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
653:   MatGetSize(Amat,&M,&N);
654:   MatGetOwnershipRange(Amat,&first,&last);
655:   PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
656:   for (i=0; i<first; i++) notme[i] = i;
657:   for (i=last; i<M; i++) notme[i-last+first] = i;
658:   ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
659:   ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
660:   MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
661:   Aoff = Aoffs[0];
662:   MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
663:   Boff = Boffs[0];
664:   MatIsTranspose(Aoff,Boff,tol,f);
665:   MatDestroyMatrices(1,&Aoffs);
666:   MatDestroyMatrices(1,&Boffs);
667:   ISDestroy(Me);
668:   ISDestroy(Notme);

670:   return(0);
671: }

676: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677: {
678:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

682:   /* do nondiagonal part */
683:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
684:   /* send it on its way */
685:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
686:   /* do local part */
687:   (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
688:   /* receive remote parts: note this assumes the values are not actually */
 689:   /* inserted in zz until the next line, which is true for this implementation */
 690:   /* but may not always be true. */
691:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
692:   return(0);
693: }

695: /*
696:   This only works correctly for square matrices where the subblock A->A is the 
697:    diagonal block
698: */
701: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
702: {
704:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

707:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
708:   if (a->rstart != a->cstart || a->rend != a->cend) {
709:     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
710:   }
711:   MatGetDiagonal(a->A,v);
712:   return(0);
713: }

717: PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
718: {
719:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

723:   MatScale(aa,a->A);
724:   MatScale(aa,a->B);
725:   return(0);
726: }

730: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
731: {
732:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

736: #if defined(PETSC_USE_LOG)
737:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
738: #endif
739:   MatStashDestroy_Private(&mat->stash);
740:   PetscFree(aij->rowners);
741:   MatDestroy(aij->A);
742:   MatDestroy(aij->B);
743: #if defined (PETSC_USE_CTABLE)
744:   if (aij->colmap) {PetscTableDelete(aij->colmap);}
745: #else
746:   if (aij->colmap) {PetscFree(aij->colmap);}
747: #endif
748:   if (aij->garray) {PetscFree(aij->garray);}
749:   if (aij->lvec)   {VecDestroy(aij->lvec);}
750:   if (aij->Mvctx)  {VecScatterDestroy(aij->Mvctx);}
751:   if (aij->rowvalues) {PetscFree(aij->rowvalues);}
752:   PetscFree(aij);

754:   PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
755:   PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
756:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
757:   PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
758:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
759:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
760:   PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
761:   return(0);
762: }

766: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
767: {
768:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
769:   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
770:   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
771:   PetscErrorCode    ierr;
772:   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
773:   int               fd;
774:   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
775:   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
776:   PetscScalar       *column_values;

779:   MPI_Comm_rank(mat->comm,&rank);
780:   MPI_Comm_size(mat->comm,&size);
781:   nz   = A->nz + B->nz;
782:   if (!rank) {
783:     header[0] = MAT_FILE_COOKIE;
784:     header[1] = mat->M;
785:     header[2] = mat->N;
786:     MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);
787:     PetscViewerBinaryGetDescriptor(viewer,&fd);
788:     PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
789:     /* get largest number of rows any processor has */
790:     rlen = mat->m;
791:     PetscMapGetGlobalRange(mat->rmap,&range);
792:     for (i=1; i<size; i++) {
793:       rlen = PetscMax(rlen,range[i+1] - range[i]);
794:     }
795:   } else {
796:     MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);
797:     rlen = mat->m;
798:   }

800:   /* load up the local row counts */
801:   PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
802:   for (i=0; i<mat->m; i++) {
803:     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
804:   }

806:   /* store the row lengths to the file */
807:   if (!rank) {
808:     MPI_Status status;
809:     PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);
810:     for (i=1; i<size; i++) {
811:       rlen = range[i+1] - range[i];
812:       MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);
813:       PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
814:     }
815:   } else {
816:     MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);
817:   }
818:   PetscFree(row_lengths);

820:   /* load up the local column indices */
 821:   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
822:   MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);
823:   PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
824:   cnt  = 0;
825:   for (i=0; i<mat->m; i++) {
826:     for (j=B->i[i]; j<B->i[i+1]; j++) {
827:       if ( (col = garray[B->j[j]]) > cstart) break;
828:       column_indices[cnt++] = col;
829:     }
830:     for (k=A->i[i]; k<A->i[i+1]; k++) {
831:       column_indices[cnt++] = A->j[k] + cstart;
832:     }
833:     for (; j<B->i[i+1]; j++) {
834:       column_indices[cnt++] = garray[B->j[j]];
835:     }
836:   }
837:   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

839:   /* store the column indices to the file */
840:   if (!rank) {
841:     MPI_Status status;
842:     PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
843:     for (i=1; i<size; i++) {
844:       MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
 845:       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
846:       MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);
847:       PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
848:     }
849:   } else {
850:     MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
851:     MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);
852:   }
853:   PetscFree(column_indices);

855:   /* load up the local column values */
856:   PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
857:   cnt  = 0;
858:   for (i=0; i<mat->m; i++) {
859:     for (j=B->i[i]; j<B->i[i+1]; j++) {
860:       if ( garray[B->j[j]] > cstart) break;
861:       column_values[cnt++] = B->a[j];
862:     }
863:     for (k=A->i[i]; k<A->i[i+1]; k++) {
864:       column_values[cnt++] = A->a[k];
865:     }
866:     for (; j<B->i[i+1]; j++) {
867:       column_values[cnt++] = B->a[j];
868:     }
869:   }
870:   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

872:   /* store the column values to the file */
873:   if (!rank) {
874:     MPI_Status status;
875:     PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
876:     for (i=1; i<size; i++) {
877:       MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
 878:       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
879:       MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
880:       PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
881:     }
882:   } else {
883:     MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
884:     MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
885:   }
886:   PetscFree(column_values);
887:   return(0);
888: }
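
/*
   Note on the binary format written above: process 0 writes a four-entry header
   (MAT_FILE_COOKIE, M, N, global nonzero count), then all row lengths, then all global
   column indices, then all values, gathering each piece from the other processes one
   rank at a time in row order.  Within a row the off-diagonal columns below cstart come
   first, then the diagonal block shifted by cstart, then the remaining off-diagonal
   columns, so the columns of each row appear in increasing global order.
*/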

892: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
893: {
894:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
895:   PetscErrorCode    ierr;
896:   PetscMPIInt       rank = aij->rank,size = aij->size;
897:   PetscTruth        isdraw,iascii,flg,isbinary;
898:   PetscViewer       sviewer;
899:   PetscViewerFormat format;

902:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
903:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
904:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
905:   if (iascii) {
906:     PetscViewerGetFormat(viewer,&format);
907:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
908:       MatInfo info;
909:       MPI_Comm_rank(mat->comm,&rank);
910:       MatGetInfo(mat,MAT_LOCAL,&info);
911:       PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
912:       if (flg) {
913:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
914:                                               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
915:       } else {
916:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
917:                     rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
918:       }
919:       MatGetInfo(aij->A,MAT_LOCAL,&info);
920:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
921:       MatGetInfo(aij->B,MAT_LOCAL,&info);
922:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
923:       PetscViewerFlush(viewer);
924:       VecScatterView(aij->Mvctx,viewer);
925:       return(0);
926:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
927:       return(0);
928:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
929:       return(0);
930:     }
931:   } else if (isbinary) {
932:     if (size == 1) {
933:       PetscObjectSetName((PetscObject)aij->A,mat->name);
934:       MatView(aij->A,viewer);
935:     } else {
936:       MatView_MPIAIJ_Binary(mat,viewer);
937:     }
938:     return(0);
939:   } else if (isdraw) {
940:     PetscDraw  draw;
941:     PetscTruth isnull;
942:     PetscViewerDrawGetDraw(viewer,0,&draw);
943:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
944:   }

946:   if (size == 1) {
947:     PetscObjectSetName((PetscObject)aij->A,mat->name);
948:     MatView(aij->A,viewer);
949:   } else {
950:     /* assemble the entire matrix onto first processor. */
951:     Mat         A;
952:     Mat_SeqAIJ  *Aloc;
953:     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
954:     PetscScalar *a;

956:     if (!rank) {
957:       MatCreate(mat->comm,M,N,M,N,&A);
958:     } else {
959:       MatCreate(mat->comm,0,0,M,N,&A);
960:     }
961:     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
962:     MatSetType(A,MATMPIAIJ);
963:     MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
964:     PetscLogObjectParent(mat,A);

966:     /* copy over the A part */
967:     Aloc = (Mat_SeqAIJ*)aij->A->data;
968:     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
969:     row = aij->rstart;
970:     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
971:     for (i=0; i<m; i++) {
972:       MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
973:       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
974:     }
975:     aj = Aloc->j;
976:     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

978:     /* copy over the B part */
979:     Aloc = (Mat_SeqAIJ*)aij->B->data;
980:     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
981:     row  = aij->rstart;
982:     PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
983:     ct   = cols;
984:     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
985:     for (i=0; i<m; i++) {
986:       MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
987:       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
988:     }
989:     PetscFree(ct);
990:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
991:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
992:     /* 
 993:        Every process has to participate in the call that draws the matrix since the graphics waits are
994:        synchronized across all processors that share the PetscDraw object
995:     */
996:     PetscViewerGetSingleton(viewer,&sviewer);
997:     if (!rank) {
998:       PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
999:       MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1000:     }
1001:     PetscViewerRestoreSingleton(viewer,&sviewer);
1002:     MatDestroy(A);
1003:   }
1004:   return(0);
1005: }

1009: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1010: {
1012:   PetscTruth     iascii,isdraw,issocket,isbinary;
1013: 
1015:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1016:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1017:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1018:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1019:   if (iascii || isdraw || isbinary || issocket) {
1020:     MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1021:   } else {
1022:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1023:   }
1024:   return(0);
1025: }



1031: PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1032: {
1033:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1035:   Vec            bb1;
1036:   PetscScalar    mone=-1.0;

1039:   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

1041:   VecDuplicate(bb,&bb1);

1043:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1044:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1045:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1046:       its--;
1047:     }
1048: 
1049:     while (its--) {
1050:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1051:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

1053:       /* update rhs: bb1 = bb - B*x */
1054:       VecScale(&mone,mat->lvec);
1055:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1057:       /* local sweep */
1058:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1059: 
1060:     }
1061:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1062:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1063:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1064:       its--;
1065:     }
1066:     while (its--) {
1067:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1068:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

1070:       /* update rhs: bb1 = bb - B*x */
1071:       VecScale(&mone,mat->lvec);
1072:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1074:       /* local sweep */
1075:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1076: 
1077:     }
1078:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1079:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1080:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1081:       its--;
1082:     }
1083:     while (its--) {
1084:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1085:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

1087:       /* update rhs: bb1 = bb - B*x */
1088:       VecScale(&mone,mat->lvec);
1089:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1091:       /* local sweep */
1092:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1093: 
1094:     }
1095:   } else {
1096:     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1097:   }

1099:   VecDestroy(bb1);
1100:   return(0);
1101: }

1105: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1106: {
1107:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1108:   Mat            A = mat->A,B = mat->B;
1110:   PetscReal      isend[5],irecv[5];

1113:   info->block_size     = 1.0;
1114:   MatGetInfo(A,MAT_LOCAL,info);
1115:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1116:   isend[3] = info->memory;  isend[4] = info->mallocs;
1117:   MatGetInfo(B,MAT_LOCAL,info);
1118:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1119:   isend[3] += info->memory;  isend[4] += info->mallocs;
1120:   if (flag == MAT_LOCAL) {
1121:     info->nz_used      = isend[0];
1122:     info->nz_allocated = isend[1];
1123:     info->nz_unneeded  = isend[2];
1124:     info->memory       = isend[3];
1125:     info->mallocs      = isend[4];
1126:   } else if (flag == MAT_GLOBAL_MAX) {
1127:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1128:     info->nz_used      = irecv[0];
1129:     info->nz_allocated = irecv[1];
1130:     info->nz_unneeded  = irecv[2];
1131:     info->memory       = irecv[3];
1132:     info->mallocs      = irecv[4];
1133:   } else if (flag == MAT_GLOBAL_SUM) {
1134:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1135:     info->nz_used      = irecv[0];
1136:     info->nz_allocated = irecv[1];
1137:     info->nz_unneeded  = irecv[2];
1138:     info->memory       = irecv[3];
1139:     info->mallocs      = irecv[4];
1140:   }
1141:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1142:   info->fill_ratio_needed = 0;
1143:   info->factor_mallocs    = 0;
1144:   info->rows_global       = (double)matin->M;
1145:   info->columns_global    = (double)matin->N;
1146:   info->rows_local        = (double)matin->m;
1147:   info->columns_local     = (double)matin->N;

1149:   return(0);
1150: }

1154: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1155: {
1156:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

1160:   switch (op) {
1161:   case MAT_NO_NEW_NONZERO_LOCATIONS:
1162:   case MAT_YES_NEW_NONZERO_LOCATIONS:
1163:   case MAT_COLUMNS_UNSORTED:
1164:   case MAT_COLUMNS_SORTED:
1165:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1166:   case MAT_KEEP_ZEROED_ROWS:
1167:   case MAT_NEW_NONZERO_LOCATION_ERR:
1168:   case MAT_USE_INODES:
1169:   case MAT_DO_NOT_USE_INODES:
1170:   case MAT_IGNORE_ZERO_ENTRIES:
1171:     MatSetOption(a->A,op);
1172:     MatSetOption(a->B,op);
1173:     break;
1174:   case MAT_ROW_ORIENTED:
1175:     a->roworiented = PETSC_TRUE;
1176:     MatSetOption(a->A,op);
1177:     MatSetOption(a->B,op);
1178:     break;
1179:   case MAT_ROWS_SORTED:
1180:   case MAT_ROWS_UNSORTED:
1181:   case MAT_YES_NEW_DIAGONALS:
1182:     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1183:     break;
1184:   case MAT_COLUMN_ORIENTED:
1185:     a->roworiented = PETSC_FALSE;
1186:     MatSetOption(a->A,op);
1187:     MatSetOption(a->B,op);
1188:     break;
1189:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1190:     a->donotstash = PETSC_TRUE;
1191:     break;
1192:   case MAT_NO_NEW_DIAGONALS:
1193:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1194:   case MAT_SYMMETRIC:
1195:   case MAT_STRUCTURALLY_SYMMETRIC:
1196:   case MAT_HERMITIAN:
1197:   case MAT_SYMMETRY_ETERNAL:
1198:     MatSetOption(a->A,op);
1199:     break;
1200:   case MAT_NOT_SYMMETRIC:
1201:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1202:   case MAT_NOT_HERMITIAN:
1203:   case MAT_NOT_SYMMETRY_ETERNAL:
1204:     break;
1205:   default:
1206:     SETERRQ(PETSC_ERR_SUP,"unknown option");
1207:   }
1208:   return(0);
1209: }

1213: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1214: {
1215:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1216:   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1218:   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1219:   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1220:   PetscInt       *cmap,*idx_p;

1223:   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1224:   mat->getrowactive = PETSC_TRUE;

1226:   if (!mat->rowvalues && (idx || v)) {
1227:     /*
1228:         allocate enough space to hold information from the longest row.
1229:     */
1230:     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1231:     PetscInt     max = 1,tmp;
1232:     for (i=0; i<matin->m; i++) {
1233:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1234:       if (max < tmp) { max = tmp; }
1235:     }
1236:     PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);
1237:     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1238:   }

1240:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1241:   lrow = row - rstart;

1243:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1244:   if (!v)   {pvA = 0; pvB = 0;}
1245:   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1246:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1247:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1248:   nztot = nzA + nzB;

1250:   cmap  = mat->garray;
1251:   if (v  || idx) {
1252:     if (nztot) {
1253:       /* Sort by increasing column numbers, assuming A and B already sorted */
1254:       PetscInt imark = -1;
1255:       if (v) {
1256:         *v = v_p = mat->rowvalues;
1257:         for (i=0; i<nzB; i++) {
1258:           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1259:           else break;
1260:         }
1261:         imark = i;
1262:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1263:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1264:       }
1265:       if (idx) {
1266:         *idx = idx_p = mat->rowindices;
1267:         if (imark > -1) {
1268:           for (i=0; i<imark; i++) {
1269:             idx_p[i] = cmap[cworkB[i]];
1270:           }
1271:         } else {
1272:           for (i=0; i<nzB; i++) {
1273:             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1274:             else break;
1275:           }
1276:           imark = i;
1277:         }
1278:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1279:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1280:       }
1281:     } else {
1282:       if (idx) *idx = 0;
1283:       if (v)   *v   = 0;
1284:     }
1285:   }
1286:   *nz = nztot;
1287:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1288:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1289:   return(0);
1290: }
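
/*
   Note on MatGetRow above: the requested row is fetched from both local blocks and merged
   into mat->rowvalues/mat->rowindices in increasing global column order: off-diagonal
   columns below cstart, then the diagonal block shifted by cstart, then the remaining
   off-diagonal columns mapped through garray.
*/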

1294: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1295: {
1296:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

1299:   if (aij->getrowactive == PETSC_FALSE) {
1300:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1301:   }
1302:   aij->getrowactive = PETSC_FALSE;
1303:   return(0);
1304: }

1308: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1309: {
1310:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1311:   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1313:   PetscInt       i,j,cstart = aij->cstart;
1314:   PetscReal      sum = 0.0;
1315:   PetscScalar    *v;

1318:   if (aij->size == 1) {
1319:      MatNorm(aij->A,type,norm);
1320:   } else {
1321:     if (type == NORM_FROBENIUS) {
1322:       v = amat->a;
1323:       for (i=0; i<amat->nz; i++) {
1324: #if defined(PETSC_USE_COMPLEX)
1325:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1326: #else
1327:         sum += (*v)*(*v); v++;
1328: #endif
1329:       }
1330:       v = bmat->a;
1331:       for (i=0; i<bmat->nz; i++) {
1332: #if defined(PETSC_USE_COMPLEX)
1333:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1334: #else
1335:         sum += (*v)*(*v); v++;
1336: #endif
1337:       }
1338:       MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1339:       *norm = sqrt(*norm);
1340:     } else if (type == NORM_1) { /* max column norm */
1341:       PetscReal *tmp,*tmp2;
1342:       PetscInt    *jj,*garray = aij->garray;
1343:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1344:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1345:       PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1346:       *norm = 0.0;
1347:       v = amat->a; jj = amat->j;
1348:       for (j=0; j<amat->nz; j++) {
1349:         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1350:       }
1351:       v = bmat->a; jj = bmat->j;
1352:       for (j=0; j<bmat->nz; j++) {
1353:         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1354:       }
1355:       MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1356:       for (j=0; j<mat->N; j++) {
1357:         if (tmp2[j] > *norm) *norm = tmp2[j];
1358:       }
1359:       PetscFree(tmp);
1360:       PetscFree(tmp2);
1361:     } else if (type == NORM_INFINITY) { /* max row norm */
1362:       PetscReal ntemp = 0.0;
1363:       for (j=0; j<aij->A->m; j++) {
1364:         v = amat->a + amat->i[j];
1365:         sum = 0.0;
1366:         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1367:           sum += PetscAbsScalar(*v); v++;
1368:         }
1369:         v = bmat->a + bmat->i[j];
1370:         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1371:           sum += PetscAbsScalar(*v); v++;
1372:         }
1373:         if (sum > ntemp) ntemp = sum;
1374:       }
1375:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1376:     } else {
1377:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1378:     }
1379:   }
1380:   return(0);
1381: }

1385: PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1386: {
1387:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1388:   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1390:   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1391:   Mat            B;
1392:   PetscScalar    *array;

1395:   if (!matout && M != N) {
1396:     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1397:   }

1399:   MatCreate(A->comm,A->n,A->m,N,M,&B);
1400:   MatSetType(B,A->type_name);
1401:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

1403:   /* copy over the A part */
1404:   Aloc = (Mat_SeqAIJ*)a->A->data;
1405:   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1406:   row = a->rstart;
1407:   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1408:   for (i=0; i<m; i++) {
1409:     MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1410:     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1411:   }
1412:   aj = Aloc->j;
1413:   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

1415:   /* copy over the B part */
1416:   Aloc = (Mat_SeqAIJ*)a->B->data;
1417:   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1418:   row  = a->rstart;
1419:   PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);
1420:   ct   = cols;
1421:   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1422:   for (i=0; i<m; i++) {
1423:     MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1424:     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1425:   }
1426:   PetscFree(ct);
1427:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1428:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1429:   if (matout) {
1430:     *matout = B;
1431:   } else {
1432:     MatHeaderCopy(A,B);
1433:   }
1434:   return(0);
1435: }

1439: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1440: {
1441:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1442:   Mat            a = aij->A,b = aij->B;
1444:   PetscInt       s1,s2,s3;

1447:   MatGetLocalSize(mat,&s2,&s3);
1448:   if (rr) {
1449:     VecGetLocalSize(rr,&s1);
1450:     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1451:     /* Overlap communication with computation. */
1452:     VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1453:   }
1454:   if (ll) {
1455:     VecGetLocalSize(ll,&s1);
1456:     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1457:     (*b->ops->diagonalscale)(b,ll,0);
1458:   }
1459:   /* scale  the diagonal block */
1460:   (*a->ops->diagonalscale)(a,ll,rr);

1462:   if (rr) {
1463:     /* Do a scatter end and then right scale the off-diagonal block */
1464:     VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1465:     (*b->ops->diagonalscale)(b,0,aij->lvec);
1466:   }
1467: 
1468:   return(0);
1469: }


1474: PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1475: {
1476:   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;

1480:   if (!a->rank) {
1481:     MatPrintHelp_SeqAIJ(a->A);
1482:   }
1483:   return(0);
1484: }

1488: PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1489: {
1490:   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;

1494:   MatSetBlockSize(a->A,bs);
1495:   MatSetBlockSize(a->B,bs);
1496:   return(0);
1497: }
1500: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1501: {
1502:   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;

1506:   MatSetUnfactored(a->A);
1507:   return(0);
1508: }

1512: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1513: {
1514:   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1515:   Mat            a,b,c,d;
1516:   PetscTruth     flg;

1520:   a = matA->A; b = matA->B;
1521:   c = matB->A; d = matB->B;

1523:   MatEqual(a,c,&flg);
1524:   if (flg == PETSC_TRUE) {
1525:     MatEqual(b,d,&flg);
1526:   }
1527:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1528:   return(0);
1529: }

1533: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1534: {
1536:   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1537:   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

1540:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1541:   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1542:     /* because of the column compression in the off-processor part of the matrix a->B,
1543:        the number of columns in a->B and b->B may be different, hence we cannot call
1544:        the MatCopy() directly on the two parts. If need be, we can provide a more 
1545:        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1546:        then copying the submatrices */
1547:     MatCopy_Basic(A,B,str);
1548:   } else {
1549:     MatCopy(a->A,b->A,str);
1550:     MatCopy(a->B,b->B,str);
1551:   }
1552:   return(0);
1553: }

1557: PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1558: {

1562:    MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1563:   return(0);
1564: }

 1566:  #include "petscblaslapack.h"
1569: PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1570: {
1572:   PetscInt       i;
1573:   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1574:   PetscBLASInt   bnz,one=1;
1575:   Mat_SeqAIJ     *x,*y;

1578:   if (str == SAME_NONZERO_PATTERN) {
1579:     x = (Mat_SeqAIJ *)xx->A->data;
1580:     y = (Mat_SeqAIJ *)yy->A->data;
1581:     bnz = (PetscBLASInt)x->nz;
1582:     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1583:     x = (Mat_SeqAIJ *)xx->B->data;
1584:     y = (Mat_SeqAIJ *)yy->B->data;
1585:     bnz = (PetscBLASInt)x->nz;
1586:     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1587:   } else if (str == SUBSET_NONZERO_PATTERN) {
1588:     MatAXPY_SeqAIJ(a,xx->A,yy->A,str);

1590:     x = (Mat_SeqAIJ *)xx->B->data;
1591:     y = (Mat_SeqAIJ *)yy->B->data;
1592:     if (y->xtoy && y->XtoY != xx->B) {
1593:       PetscFree(y->xtoy);
1594:       MatDestroy(y->XtoY);
1595:     }
1596:     if (!y->xtoy) { /* get xtoy */
1597:       MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
1598:       y->XtoY = xx->B;
1599:     }
1600:     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1601:   } else {
1602:     MatAXPY_Basic(a,X,Y,str);
1603:   }
1604:   return(0);
1605: }

1607: /* -------------------------------------------------------------------*/
1608: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1609:        MatGetRow_MPIAIJ,
1610:        MatRestoreRow_MPIAIJ,
1611:        MatMult_MPIAIJ,
1612: /* 4*/ MatMultAdd_MPIAIJ,
1613:        MatMultTranspose_MPIAIJ,
1614:        MatMultTransposeAdd_MPIAIJ,
1615:        0,
1616:        0,
1617:        0,
1618: /*10*/ 0,
1619:        0,
1620:        0,
1621:        MatRelax_MPIAIJ,
1622:        MatTranspose_MPIAIJ,
1623: /*15*/ MatGetInfo_MPIAIJ,
1624:        MatEqual_MPIAIJ,
1625:        MatGetDiagonal_MPIAIJ,
1626:        MatDiagonalScale_MPIAIJ,
1627:        MatNorm_MPIAIJ,
1628: /*20*/ MatAssemblyBegin_MPIAIJ,
1629:        MatAssemblyEnd_MPIAIJ,
1630:        0,
1631:        MatSetOption_MPIAIJ,
1632:        MatZeroEntries_MPIAIJ,
1633: /*25*/ MatZeroRows_MPIAIJ,
1634:        0,
1635:        0,
1636:        0,
1637:        0,
1638: /*30*/ MatSetUpPreallocation_MPIAIJ,
1639:        0,
1640:        0,
1641:        0,
1642:        0,
1643: /*35*/ MatDuplicate_MPIAIJ,
1644:        0,
1645:        0,
1646:        0,
1647:        0,
1648: /*40*/ MatAXPY_MPIAIJ,
1649:        MatGetSubMatrices_MPIAIJ,
1650:        MatIncreaseOverlap_MPIAIJ,
1651:        MatGetValues_MPIAIJ,
1652:        MatCopy_MPIAIJ,
1653: /*45*/ MatPrintHelp_MPIAIJ,
1654:        MatScale_MPIAIJ,
1655:        0,
1656:        0,
1657:        0,
1658: /*50*/ MatSetBlockSize_MPIAIJ,
1659:        0,
1660:        0,
1661:        0,
1662:        0,
1663: /*55*/ MatFDColoringCreate_MPIAIJ,
1664:        0,
1665:        MatSetUnfactored_MPIAIJ,
1666:        0,
1667:        0,
1668: /*60*/ MatGetSubMatrix_MPIAIJ,
1669:        MatDestroy_MPIAIJ,
1670:        MatView_MPIAIJ,
1671:        MatGetPetscMaps_Petsc,
1672:        0,
1673: /*65*/ 0,
1674:        0,
1675:        0,
1676:        0,
1677:        0,
1678: /*70*/ 0,
1679:        0,
1680:        MatSetColoring_MPIAIJ,
1681:        MatSetValuesAdic_MPIAIJ,
1682:        MatSetValuesAdifor_MPIAIJ,
1683: /*75*/ 0,
1684:        0,
1685:        0,
1686:        0,
1687:        0,
1688: /*80*/ 0,
1689:        0,
1690:        0,
1691:        0,
1692: /*84*/ MatLoad_MPIAIJ,
1693:        0,
1694:        0,
1695:        0,
1696:        0,
1697:        0,
1698: /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1699:        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1700:        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1701:        MatPtAP_MPIAIJ_MPIAIJ,
1702:        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1703: /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1704:        0,
1705:        0,
1706:        0};

1708: /* ----------------------------------------------------------------------------------------*/

1713: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1714: {
1715:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

1719:   MatStoreValues(aij->A);
1720:   MatStoreValues(aij->B);
1721:   return(0);
1722: }

1728: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1729: {
1730:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

1734:   MatRetrieveValues(aij->A);
1735:   MatRetrieveValues(aij->B);
1736:   return(0);
1737: }

1740:  #include petscpc.h
1744: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1745: {
1746:   Mat_MPIAIJ     *b;
1748:   PetscInt       i;

1751:   B->preallocated = PETSC_TRUE;
1752:   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1753:   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1754:   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1755:   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1756:   if (d_nnz) {
1757:     for (i=0; i<B->m; i++) {
1758:       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1759:     }
1760:   }
1761:   if (o_nnz) {
1762:     for (i=0; i<B->m; i++) {
1763:       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1764:     }
1765:   }
1766:   b = (Mat_MPIAIJ*)B->data;
1767:   MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
1768:   MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);

1770:   return(0);
1771: }

1774: /*MC
1775:    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

1777:    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1778:    and MATMPIAIJ otherwise.  As a result, for single process communicators, 
1779:   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 
1780:   for communicators controlling multiple processes.  It is recommended that you call both of
1781:   the above preallocation routines for simplicity.

1783:    Options Database Keys:
1784: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

1786:   Level: beginner

1788: .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1789: M*/

1794: PetscErrorCode MatCreate_AIJ(Mat A)
1795: {
1797:   PetscMPIInt    size;

1800:   PetscObjectChangeTypeName((PetscObject)A,MATAIJ);
1801:   MPI_Comm_size(A->comm,&size);
1802:   if (size == 1) {
1803:     MatSetType(A,MATSEQAIJ);
1804:   } else {
1805:     MatSetType(A,MATMPIAIJ);
1806:   }
1807:   return(0);
1808: }
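/*
   Illustrative usage sketch (not part of the original source): creating a matrix of type
   MATAIJ so that MatCreate_AIJ() above resolves it to MATSEQAIJ or MATMPIAIJ depending on
   the communicator size.  Both preallocation routines are called, as the MATAIJ manual
   page above recommends; the one that does not apply is simply ignored.  The communicator,
   sizes and nonzero estimates below are placeholders, and error checking is omitted.

.vb
     Mat A;
     MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,PETSC_NULL);
     MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
     ...set entries with MatSetValues()...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
*/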

1813: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1814: {
1815:   Mat            mat;
1816:   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

1820:   *newmat       = 0;
1821:   MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1822:   MatSetType(mat,matin->type_name);
1823:   PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
1824:   a    = (Mat_MPIAIJ*)mat->data;
1825: 
1826:   mat->factor       = matin->factor;
1827:   mat->bs           = matin->bs;
1828:   mat->assembled    = PETSC_TRUE;
1829:   mat->insertmode   = NOT_SET_VALUES;
1830:   mat->preallocated = PETSC_TRUE;

1832:   a->rstart       = oldmat->rstart;
1833:   a->rend         = oldmat->rend;
1834:   a->cstart       = oldmat->cstart;
1835:   a->cend         = oldmat->cend;
1836:   a->size         = oldmat->size;
1837:   a->rank         = oldmat->rank;
1838:   a->donotstash   = oldmat->donotstash;
1839:   a->roworiented  = oldmat->roworiented;
1840:   a->rowindices   = 0;
1841:   a->rowvalues    = 0;
1842:   a->getrowactive = PETSC_FALSE;

1844:   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));
1845:   MatStashCreate_Private(matin->comm,1,&mat->stash);
1846:   if (oldmat->colmap) {
1847: #if defined (PETSC_USE_CTABLE)
1848:     PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1849: #else
1850:     PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);
1851:     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1852:     PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));
1853: #endif
1854:   } else a->colmap = 0;
1855:   if (oldmat->garray) {
1856:     PetscInt len;
1857:     len  = oldmat->B->n;
1858:     PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
1859:     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1860:     if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
1861:   } else a->garray = 0;
1862: 
1863:    VecDuplicate(oldmat->lvec,&a->lvec);
1864:   PetscLogObjectParent(mat,a->lvec);
1865:    VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1866:   PetscLogObjectParent(mat,a->Mvctx);
1867:    MatDestroy(a->A);
1868:    MatDuplicate(oldmat->A,cpvalues,&a->A);
1869:   PetscLogObjectParent(mat,a->A);
1870:    MatDestroy(a->B);
1871:    MatDuplicate(oldmat->B,cpvalues,&a->B);
1872:   PetscLogObjectParent(mat,a->B);
1873:   PetscFListDuplicate(matin->qlist,&mat->qlist);
1874:   *newmat = mat;
1875:   return(0);
1876: }

1878:  #include petscsys.h

1882: PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1883: {
1884:   Mat            A;
1885:   PetscScalar    *vals,*svals;
1886:   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1887:   MPI_Status     status;
1889:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1890:   PetscInt       i,nz,j,rstart,rend;
1891:   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1892:   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1893:   PetscInt       cend,cstart,n,*rowners;
1894:   int            fd;

1897:   MPI_Comm_size(comm,&size);
1898:   MPI_Comm_rank(comm,&rank);
1899:   if (!rank) {
1900:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1901:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1902:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1903:   }

1905:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1906:   M = header[1]; N = header[2];
1907:   /* determine ownership of all rows */
1908:   m    = M/size + ((M % size) > rank);
1909:   PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
1910:   MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1911:   rowners[0] = 0;
1912:   for (i=2; i<=size; i++) {
1913:     rowners[i] += rowners[i-1];
1914:   }
1915:   rstart = rowners[rank];
1916:   rend   = rowners[rank+1];
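  /* illustrative example (not in the original source): with M = 10 rows read from the
     file and size = 3 processes, the formula above gives m = 4,3,3 on ranks 0,1,2 and
     the prefix sum yields rowners = {0,4,7,10}, so rank 1 owns rows [4,7) */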

1918:   /* distribute row lengths to all processors */
1919:   PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);
1920:   if (!rank) {
1921:     PetscBinaryRead(fd,ourlens,m,PETSC_INT);
1922:     PetscMalloc(m*sizeof(PetscInt),&rowlengths);
1923:     PetscMalloc(size*sizeof(PetscInt),&procsnz);
1924:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1925:     for (j=0; j<m; j++) {
1926:       procsnz[0] += ourlens[j];
1927:     }
1928:     for (i=1; i<size; i++) {
1929:       PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
1930:       /* calculate the number of nonzeros on each processor */
1931:       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1932:         procsnz[i] += rowlengths[j];
1933:       }
1934:       MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
1935:     }
1936:     PetscFree(rowlengths);
1937:   } else {
1938:     MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);
1939:   }

1941:   if (!rank) {
1942:     /* determine max buffer needed and allocate it */
1943:     maxnz = 0;
1944:     for (i=0; i<size; i++) {
1945:       maxnz = PetscMax(maxnz,procsnz[i]);
1946:     }
1947:     PetscMalloc(maxnz*sizeof(PetscInt),&cols);

1949:     /* read in my part of the matrix column indices  */
1950:     nz   = procsnz[0];
1951:     PetscMalloc(nz*sizeof(PetscInt),&mycols);
1952:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1954:     /* read in everyone else's parts and ship them off */
1955:     for (i=1; i<size; i++) {
1956:       nz   = procsnz[i];
1957:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1958:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1959:     }
1960:     PetscFree(cols);
1961:   } else {
1962:     /* determine buffer space needed for message */
1963:     nz = 0;
1964:     for (i=0; i<m; i++) {
1965:       nz += ourlens[i];
1966:     }
1967:     PetscMalloc(nz*sizeof(PetscInt),&mycols);

1969:     /* receive message of column indices */
1970:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1971:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1972:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1973:   }

1975:   /* determine column ownership if matrix is not square */
1976:   if (N != M) {
1977:     n      = N/size + ((N % size) > rank);
1978:     MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
1979:     cstart = cend - n;
1980:   } else {
1981:     cstart = rstart;
1982:     cend   = rend;
1983:     n      = cend - cstart;
1984:   }

1986:   /* loop over local rows, determining number of off diagonal entries */
1987:   PetscMemzero(offlens,m*sizeof(PetscInt));
1988:   jj = 0;
1989:   for (i=0; i<m; i++) {
1990:     for (j=0; j<ourlens[i]; j++) {
1991:       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1992:       jj++;
1993:     }
1994:   }

1996:   /* create our matrix */
1997:   for (i=0; i<m; i++) {
1998:     ourlens[i] -= offlens[i];
1999:   }
2000:   MatCreate(comm,m,n,M,N,&A);
2001:   MatSetType(A,type);
2002:   MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);

2004:   MatSetOption(A,MAT_COLUMNS_SORTED);
2005:   for (i=0; i<m; i++) {
2006:     ourlens[i] += offlens[i];
2007:   }

2009:   if (!rank) {
2010:     PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);

2012:     /* read in my part of the matrix numerical values  */
2013:     nz   = procsnz[0];
2014:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2015: 
2016:     /* insert into matrix */
2017:     jj      = rstart;
2018:     smycols = mycols;
2019:     svals   = vals;
2020:     for (i=0; i<m; i++) {
2021:       MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2022:       smycols += ourlens[i];
2023:       svals   += ourlens[i];
2024:       jj++;
2025:     }

2027:     /* read in other processors' parts and ship them out */
2028:     for (i=1; i<size; i++) {
2029:       nz   = procsnz[i];
2030:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2031:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2032:     }
2033:     PetscFree(procsnz);
2034:   } else {
2035:     /* receive numeric values */
2036:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

2038:     /* receive message of values */
2039:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2040:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2041:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

2043:     /* insert into matrix */
2044:     jj      = rstart;
2045:     smycols = mycols;
2046:     svals   = vals;
2047:     for (i=0; i<m; i++) {
2048:       MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2049:       smycols += ourlens[i];
2050:       svals   += ourlens[i];
2051:       jj++;
2052:     }
2053:   }
2054:   PetscFree2(ourlens,offlens);
2055:   PetscFree(vals);
2056:   PetscFree(mycols);
2057:   PetscFree(rowners);

2059:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2060:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2061:   *newmat = A;
2062:   return(0);
2063: }

2067: /*
2068:     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2069:   on each process, and then the end result formed by concatenating those local matrices.
2070:   Writing it directly would look much like MatGetSubMatrices_MPIAIJ()
2071: */
2072: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2073: {
2075:   PetscMPIInt    rank,size;
2076:   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2077:   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2078:   Mat            *local,M,Mreuse;
2079:   PetscScalar    *vwork,*aa;
2080:   MPI_Comm       comm = mat->comm;
2081:   Mat_SeqAIJ     *aij;


2085:   MPI_Comm_rank(comm,&rank);
2086:   MPI_Comm_size(comm,&size);

2088:   if (call ==  MAT_REUSE_MATRIX) {
2089:     PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2090:     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2091:     local = &Mreuse;
2092:     MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2093:   } else {
2094:     MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2095:     Mreuse = *local;
2096:     PetscFree(local);
2097:   }

2099:   /* 
2100:       m - number of local rows
2101:       n - number of columns (same on all processors)
2102:       rstart - first row in new global matrix generated
2103:   */
2104:   MatGetSize(Mreuse,&m,&n);
2105:   if (call == MAT_INITIAL_MATRIX) {
2106:     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2107:     ii  = aij->i;
2108:     jj  = aij->j;

2110:     /*
2111:         Determine the number of non-zeros in the diagonal and off-diagonal 
2112:         portions of the matrix in order to do correct preallocation
2113:     */

2115:     /* first get start and end of "diagonal" columns */
2116:     if (csize == PETSC_DECIDE) {
2117:       ISGetSize(isrow,&mglobal);
2118:       if (mglobal == n) { /* square matrix */
2119:         nlocal = m;
2120:       } else {
2121:         nlocal = n/size + ((n % size) > rank);
2122:       }
2123:     } else {
2124:       nlocal = csize;
2125:     }
2126:     MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
2127:     rstart = rend - nlocal;
2128:     if (rank == size - 1 && rend != n) {
2129:       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2130:     }

2132:     /* next, compute all the lengths */
2133:     PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
2134:     olens = dlens + m;
2135:     for (i=0; i<m; i++) {
2136:       jend = ii[i+1] - ii[i];
2137:       olen = 0;
2138:       dlen = 0;
2139:       for (j=0; j<jend; j++) {
2140:         if (*jj < rstart || *jj >= rend) olen++;
2141:         else dlen++;
2142:         jj++;
2143:       }
2144:       olens[i] = olen;
2145:       dlens[i] = dlen;
2146:     }
2147:     MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);
2148:     MatSetType(M,mat->type_name);
2149:     MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
2150:     PetscFree(dlens);
2151:   } else {
2152:     PetscInt ml,nl;

2154:     M = *newmat;
2155:     MatGetLocalSize(M,&ml,&nl);
2156:     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2157:     MatZeroEntries(M);
2158:     /*
2159:          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2160:        rather than the slower MatSetValues().
2161:     */
2162:     M->was_assembled = PETSC_TRUE;
2163:     M->assembled     = PETSC_FALSE;
2164:   }
2165:   MatGetOwnershipRange(M,&rstart,&rend);
2166:   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2167:   ii  = aij->i;
2168:   jj  = aij->j;
2169:   aa  = aij->a;
2170:   for (i=0; i<m; i++) {
2171:     row   = rstart + i;
2172:     nz    = ii[i+1] - ii[i];
2173:     cwork = jj;     jj += nz;
2174:     vwork = aa;     aa += nz;
2175:     MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2176:   }

2178:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2179:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2180:   *newmat = M;

2182:   /* save submatrix used in processor for next request */
2183:   if (call ==  MAT_INITIAL_MATRIX) {
2184:     PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2185:     PetscObjectDereference((PetscObject)Mreuse);
2186:   }

2188:   return(0);
2189: }

2194: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2195: {
2196:   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2197:   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2198:   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2199:   const PetscInt *JJ;
2200:   PetscScalar    *values;

2204: #if defined(PETSC_OPT_g)
2205:   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2206: #endif
2207:   PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);
2208:   o_nnz = d_nnz + m;

2210:   for (i=0; i<m; i++) {
2211:     nnz     = I[i+1]- I[i];
2212:     JJ      = J + I[i];
2213:     nnz_max = PetscMax(nnz_max,nnz);
2214: #if defined(PETSC_OPT_g)
2215:     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2216: #endif
2217:     for (j=0; j<nnz; j++) {
2218:       if (*JJ >= cstart) break;
2219:       JJ++;
2220:     }
2221:     d = 0;
2222:     for (; j<nnz; j++) {
2223:       if (*JJ++ >= cend) break;
2224:       d++;
2225:     }
2226:     d_nnz[i] = d;
2227:     o_nnz[i] = nnz - d;
2228:   }
2229:   MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
2230:   PetscFree(d_nnz);

2232:   if (v) values = (PetscScalar*)v;
2233:   else {
2234:     PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
2235:     PetscMemzero(values,nnz_max*sizeof(PetscScalar));
2236:   }

2238:   MatSetOption(B,MAT_COLUMNS_SORTED);
2239:   for (i=0; i<m; i++) {
2240:     ii   = i + rstart;
2241:     nnz  = I[i+1]- I[i];
2242:     MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);
2243:   }
2244:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2245:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2246:   MatSetOption(B,MAT_COLUMNS_UNSORTED);

2248:   if (!v) {
2249:     PetscFree(values);
2250:   }
2251:   return(0);
2252: }

2257: /*@C
2258:    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2259:    (the default parallel PETSc format).  

2261:    Collective on MPI_Comm

2263:    Input Parameters:
2264: +  B - the matrix 
2265: .  i - the indices into j for the start of each local row (starts with zero)
2266: .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2267: -  v - optional values in the matrix

2269:    Level: developer

2271: .keywords: matrix, aij, compressed row, sparse, parallel

2273: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2274: @*/
2275: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2276: {
2277:   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

2280:   PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);
2281:   if (f) {
2282:     (*f)(B,i,j,v);
2283:   }
2284:   return(0);
2285: }
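/*
   Illustrative sketch (not part of the original source) of the CSR input expected by
   MatMPIAIJSetPreallocationCSR().  Suppose a process owns two local rows whose (global,
   sorted) column indices are {0,2,5} and {1,2}; the arrays below are hypothetical values
   chosen only to show the layout.

.vb
     PetscInt    i[3] = {0,3,5};                 row r uses j[i[r]] .. j[i[r+1]-1]
     PetscInt    j[5] = {0,2,5, 1,2};            global column indices, sorted per row
     PetscScalar v[5] = {1.0,2.0,3.0,4.0,5.0};   optional; may be PETSC_NULL
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
*/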

2289: /*@C
2290:    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2291:    (the default parallel PETSc format).  For good matrix assembly performance
2292:    the user should preallocate the matrix storage by setting the parameters 
2293:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2294:    performance can be increased by more than a factor of 50.

2296:    Collective on MPI_Comm

2298:    Input Parameters:
2299: +  B - the matrix 
2300: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2301:            (same value is used for all local rows)
2302: .  d_nnz - array containing the number of nonzeros in the various rows of the 
2303:            DIAGONAL portion of the local submatrix (possibly different for each row)
2304:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
2305:            The size of this array is equal to the number of local rows, i.e 'm'. 
2306:            You must leave room for the diagonal entry even if it is zero.
2307: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2308:            submatrix (same value is used for all local rows).
2309: -  o_nnz - array containing the number of nonzeros in the various rows of the
2310:            OFF-DIAGONAL portion of the local submatrix (possibly different for
2311:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
2312:            structure. The size of this array is equal to the number 
2313:            of local rows, i.e 'm'. 

2315:    If the *_nnz parameter is given then the *_nz parameter is ignored

2317:    The AIJ format (also called the Yale sparse matrix format or
2318:    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2319:    storage.  The stored row and column indices begin with zero.  See the users manual for details.

2321:    The parallel matrix is partitioned such that the first m0 rows belong to 
2322:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
2323:    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

2325:    The DIAGONAL portion of the local submatrix of a processor can be defined 
2326:    as the submatrix which is obtained by extracting the part corresponding 
2327:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
2328:    first row that belongs to the processor, and r2 is the last row belonging 
2329:    to this processor. This is a square mxm matrix. The remaining portion 
2330:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

2332:    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

2334:    Example usage:
2335:   
2336:    Consider the following 8x8 matrix with 34 non-zero values, that is 
2337:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2338:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
2339:    as follows:

2341: .vb
2342:             1  2  0  |  0  3  0  |  0  4
2343:     Proc0   0  5  6  |  7  0  0  |  8  0
2344:             9  0 10  | 11  0  0  | 12  0
2345:     -------------------------------------
2346:            13  0 14  | 15 16 17  |  0  0
2347:     Proc1   0 18  0  | 19 20 21  |  0  0 
2348:             0  0  0  | 22 23  0  | 24  0
2349:     -------------------------------------
2350:     Proc2  25 26 27  |  0  0 28  | 29  0
2351:            30  0  0  | 31 32 33  |  0 34
2352: .ve

2354:    This can be represented as a collection of submatrices as:

2356: .vb
2357:       A B C
2358:       D E F
2359:       G H I
2360: .ve

2362:    Where the submatrices A,B,C are owned by proc0, D,E,F are
2363:    owned by proc1, G,H,I are owned by proc2.

2365:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2366:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2367:    The 'M','N' parameters are 8,8, and have the same values on all procs.

2369:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2370:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2371:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2372:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2373:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2374:    matrix, and [DF] as another SeqAIJ matrix.

2376:    When d_nz, o_nz parameters are specified, d_nz storage elements are
2377:    allocated for every row of the local diagonal submatrix, and o_nz
2378:    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2379:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 
2380:    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 
2381:    In this case, the values of d_nz,o_nz are:
2382: .vb
2383:      proc0 : d_nz = 2, o_nz = 2
2384:      proc1 : d_nz = 3, o_nz = 2
2385:      proc2 : d_nz = 1, o_nz = 4
2386: .ve
2387:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2388:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2389:    for proc2, i.e. we are using 12+15+10=37 storage locations to store 
2390:    34 values.

2392:    When d_nnz, o_nnz parameters are specified, the storage is specified
2393:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2394:    In the above case the values for d_nnz,o_nnz are:
2395: .vb
2396:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2397:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2398:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2399: .ve
2400:    Here the space allocated is the sum of all the above values, i.e. 34, and
2401:    hence the preallocation is perfect.

2403:    Level: intermediate

2405: .keywords: matrix, aij, compressed row, sparse, parallel

2407: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2408:           MPIAIJ
2409: @*/
2410: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2411: {
2412:   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

2415:   PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);
2416:   if (f) {
2417:     (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
2418:   }
2419:   return(0);
2420: }
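/*
   Illustrative sketch (not part of the original source): the call proc0 of the 8x8 example
   in the manual page above could make, using the per-row counts d_nnz/o_nnz instead of the
   scalar d_nz/o_nz (which are then ignored).  A is a placeholder for an already created
   MPIAIJ matrix, and error checking is omitted.

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
*/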

2424: /*@C
2425:    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2426:    (the default parallel PETSc format).  For good matrix assembly performance
2427:    the user should preallocate the matrix storage by setting the parameters 
2428:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2429:    performance can be increased by more than a factor of 50.

2431:    Collective on MPI_Comm

2433:    Input Parameters:
2434: +  comm - MPI communicator
2435: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2436:            This value should be the same as the local size used in creating the 
2437:            y vector for the matrix-vector product y = Ax.
2438: .  n - This value should be the same as the local size used in creating the 
2439:        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2440:        calculated if N is given). For square matrices n is almost always m.
2441: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2442: .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2443: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2444:            (same value is used for all local rows)
2445: .  d_nnz - array containing the number of nonzeros in the various rows of the 
2446:            DIAGONAL portion of the local submatrix (possibly different for each row)
2447:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
2448:            The size of this array is equal to the number of local rows, i.e 'm'. 
2449:            You must leave room for the diagonal entry even if it is zero.
2450: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2451:            submatrix (same value is used for all local rows).
2452: -  o_nnz - array containing the number of nonzeros in the various rows of the
2453:            OFF-DIAGONAL portion of the local submatrix (possibly different for
2454:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
2455:            structure. The size of this array is equal to the number 
2456:            of local rows, i.e 'm'. 

2458:    Output Parameter:
2459: .  A - the matrix 

2461:    Notes:
2462:    If the *_nnz parameter is given then the *_nz parameter is ignored

2464:    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2465:    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2466:    storage requirements for this matrix.

2468:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
2469:    processor then it must be used on all processors that share the object for 
2470:    that argument.

2472:    The user MUST specify either the local or global matrix dimensions
2473:    (possibly both).

2475:    The parallel matrix is partitioned such that the first m0 rows belong to 
2476:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
2477:    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

2479:    The DIAGONAL portion of the local submatrix of a processor can be defined 
2480:    as the submatrix which is obtained by extracting the part corresponding 
2481:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
2482:    first row that belongs to the processor, and r2 is the last row belonging 
2483:    to this processor. This is a square mxm matrix. The remaining portion 
2484:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

2486:    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

2488:    When calling this routine with a single process communicator, a matrix of
2489:    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2490:    type of communicator, use the construction mechanism:
2491:      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

2493:    By default, this format uses inodes (identical nodes) when possible.
2494:    We search for consecutive rows with the same nonzero structure, thereby
2495:    reusing matrix information to achieve increased efficiency.

2497:    Options Database Keys:
2498: +  -mat_aij_no_inode  - Do not use inodes
2499: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2500: -  -mat_aij_oneindex - Internally use indexing starting at 1
2501:         rather than 0.  Note that when calling MatSetValues(),
2502:         the user still MUST index entries starting at 0!


2505:    Example usage:
2506:   
2507:    Consider the following 8x8 matrix with 34 non-zero values, that is 
2508:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2509:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
2510:    as follows:

2512: .vb
2513:             1  2  0  |  0  3  0  |  0  4
2514:     Proc0   0  5  6  |  7  0  0  |  8  0
2515:             9  0 10  | 11  0  0  | 12  0
2516:     -------------------------------------
2517:            13  0 14  | 15 16 17  |  0  0
2518:     Proc1   0 18  0  | 19 20 21  |  0  0 
2519:             0  0  0  | 22 23  0  | 24  0
2520:     -------------------------------------
2521:     Proc2  25 26 27  |  0  0 28  | 29  0
2522:            30  0  0  | 31 32 33  |  0 34
2523: .ve

2525:    This can be represented as a collection of submatrices as:

2527: .vb
2528:       A B C
2529:       D E F
2530:       G H I
2531: .ve

2533:    Where the submatrices A,B,C are owned by proc0, D,E,F are
2534:    owned by proc1, G,H,I are owned by proc2.

2536:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2537:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2538:    The 'M','N' parameters are 8,8, and have the same values on all procs.

2540:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2541:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2542:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2543:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2544:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2545:    matrix, and [DF] as another SeqAIJ matrix.

2547:    When d_nz, o_nz parameters are specified, d_nz storage elements are
2548:    allocated for every row of the local diagonal submatrix, and o_nz
2549:    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2550:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 
2551:    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 
2552:    In this case, the values of d_nz,o_nz are:
2553: .vb
2554:      proc0 : d_nz = 2, o_nz = 2
2555:      proc1 : d_nz = 3, o_nz = 2
2556:      proc2 : d_nz = 1, o_nz = 4
2557: .ve
2558:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2559:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2560:    for proc2, i.e. we are using 12+15+10=37 storage locations to store 
2561:    34 values.

2563:    When d_nnz, o_nnz parameters are specified, the storage is specified
2564:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2565:    In the above case the values for d_nnz,o_nnz are:
2566: .vb
2567:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2568:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2569:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2570: .ve
2571:    Here the space allocated is the sum of all the above values, i.e. 34, and
2572:    hence the preallocation is perfect.

2574:    Level: intermediate

2576: .keywords: matrix, aij, compressed row, sparse, parallel

2578: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2579:           MPIAIJ
2580: @*/
2581: PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2582: {
2584:   PetscMPIInt    size;

2587:   MatCreate(comm,m,n,M,N,A);
2588:   MPI_Comm_size(comm,&size);
2589:   if (size > 1) {
2590:     MatSetType(*A,MATMPIAIJ);
2591:     MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2592:   } else {
2593:     MatSetType(*A,MATSEQAIJ);
2594:     MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2595:   }
2596:   return(0);
2597: }
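/*
   Illustrative sketch (not part of the original source): creating the 8x8 matrix from the
   manual page above with explicit local sizes (m = n = 3 on proc0 and proc1, m = n = 2 on
   proc2).  Here d_nnz and o_nnz stand for the per-process arrays listed above; they could
   also be PETSC_NULL with scalar d_nz/o_nz given instead.

.vb
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);     on proc0 and proc1
     MatCreateMPIAIJ(comm,2,2,8,8,0,d_nnz,0,o_nnz,&A);     on proc2
.ve
*/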

2601: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2602: {
2603:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

2606:   *Ad     = a->A;
2607:   *Ao     = a->B;
2608:   *colmap = a->garray;
2609:   return(0);
2610: }

2614: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2615: {
2617:   PetscInt       i;
2618:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

2621:   if (coloring->ctype == IS_COLORING_LOCAL) {
2622:     ISColoringValue *allcolors,*colors;
2623:     ISColoring      ocoloring;

2625:     /* set coloring for diagonal portion */
2626:     MatSetColoring_SeqAIJ(a->A,coloring);

2628:     /* set coloring for off-diagonal portion */
2629:     ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2630:     PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2631:     for (i=0; i<a->B->n; i++) {
2632:       colors[i] = allcolors[a->garray[i]];
2633:     }
2634:     PetscFree(allcolors);
2635:     ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2636:     MatSetColoring_SeqAIJ(a->B,ocoloring);
2637:     ISColoringDestroy(ocoloring);
2638:   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2639:     ISColoringValue *colors;
2640:     PetscInt             *larray;
2641:     ISColoring      ocoloring;

2643:     /* set coloring for diagonal portion */
2644:     PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);
2645:     for (i=0; i<a->A->n; i++) {
2646:       larray[i] = i + a->cstart;
2647:     }
2648:     ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2649:     PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);
2650:     for (i=0; i<a->A->n; i++) {
2651:       colors[i] = coloring->colors[larray[i]];
2652:     }
2653:     PetscFree(larray);
2654:     ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2655:     MatSetColoring_SeqAIJ(a->A,ocoloring);
2656:     ISColoringDestroy(ocoloring);

2658:     /* set coloring for off-diagonal portion */
2659:     PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);
2660:     ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2661:     PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2662:     for (i=0; i<a->B->n; i++) {
2663:       colors[i] = coloring->colors[larray[i]];
2664:     }
2665:     PetscFree(larray);
2666:     ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2667:     MatSetColoring_SeqAIJ(a->B,ocoloring);
2668:     ISColoringDestroy(ocoloring);
2669:   } else {
2670:     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2671:   }

2673:   return(0);
2674: }

2678: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2679: {
2680:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

2684:   MatSetValuesAdic_SeqAIJ(a->A,advalues);
2685:   MatSetValuesAdic_SeqAIJ(a->B,advalues);
2686:   return(0);
2687: }

2691: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2692: {
2693:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

2697:   MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2698:   MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2699:   return(0);
2700: }

2704: /*@C
2705:       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2706:                  matrices from each processor

2708:     Collective on MPI_Comm

2710:    Input Parameters:
2711: +    comm - the communicator the parallel matrix will live on
2712: .    inmat - the input sequential matrix (one per process)
2713: .    n - number of local columns (or PETSC_DECIDE)
2714: -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

2716:    Output Parameter:
2717: .    outmat - the parallel matrix generated

2719:     Level: advanced

2721:    Notes: The number of columns of the matrix in EACH processor MUST be the same.

2723: @*/
2724: PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2725: {
2727:   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2728:   PetscInt       *indx;
2729:   PetscScalar    *values;
2730:   PetscMap       columnmap,rowmap;

2733:     MatGetSize(inmat,&m,&N);
2734:   /*
2735:   PetscMPIInt       rank;
2736:   MPI_Comm_rank(comm,&rank);
2737:   PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2738:   */
2739:   if (scall == MAT_INITIAL_MATRIX){
2740:     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2741:     if (n == PETSC_DECIDE){
2742:       PetscMapCreate(comm,&columnmap);
2743:       PetscMapSetSize(columnmap,N);
2744:       PetscMapSetType(columnmap,MAP_MPI);
2745:       PetscMapGetLocalSize(columnmap,&n);
2746:       PetscMapDestroy(columnmap);
2747:     }

2749:     PetscMapCreate(comm,&rowmap);
2750:     PetscMapSetLocalSize(rowmap,m);
2751:     PetscMapSetType(rowmap,MAP_MPI);
2752:     PetscMapGetLocalRange(rowmap,&rstart,0);
2753:     PetscMapDestroy(rowmap);

2755:     MatPreallocateInitialize(comm,m,n,dnz,onz);
2756:     for (i=0;i<m;i++) {
2757:       MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
2758:       MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
2759:       MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
2760:     }
2761:     /* This routine will ONLY return MPIAIJ type matrix */
2762:     MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);
2763:     MatSetType(*outmat,MATMPIAIJ);
2764:     MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
2765:     MatPreallocateFinalize(dnz,onz);
2766: 
2767:   } else if (scall == MAT_REUSE_MATRIX){
2768:     MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);
2769:   } else {
2770:     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2771:   }

2773:   for (i=0;i<m;i++) {
2774:     MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
2775:     I    = i + rstart;
2776:     MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);
2777:     MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
2778:   }
2779:   MatDestroy(inmat);
2780:   MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
2781:   MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);

2783:   return(0);
2784: }
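/*
   Illustrative sketch (not part of the original source): merging one sequential matrix per
   process into a single parallel matrix.  Note that MatMerge() destroys the input matrix
   (see the MatDestroy(inmat) call above); seqA and parA are placeholder names.

.vb
     Mat seqA,parA;
     ...each process assembles its own seqA, all with the same number of columns...
     MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);
.ve
*/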

2788: PetscErrorCode MatFileSplit(Mat A,char *outfile)
2789: {
2790:   PetscErrorCode    ierr;
2791:   PetscMPIInt       rank;
2792:   PetscInt          m,N,i,rstart,nnz;
2793:   size_t            len;
2794:   const PetscInt    *indx;
2795:   PetscViewer       out;
2796:   char              *name;
2797:   Mat               B;
2798:   const PetscScalar *values;

2801:   MatGetLocalSize(A,&m,0);
2802:   MatGetSize(A,0,&N);
2803:   /* Should this be the type of the diagonal block of A? */
2804:   MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);
2805:   MatSetType(B,MATSEQAIJ);
2806:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
2807:   MatGetOwnershipRange(A,&rstart,0);
2808:   for (i=0;i<m;i++) {
2809:     MatGetRow(A,i+rstart,&nnz,&indx,&values);
2810:     MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
2811:     MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
2812:   }
2813:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2814:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2816:   MPI_Comm_rank(A->comm,&rank);
2817:   PetscStrlen(outfile,&len);
2818:   PetscMalloc((len+5)*sizeof(char),&name);
2819:   sprintf(name,"%s.%d",outfile,rank);
2820:   PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);
2821:   PetscFree(name);
2822:   MatView(B,out);
2823:   PetscViewerDestroy(out);
2824:   MatDestroy(B);
2825:   return(0);
2826: }

2828: EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2831: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2832: {
2833:   PetscErrorCode       ierr;
2834:   Mat_Merge_SeqsToMPI  *merge;
2835:   PetscObjectContainer container;

2838:   PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
2839:   if (container) {
2840:     PetscObjectContainerGetPointer(container,(void **)&merge);
2841:     PetscFree(merge->id_r);
2842:     PetscFree(merge->len_s);
2843:     PetscFree(merge->len_r);
2844:     PetscFree(merge->bi);
2845:     PetscFree(merge->bj);
2846:     PetscFree(merge->buf_ri);
2847:     PetscFree(merge->buf_rj);
2848:     PetscMapDestroy(merge->rowmap);
2849:     if (merge->coi){PetscFree(merge->coi);}
2850:     if (merge->coj){PetscFree(merge->coj);}
2851:     if (merge->owners_co){PetscFree(merge->owners_co);}
2852: 
2853:     PetscObjectContainerDestroy(container);
2854:     PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
2855:     PetscFree(merge);
2856:   }

2858:   MatDestroy_MPIAIJ(A);
2859:   return(0);
2860: }

2862:  #include src/mat/utils/freespace.h
2863:  #include petscbt.h
2866: /*@C
2867:       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
2868:                  matrices from each processor

2870:     Collective on MPI_Comm

2872:    Input Parameters:
2873: +    comm - the communicator the parallel matrix will live on
2874: .    seqmat - the input sequential matrix (one per process)
2875: .    m - number of local rows (or PETSC_DECIDE)
2876: .    n - number of local columns (or PETSC_DECIDE)
2877: -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

2879:    Output Parameter:
2880: .    mpimat - the parallel matrix generated

2882:     Level: advanced

2884:    Notes: 
2885:      The dimensions of the sequential matrix in each processor MUST be the same.
2886:      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2887:      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2888: @*/
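/*
   Illustrative sketch (not part of the original source) of the two-phase usage implied by
   the routines below: the symbolic phase creates the parallel matrix structure and the
   numeric phase fills in the values and assembles it.  Here m and n are the desired local
   sizes (or PETSC_DECIDE), and seqmat is a placeholder for each process's sequential matrix.

.vb
     Mat mpimat;
     MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
     MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve
*/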
2889: static PetscEvent logkey_seqstompinum = 0;
2890: PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2891: {
2892:   PetscErrorCode       ierr;
2893:   MPI_Comm             comm=mpimat->comm;
2894:   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2895:   PetscMPIInt          size,rank,taga,*len_s;
2896:   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2897:   PetscInt             proc,m;
2898:   PetscInt             **buf_ri,**buf_rj;
2899:   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2900:   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2901:   MPI_Request          *s_waits,*r_waits;
2902:   MPI_Status           *status;
2903:   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2904:   Mat_Merge_SeqsToMPI  *merge;
2905:   PetscObjectContainer container;
2906: 
2908:   if (!logkey_seqstompinum) {
2909:     PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2910:   }
2911:   PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);

2913:   MPI_Comm_size(comm,&size);
2914:   MPI_Comm_rank(comm,&rank);

2916:   PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
2917:   if (container) {
2918:     PetscObjectContainerGetPointer(container,(void **)&merge);
2919:   }
2920:   bi     = merge->bi;
2921:   bj     = merge->bj;
2922:   buf_ri = merge->buf_ri;
2923:   buf_rj = merge->buf_rj;

2925:   PetscMalloc(size*sizeof(MPI_Status),&status);
2926:   PetscMapGetGlobalRange(merge->rowmap,&owners);
2927:   len_s  = merge->len_s;

2929:   /* send and recv matrix values */
2930:   /*-----------------------------*/
2931:   PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);
2932:   PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);

2934:   PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
2935:   for (proc=0,k=0; proc<size; proc++){
2936:     if (!len_s[proc]) continue;
2937:     i = owners[proc];
2938:     MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
2939:     k++;
2940:   }

2942:   MPI_Waitall(merge->nrecv,r_waits,status);
2943:   MPI_Waitall(merge->nsend,s_waits,status);
2944:   PetscFree(status);

2946:   PetscFree(s_waits);
2947:   PetscFree(r_waits);

2949:   /* insert mat values of mpimat */
2950:   /*----------------------------*/
2951:   PetscMalloc(N*sizeof(MatScalar),&ba_i);
2952:   PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
2953:   nextrow = buf_ri_k + merge->nrecv;
2954:   nextai  = nextrow + merge->nrecv;

2956:   for (k=0; k<merge->nrecv; k++){
2957:     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
2958:     nrows = *(buf_ri_k[k]);
2959:     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
2960:     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
2961:   }

2963:   /* set values of ba */
2964:   PetscMapGetLocalSize(merge->rowmap,&m);
2965:   for (i=0; i<m; i++) {
2966:     arow = owners[rank] + i;
2967:     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2968:     bnzi = bi[i+1] - bi[i];
2969:     PetscMemzero(ba_i,bnzi*sizeof(MatScalar));

2971:     /* add local non-zero vals of this proc's seqmat into ba */
2972:     anzi = ai[arow+1] - ai[arow];
2973:     aj   = a->j + ai[arow];
2974:     aa   = a->a + ai[arow];
2975:     nextaj = 0;
2976:     for (j=0; nextaj<anzi; j++){
2977:       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2978:         ba_i[j] += aa[nextaj++];
2979:       }
2980:     }

2982:     /* add received vals into ba */
2983:     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2984:       /* i-th row */
2985:       if (i == *nextrow[k]) {
2986:         anzi = *(nextai[k]+1) - *nextai[k];
2987:         aj   = buf_rj[k] + *(nextai[k]);
2988:         aa   = abuf_r[k] + *(nextai[k]);
2989:         nextaj = 0;
2990:         for (j=0; nextaj<anzi; j++){
2991:           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2992:             ba_i[j] += aa[nextaj++];
2993:           }
2994:         }
2995:         nextrow[k]++; nextai[k]++;
2996:       }
2997:     }
2998:     MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
2999:   }
3000:   MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
3001:   MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);

3003:   PetscFree(abuf_r);
3004:   PetscFree(ba_i);
3005:   PetscFree(buf_ri_k);
3006:   PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);
3007:   return(0);
3008: }
3009: static PetscEvent logkey_seqstompisym = 0;
3010: PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3011: {
3012:   PetscErrorCode       ierr;
3013:   Mat                  B_mpi;
3014:   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3015:   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3016:   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3017:   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3018:   PetscInt             len,proc,*dnz,*onz;
3019:   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3020:   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3021:   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3022:   MPI_Status           *status;
3023:   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3024:   PetscBT              lnkbt;
3025:   Mat_Merge_SeqsToMPI  *merge;
3026:   PetscObjectContainer container;

3029:   if (!logkey_seqstompisym) {
3030:     PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3031:   }
3032:   PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);

3034:   MPI_Comm_size(comm,&size);
3035:   MPI_Comm_rank(comm,&rank);
3036: 
3037:   PetscNew(Mat_Merge_SeqsToMPI,&merge);
3038:   PetscMalloc(size*sizeof(MPI_Status),&status);

3040:   /* determine row ownership */
3041:   /*---------------------------------------------------------*/
3042:   PetscMapCreate(comm,&merge->rowmap);
3043:   if (m == PETSC_DECIDE) {
3044:     PetscMapSetSize(merge->rowmap,M);
3045:   } else {
3046:     PetscMapSetLocalSize(merge->rowmap,m);
3047:   }
3048:   PetscMapSetType(merge->rowmap,MAP_MPI);
3049:   PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
3050:   PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
3051: 
3052:   if (m == PETSC_DECIDE) {PetscMapGetLocalSize(merge->rowmap,&m); }
3053:   PetscMapGetGlobalRange(merge->rowmap,&owners);

3055:   /* determine the number of messages to send, their lengths */
3056:   /*---------------------------------------------------------*/
3057:   len_s  = merge->len_s;

3059:   len = 0;  /* length of buf_si[] */
3060:   merge->nsend = 0;
3061:   for (proc=0; proc<size; proc++){
3062:     len_si[proc] = 0;
3063:     if (proc == rank){
3064:       len_s[proc] = 0;
3065:     } else {
3066:       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3067:       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros (j and a entries) to be sent to [proc] */
3068:     }
3069:     if (len_s[proc]) {
3070:       merge->nsend++;
3071:       nrows = 0;
3072:       for (i=owners[proc]; i<owners[proc+1]; i++){
3073:         if (ai[i+1] > ai[i]) nrows++;
3074:       }
3075:       len_si[proc] = 2*(nrows+1);
3076:       len += len_si[proc];
3077:     }
3078:   }

3080:   /* determine the number and length of messages to receive for ij-structure */
3081:   /*-------------------------------------------------------------------------*/
3082:   PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
3083:   PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);

3085:   /* post the Irecv of j-structure */
3086:   /*-------------------------------*/
3087:   PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);
3088:   PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);

3090:   /* post the Isend of j-structure */
3091:   /*--------------------------------*/
3092:   PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);
3093:   sj_waits = si_waits + merge->nsend;

3095:   for (proc=0, k=0; proc<size; proc++){
3096:     if (!len_s[proc]) continue;
3097:     i = owners[proc];
3098:     MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
3099:     k++;
3100:   }

3102:   /* receives and sends of j-structure are complete */
3103:   /*------------------------------------------------*/
3104:   MPI_Waitall(merge->nrecv,rj_waits,status);
3105:   MPI_Waitall(merge->nsend,sj_waits,status);
3106: 
3107:   /* send and recv i-structure */
3108:   /*---------------------------*/
3109:   PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);
3110:   PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
3111: 
3112:   PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
3113:   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3114:   for (proc=0,k=0; proc<size; proc++){
3115:     if (!len_s[proc]) continue;
3116:     /* form outgoing message for i-structure: 
3117:          buf_si[0]:                 nrows to be sent
3118:                [1:nrows]:           row index (local to the receiving process)
3119:                [nrows+1:2*nrows+1]: i-structure index
3120:     */
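    /* illustrative example (not in the original source): if two nonempty rows with
       receiver-local indices 1 and 4, carrying 3 and 2 nonzeros respectively, are sent
       to [proc], then nrows = 2, len_si[proc] = 6 and buf_si = { 2, 1, 4, 0, 3, 5 } */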
3121:     /*-------------------------------------------*/
3122:     nrows = len_si[proc]/2 - 1;
3123:     buf_si_i    = buf_si + nrows+1;
3124:     buf_si[0]   = nrows;
3125:     buf_si_i[0] = 0;
3126:     nrows = 0;
3127:     for (i=owners[proc]; i<owners[proc+1]; i++){
3128:       anzi = ai[i+1] - ai[i];
3129:       if (anzi) {
3130:         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3131:         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3132:         nrows++;
3133:       }
3134:     }
3135:     MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
3136:     k++;
3137:     buf_si += len_si[proc];
3138:   }

3140:   MPI_Waitall(merge->nrecv,ri_waits,status);
3141:   MPI_Waitall(merge->nsend,si_waits,status);

3143:   PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
3144:   for (i=0; i<merge->nrecv; i++){
3145:     PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
3146:   }

3148:   PetscFree(len_si);
3149:   PetscFree(len_ri);
3150:   PetscFree(rj_waits);
3151:   PetscFree(si_waits);
3152:   PetscFree(ri_waits);
3153:   PetscFree(buf_s);
3154:   PetscFree(status);

3156:   /* compute a local seq matrix in each processor */
3157:   /*----------------------------------------------*/
3158:   /* allocate bi array and free space for accumulating nonzero column info */
3159:   PetscMalloc((m+1)*sizeof(PetscInt),&bi);
3160:   bi[0] = 0;

3162:   /* create and initialize a linked list */
3163:   nlnk = N+1;
3164:   PetscLLCreate(N,N,nlnk,lnk,lnkbt);
3165: 
3166:   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3167:   len = 0;
3168:   len  = ai[owners[rank+1]] - ai[owners[rank]];
3169:   GetMoreSpace((PetscInt)(2*len+1),&free_space);
3170:   current_space = free_space;

3172:   /* determine symbolic info for each local row */
3173:   PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
3174:   nextrow = buf_ri_k + merge->nrecv;
3175:   nextai  = nextrow + merge->nrecv;
3176:   for (k=0; k<merge->nrecv; k++){
3177:     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3178:     nrows = *buf_ri_k[k];
3179:     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3180:     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
3181:   }

3183:   MatPreallocateInitialize(comm,m,n,dnz,onz);
3184:   len = 0;
3185:   for (i=0;i<m;i++) {
3186:     bnzi   = 0;
3187:     /* add local non-zero cols of this proc's seqmat into lnk */
3188:     arow   = owners[rank] + i;
3189:     anzi   = ai[arow+1] - ai[arow];
3190:     aj     = a->j + ai[arow];
3191:     PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3192:     bnzi += nlnk;
3193:     /* add received col data into lnk */
3194:     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3195:       if (i == *nextrow[k]) { /* i-th row */
3196:         anzi = *(nextai[k]+1) - *nextai[k];
3197:         aj   = buf_rj[k] + *nextai[k];
3198:         PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3199:         bnzi += nlnk;
3200:         nextrow[k]++; nextai[k]++;
3201:       }
3202:     }
3203:     if (len < bnzi) len = bnzi;  /* =max(bnzi) */

3205:     /* if free space is not available, make more free space */
3206:     if (current_space->local_remaining<bnzi) {
3207:       GetMoreSpace(current_space->total_array_size,&current_space);
3208:       nspacedouble++;
3209:     }
3210:     /* copy data into free space, then initialize lnk */
3211:     PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
3212:     MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);

3214:     current_space->array           += bnzi;
3215:     current_space->local_used      += bnzi;
3216:     current_space->local_remaining -= bnzi;
3217: 
3218:     bi[i+1] = bi[i] + bnzi;
3219:   }
3220: 
3221:   PetscFree(buf_ri_k);

3223:   PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
3224:   MakeSpaceContiguous(&free_space,bj);
3225:   PetscLLDestroy(lnk,lnkbt);

3227:   /* create symbolic parallel matrix B_mpi */
3228:   /*---------------------------------------*/
3229:   if (n==PETSC_DECIDE) {
3230:     MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);
3231:   } else {
3232:     MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);
3233:   }
3234:   MatSetType(B_mpi,MATMPIAIJ);
3235:   MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
3236:   MatPreallocateFinalize(dnz,onz);

3238:   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3239:   B_mpi->assembled     = PETSC_FALSE;
3240:   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3241:   merge->bi            = bi;
3242:   merge->bj            = bj;
3243:   merge->buf_ri        = buf_ri;
3244:   merge->buf_rj        = buf_rj;
3245:   merge->coi           = PETSC_NULL;
3246:   merge->coj           = PETSC_NULL;
3247:   merge->owners_co     = PETSC_NULL;

3249:   /* attach the supporting struct to B_mpi for reuse */
3250:   PetscObjectContainerCreate(PETSC_COMM_SELF,&container);
3251:   PetscObjectContainerSetPointer(container,merge);
3252:   PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
3253:   *mpimat = B_mpi;
3254:   PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);
3255:   return(0);
3256: }

3258: static PetscEvent logkey_seqstompi = 0;
3259: PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3260: {
3261:   PetscErrorCode   ierr;

3264:   if (!logkey_seqstompi) {
3265:     PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3266:   }
3267:   PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);
3268:   if (scall == MAT_INITIAL_MATRIX){
3269:     MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);
3270:   }
3271:   MatMerge_SeqsToMPINumeric(seqmat,*mpimat);
3272:   PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);
3273:   return(0);
3274: }
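/*
   A minimal usage sketch of MatMerge_SeqsToMPI() (hypothetical variable names,
   assuming each process holds a SeqAIJ matrix seqmat whose rows are to be merged
   into one parallel MPIAIJ matrix):

     Mat C;
     MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);   // symbolic + numeric phase
     ...                                                          // new values in seqmat,
     MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);     //   same nonzero pattern
     MatDestroy(C);

   The MAT_REUSE_MATRIX call skips the symbolic phase and reuses the communication
   and structure data attached to C in the "MatMergeSeqsToMPI" container.
*/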
3275: static PetscEvent logkey_getlocalmat = 0;
3278: /*@C
3279:      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel MPIAIJ matrix (diagonal and off-diagonal parts merged, with global column indices)

3281:     Not Collective

3283:    Input Parameters:
3284: +    A - the matrix 
3285: -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 

3287:    Output Parameter:
3288: .    A_loc - the local sequential matrix generated

3290:     Level: developer

3292: @*/
3293: PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3294: {
3295:   PetscErrorCode  ierr;
3296:   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3297:   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3298:   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3299:   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3300:   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3301:   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;

3304:   if (!logkey_getlocalmat) {
3305:     PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3306:   }
3307:   PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);
3308:   if (scall == MAT_INITIAL_MATRIX){
3309:     PetscMalloc((1+am)*sizeof(PetscInt),&ci);
3310:     ci[0] = 0;
3311:     for (i=0; i<am; i++){
3312:       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3313:     }
3314:     PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
3315:     PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
3316:     k = 0;
3317:     for (i=0; i<am; i++) {
3318:       ncols_o = bi[i+1] - bi[i];
3319:       ncols_d = ai[i+1] - ai[i];
3320:       /* off-diagonal portion of A */
3321:       for (jo=0; jo<ncols_o; jo++) {
3322:         col = cmap[*bj];
3323:         if (col >= cstart) break;
3324:         cj[k]   = col; bj++;
3325:         ca[k++] = *ba++;
3326:       }
3327:       /* diagonal portion of A */
3328:       for (j=0; j<ncols_d; j++) {
3329:         cj[k]   = cstart + *aj++;
3330:         ca[k++] = *aa++;
3331:       }
3332:       /* off-diagonal portion of A */
3333:       for (j=jo; j<ncols_o; j++) {
3334:         cj[k]   = cmap[*bj++];
3335:         ca[k++] = *ba++;
3336:       }
3337:     }
3338:     /* put together the new matrix */
3339:     MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);
3340:     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3341:     /* Since these are PETSc arrays, change flags to free them as necessary. */
3342:     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3343:     mat->freedata = PETSC_TRUE;
3344:     mat->nonew    = 0;
3345:   } else if (scall == MAT_REUSE_MATRIX){
3346:     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3347:     ci = mat->i; cj = mat->j; ca = mat->a;
3348:     for (i=0; i<am; i++) {
3349:       /* off-diagonal portion of A */
3350:       ncols_o = bi[i+1] - bi[i];
3351:       for (jo=0; jo<ncols_o; jo++) {
3352:         col = cmap[*bj];
3353:         if (col >= cstart) break;
3354:         *ca++ = *ba++; bj++;
3355:       }
3356:       /* diagonal portion of A */
3357:       ncols_d = ai[i+1] - ai[i];
3358:       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3359:       /* off-diagonal portion of A */
3360:       for (j=jo; j<ncols_o; j++) {
3361:         *ca++ = *ba++; bj++;
3362:       }
3363:     }
3364:   } else {
3365:     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3366:   }

3368:   PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);
3369:   return(0);
3370: }
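/*
   A minimal usage sketch of MatGetLocalMat() (hypothetical caller, e.g. a
   matrix-matrix product kernel that needs the local rows of A with global
   column indices):

     Mat A_loc;
     MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);   // merge diagonal and off-diagonal parts
     ...                                            // use A_loc
     MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);     // copy new values only
     MatDestroy(A_loc);

   The MAT_REUSE_MATRIX branch overwrites ca in place, so it assumes the nonzero
   pattern of A has not changed since the MAT_INITIAL_MATRIX call.
*/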

3372: static PetscEvent logkey_getlocalmatcondensed = 0;
3375: /*@C
3376:      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of a parallel MPIAIJ matrix, keeping only the NON-ZERO columns of its local part

3378:     Not Collective

3380:    Input Parameters:
3381: +    A - the matrix 
3382: .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3383: -    row, col - index sets of rows and columns to extract (or PETSC_NULL)  

3385:    Output Parameter:
3386: .    A_loc - the local sequential matrix generated

3388:     Level: developer

3390: @*/
3391: PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3392: {
3393:   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3394:   PetscErrorCode    ierr;
3395:   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3396:   IS                isrowa,iscola;
3397:   Mat               *aloc;

3400:   if (!logkey_getlocalmatcondensed) {
3401:     PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3402:   }
3403:   PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);
3404:   if (!row){
3405:     start = a->rstart; end = a->rend;
3406:     ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
3407:   } else {
3408:     isrowa = *row;
3409:   }
3410:   if (!col){
3411:     start = a->cstart;
3412:     cmap  = a->garray;
3413:     nzA   = a->A->n;
3414:     nzB   = a->B->n;
3415:     PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
3416:     ncols = 0;
3417:     for (i=0; i<nzB; i++) {
3418:       if (cmap[i] < start) idx[ncols++] = cmap[i];
3419:       else break;
3420:     }
3421:     imark = i;
3422:     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3423:     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3424:     ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);
3425:     PetscFree(idx);
3426:   } else {
3427:     iscola = *col;
3428:   }
3429:   if (scall != MAT_INITIAL_MATRIX){
3430:     PetscMalloc(sizeof(Mat),&aloc);
3431:     aloc[0] = *A_loc;
3432:   }
3433:   MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
3434:   *A_loc = aloc[0];
3435:   PetscFree(aloc);
3436:   if (!row){
3437:     ISDestroy(isrowa);
3438:   }
3439:   if (!col){
3440:     ISDestroy(iscola);
3441:   }
3442:   PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);
3443:   return(0);
3444: }
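/*
   A minimal usage sketch of MatGetLocalMatCondensed() (hypothetical caller;
   passing PETSC_NULL lets the routine build the row and column index sets itself):

     Mat A_loc;
     MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
     ...
     MatDestroy(A_loc);

   The column index set built above is the local diagonal columns plus the
   off-diagonal columns in a->garray, listed in increasing global order, which is
   why the garray entries are split at imark (those below cstart, then those above).
*/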

3446: static PetscEvent logkey_GetBrowsOfAcols = 0;
3449: /*@C
3450:     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A 

3452:     Collective on Mat

3454:    Input Parameters:
3455: +    A,B - the matrices in mpiaij format
3456: .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3457: -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)   

3459:    Output Parameters:
3460: +    rowb, colb - index sets of rows and columns of B to extract 
3461: .    brstart - row index of B_seq at which B's own local rows begin (the next B->m rows of B_seq are taken from B's local rows)
3462: -    B_seq - the sequential matrix generated

3464:     Level: developer

3466: @*/
3467: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3468: {
3469:   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3470:   PetscErrorCode    ierr;
3471:   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3472:   IS                isrowb,iscolb;
3473:   Mat               *bseq;
3474: 
3476:   if (a->cstart != b->rstart || a->cend != b->rend){
3477:     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3478:   }
3479:   if (!logkey_GetBrowsOfAcols) {
3480:     PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3481:   }
3482:   PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);
3483: 
3484:   if (scall == MAT_INITIAL_MATRIX){
3485:     start = a->cstart;
3486:     cmap  = a->garray;
3487:     nzA   = a->A->n;
3488:     nzB   = a->B->n;
3489:     PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
3490:     ncols = 0;
3491:     for (i=0; i<nzB; i++) {  /* rows of B with global index below the local row range */
3492:       if (cmap[i] < start) idx[ncols++] = cmap[i];
3493:       else break;
3494:     }
3495:     imark = i;
3496:     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3497:     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B with global index above the local row range */
3498:     ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);
3499:     PetscFree(idx);
3500:     *brstart = imark;
3501:     ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);
3502:   } else {
3503:     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3504:     isrowb = *rowb; iscolb = *colb;
3505:     PetscMalloc(sizeof(Mat),&bseq);
3506:     bseq[0] = *B_seq;
3507:   }
3508:   MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
3509:   *B_seq = bseq[0];
3510:   PetscFree(bseq);
3511:   if (!rowb){
3512:     ISDestroy(isrowb);
3513:   } else {
3514:     *rowb = isrowb;
3515:   }
3516:   if (!colb){
3517:     ISDestroy(iscolb);
3518:   } else {
3519:     *colb = iscolb;
3520:   }
3521:   PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);
3522:   return(0);
3523: }
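/*
   A minimal usage sketch of MatGetBrowsOfAcols() (hypothetical variable names):

     Mat      B_seq;
     IS       rowb,colb;
     PetscInt brstart;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
     ...                                   // values of B change, same nonzero pattern
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);

   For MAT_REUSE_MATRIX the index sets returned by the first call must be passed
   back in, as enforced by the SETERRQ above.
*/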

3525: static PetscEvent logkey_GetBrowsOfAocols = 0;
3528: /*@C
3529:     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3530:     of the OFF-DIAGONAL portion of the local part of A 

3532:     Collective on Mat

3534:    Input Parameters:
3535: +    A,B - the matrices in mpiaij format
3536: .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3537: .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 
3538: -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 

3540:    Output Parameter:
3541: .    B_oth - the sequential matrix generated

3543:     Level: developer

3545: @*/
3546: PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3547: {
3548:   VecScatter_MPI_General *gen_to,*gen_from;
3549:   PetscErrorCode         ierr;
3550:   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3551:   Mat_SeqAIJ             *b_oth;
3552:   VecScatter             ctx=a->Mvctx;
3553:   MPI_Comm               comm=ctx->comm;
3554:   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3555:   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3556:   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3557:   PetscInt               i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj,*sstarts,*sstartsj,len;
3558:   MPI_Request            *rwaits,*swaits;
3559:   MPI_Status             *sstatus,rstatus;
3560:   PetscInt               *cols;
3561:   PetscScalar            *vals;
3562:   PetscMPIInt            j;
3563: 
3565:   if (a->cstart != b->rstart || a->cend != b->rend){
3566:     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3567:   }
3568:   if (!logkey_GetBrowsOfAocols) {
3569:     PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3570:   }
3571:   PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);
3572:   MPI_Comm_rank(comm,&rank);

3574:   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3575:   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3576:   rvalues  = gen_from->values; /* reused as buffer for the lengths of rows to be received */
3577:   svalues  = gen_to->values;   /* reused as buffer for the lengths of rows to be sent */
3578:   nrecvs   = gen_from->n;
3579:   nsends   = gen_to->n;
3580:   rwaits   = gen_from->requests;
3581:   swaits   = gen_to->requests;
3582:   rrow     = gen_from->indices; /* local row index to be received */
3583:   srow     = gen_to->indices;   /* local row index to be sent */
3584:   rstarts  = gen_from->starts;
3585:   sstarts  = gen_to->starts;
3586:   rprocs   = gen_from->procs;
3587:   sprocs   = gen_to->procs;
3588:   sstatus  = gen_to->sstatus;

3590:   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3591:   if (scall == MAT_INITIAL_MATRIX){
3592:     /* i-array */
3593:     /*---------*/
3594:     /*  post receives */
3595:     for (i=0; i<nrecvs; i++){
3596:       rowlen = (PetscInt*)rvalues + rstarts[i];
3597:       nrows = rstarts[i+1]-rstarts[i];
3598:       MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
3599:     }

3601:     /* pack the outgoing message */
3602:     PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);
3603:     rstartsj = sstartsj + nsends +1;
3604:     sstartsj[0] = 0;  rstartsj[0] = 0;
3605:     len = 0; /* total length of j or a array to be sent */
3606:     k = 0;
3607:     for (i=0; i<nsends; i++){
3608:       rowlen = (PetscInt*)svalues + sstarts[i];
3609:       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3610:       for (j=0; j<nrows; j++) {
3611:         row = srow[k] + b->rowners[rank]; /* global row idx */
3612:         MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL); /* rowlength */
3613:         len += rowlen[j];
3614:         MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);
3615:         k++;
3616:       }
3617:       MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);
3618:       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3619:     }
3620:     /* recvs and sends of i-array are completed */
3621:     i = nrecvs;
3622:     while (i--) {
3623:       MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
3624:     }
3625:     if (nsends) {
3626:       MPI_Waitall(nsends,swaits,sstatus);
3627:     }
3628:     /* allocate buffers for sending j and a arrays */
3629:     PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
3630:     PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);

3632:     /* create i-array of B_oth */
3633:     PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
3634:     b_othi[0] = 0;
3635:     len = 0; /* total length of j or a array to be received */
3636:     k = 0;
3637:     for (i=0; i<nrecvs; i++){
3638:       rowlen = (PetscInt*)rvalues + rstarts[i];
3639:       nrows = rstarts[i+1]-rstarts[i];
3640:       for (j=0; j<nrows; j++) {
3641:         b_othi[k+1] = b_othi[k] + rowlen[j];
3642:         len += rowlen[j]; k++;
3643:       }
3644:       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3645:     }

3647:     /* allocate space for j and a arrays of B_oth */
3648:     PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
3649:     PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);

3651:     /* j-array */
3652:     /*---------*/
3653:     /*  post receives of j-array */
3654:     for (i=0; i<nrecvs; i++){
3655:       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3656:       MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
3657:     }
3658:     k = 0;
3659:     for (i=0; i<nsends; i++){
3660:       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3661:       bufJ = bufj+sstartsj[i];
3662:       for (j=0; j<nrows; j++) {
3663:         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3664:         MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);
3665:         for (l=0; l<ncols; l++){
3666:           *bufJ++ = cols[l];
3667:         }
3668:         MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);
3669:       }
3670:       MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
3671:     }

3673:     /* recvs and sends of j-array are completed */
3674:     i = nrecvs;
3675:     while (i--) {
3676:       MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
3677:     }
3678:     if (nsends) {
3679:       MPI_Waitall(nsends,swaits,sstatus);
3680:     }
3681:   } else if (scall == MAT_REUSE_MATRIX){
3682:     sstartsj = *startsj;
3683:     rstartsj = sstartsj + nsends +1;
3684:     bufa     = *bufa_ptr;
3685:     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3686:     b_otha   = b_oth->a;
3687:   } else {
3688:     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3689:   }

3691:   /* a-array */
3692:   /*---------*/
3693:   /*  post receives of a-array */
3694:   for (i=0; i<nrecvs; i++){
3695:     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3696:     MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
3697:   }
3698:   k = 0;
3699:   for (i=0; i<nsends; i++){
3700:     nrows = sstarts[i+1]-sstarts[i];
3701:     bufA = bufa+sstartsj[i];
3702:     for (j=0; j<nrows; j++) {
3703:       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3704:       MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);
3705:       for (l=0; l<ncols; l++){
3706:         *bufA++ = vals[l];
3707:       }
3708:       MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);

3710:     }
3711:     MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
3712:   }
3713:   /* recvs and sends of a-array are completed */
3714:   i = nrecvs;
3715:   while (i--) {
3716:     MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
3717:   }
3718:   if (nsends) {
3719:     MPI_Waitall(nsends,swaits,sstatus);
3720:   }
3721: 
3722:   if (scall == MAT_INITIAL_MATRIX){
3723:     /* put together the new matrix */
3724:     MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);

3726:     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3727:     /* Since these are PETSc arrays, change flags to free them as necessary. */
3728:     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3729:     b_oth->freedata = PETSC_TRUE;
3730:     b_oth->nonew    = 0;

3732:     PetscFree(bufj);
3733:     if (!startsj || !bufa_ptr){
3734:       PetscFree(sstartsj);
3735:       PetscFree(bufa);
3736:     } else {
3737:       *startsj  = sstartsj;
3738:       *bufa_ptr = bufa;
3739:     }
3740:   }
3741:   PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);
3742: 
3743:   return(0);
3744: }
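/*
   A minimal usage sketch of MatGetBrowsOfAoCols() (hypothetical variable names;
   keeping startsj and bufa allows later numeric-only updates):

     Mat         B_oth;
     PetscInt    *startsj;
     PetscScalar *bufa;
     MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
     ...                                        // values of B change, same nonzero pattern
     MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);

   Passing PETSC_NULL for startsj or bufa_ptr forces the MAT_INITIAL_MATRIX path,
   and the send buffers are then freed before returning.
*/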

3746: /*MC
3747:    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

3749:    Options Database Keys:
3750: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

3752:   Level: beginner

3754: .seealso: MatCreateMPIAIJ
3755: M*/
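/*
   A minimal sketch of creating an MPIAIJ matrix with the MatCreate() interface
   used in this file (hypothetical sizes M, N and preallocation values d_nz, o_nz):

     Mat A;
     MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
     MatSetType(A,MATMPIAIJ);                              // or MatSetFromOptions(A) with -mat_type mpiaij
     MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);

   followed by MatSetValues(), MatAssemblyBegin() and MatAssemblyEnd().
*/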

3760: PetscErrorCode MatCreate_MPIAIJ(Mat B)
3761: {
3762:   Mat_MPIAIJ     *b;
3764:   PetscInt       i;
3765:   PetscMPIInt    size;

3768:   MPI_Comm_size(B->comm,&size);

3770:   PetscNew(Mat_MPIAIJ,&b);
3771:   B->data         = (void*)b;
3772:   PetscMemzero(b,sizeof(Mat_MPIAIJ));
3773:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3774:   B->factor       = 0;
3775:   B->assembled    = PETSC_FALSE;
3776:   B->mapping      = 0;

3778:   B->insertmode      = NOT_SET_VALUES;
3779:   b->size            = size;
3780:   MPI_Comm_rank(B->comm,&b->rank);

3782:   PetscSplitOwnership(B->comm,&B->m,&B->M);
3783:   PetscSplitOwnership(B->comm,&B->n,&B->N);

3785:   /* the information in the maps duplicates the information computed below, eventually 
3786:      we should remove the duplicate information that is not contained in the maps */
3787:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
3788:   PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);

3790:   /* build local table of row and column ownerships */
3791:   PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);
3792:   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3793:   b->cowners = b->rowners + b->size + 2;
3794:   MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);
3795:   b->rowners[0] = 0;
3796:   for (i=2; i<=b->size; i++) {
3797:     b->rowners[i] += b->rowners[i-1];
3798:   }
3799:   b->rstart = b->rowners[b->rank];
3800:   b->rend   = b->rowners[b->rank+1];
3801:   MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);
3802:   b->cowners[0] = 0;
3803:   for (i=2; i<=b->size; i++) {
3804:     b->cowners[i] += b->cowners[i-1];
3805:   }
3806:   b->cstart = b->cowners[b->rank];
3807:   b->cend   = b->cowners[b->rank+1];

3809:   /* build cache for off array entries formed */
3810:   MatStashCreate_Private(B->comm,1,&B->stash);
3811:   b->donotstash  = PETSC_FALSE;
3812:   b->colmap      = 0;
3813:   b->garray      = 0;
3814:   b->roworiented = PETSC_TRUE;

3816:   /* stuff used for matrix vector multiply */
3817:   b->lvec      = PETSC_NULL;
3818:   b->Mvctx     = PETSC_NULL;

3820:   /* stuff for MatGetRow() */
3821:   b->rowindices   = 0;
3822:   b->rowvalues    = 0;
3823:   b->getrowactive = PETSC_FALSE;

3825:   /* Explicitly create 2 MATSEQAIJ matrices. */
3826:   MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);
3827:   MatSetType(b->A,MATSEQAIJ);
3828:   PetscLogObjectParent(B,b->A);
3829:   MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);
3830:   MatSetType(b->B,MATSEQAIJ);
3831:   PetscLogObjectParent(B,b->B);

3833:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3834:                                      "MatStoreValues_MPIAIJ",
3835:                                      MatStoreValues_MPIAIJ);
3836:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3837:                                      "MatRetrieveValues_MPIAIJ",
3838:                                      MatRetrieveValues_MPIAIJ);
3839:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3840:                                      "MatGetDiagonalBlock_MPIAIJ",
3841:                                      MatGetDiagonalBlock_MPIAIJ);
3842:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3843:                                      "MatIsTranspose_MPIAIJ",
3844:                                      MatIsTranspose_MPIAIJ);
3845:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3846:                                      "MatMPIAIJSetPreallocation_MPIAIJ",
3847:                                      MatMPIAIJSetPreallocation_MPIAIJ);
3848:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3849:                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3850:                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);
3851:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3852:                                      "MatDiagonalScaleLocal_MPIAIJ",
3853:                                      MatDiagonalScaleLocal_MPIAIJ);
3854:   return(0);
3855: }