Actual source code: mpiaij.c

  1: /*$Id: mpiaij.c,v 1.334 2001/04/10 19:35:25 bsmith Exp $*/

  3: #include "src/mat/impls/aij/mpi/mpiaij.h"
  4: #include "src/vec/vecimpl.h"
  5: #include "src/inline/spops.h"

  7: EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
  8: EXTERN int DisAssemble_MPIAIJ(Mat);
  9: EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
 10: EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
 11: EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
 12: EXTERN int MatPrintHelp_SeqAIJ(Mat);

 14: /* 
 15:   Local utility routine that creates a mapping from the global column 
 16: number to the local number in the off-diagonal part of the local 
 17: storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at 
 18: a slightly higher hash table cost; without it, it is not scalable (each processor
 19: holds an order N integer array) but access is fast.
 20: */
 21: int CreateColmap_MPIAIJ_Private(Mat mat)
 22: {
 23:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
 24:   int        n = aij->B->n,i,ierr;

 27: #if defined (PETSC_USE_CTABLE)
 28:   PetscTableCreate(n,&aij->colmap);
 29:   for (i=0; i<n; i++){
 30:     PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
 31:   }
 32: #else
 33:   PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
 34:   PetscLogObjectMemory(mat,mat->N*sizeof(int));
 35:   PetscMemzero(aij->colmap,mat->N*sizeof(int));
 36:   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
 37: #endif
 38:   return(0);
 39: }
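
/*
   Illustrative sketch (not part of the original source): how the column map built
   above is consulted to translate a global column number gcol into the local column
   index of the off-diagonal block B.  It mirrors the lookup performed in
   MatSetValues_MPIAIJ() below; a result of -1 means the column is not present in the
   off-diagonal part on this processor.  The function name is hypothetical and error
   checking is omitted, as it is throughout this listing.
*/
int ExampleColmapLookup_MPIAIJ(Mat mat,int gcol,int *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  if (!aij->colmap) {
    CreateColmap_MPIAIJ_Private(mat);
  }
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,lcol);   /* keys and values are stored shifted by one */
  (*lcol)--;
#else
  *lcol = aij->colmap[gcol] - 1;             /* order mat->N integer array indexed by global column */
#endif
  return(0);
}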

 41: #define CHUNKSIZE   15
 42: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
 43: { \
 44:  \
 45:     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
 46:     rmax = aimax[row]; nrow = ailen[row];  \
 47:     col1 = col - shift; \
 48:      \
 49:     low = 0; high = nrow; \
 50:     while (high-low > 5) { \
 51:       t = (low+high)/2; \
 52:       if (rp[t] > col) high = t; \
 53:       else             low  = t; \
 54:     } \
 55:       for (_i=low; _i<high; _i++) { \
 56:         if (rp[_i] > col1) break; \
 57:         if (rp[_i] == col1) { \
 58:           if (addv == ADD_VALUES) ap[_i] += value;   \
 59:           else                  ap[_i] = value; \
 60:           goto a_noinsert; \
 61:         } \
 62:       }  \
 63:       if (nonew == 1) goto a_noinsert; \
 64:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
 65:       if (nrow >= rmax) { \
 66:         /* there is no extra room in row, therefore enlarge */ \
 67:         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
 68:         Scalar *new_a; \
 69:  \
 70:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 71:  \
 72:         /* malloc new storage space */ \
 73:         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
 74:         ierr    = PetscMalloc(len,&new_a); \
 75:         new_j   = (int*)(new_a + new_nz); \
 76:         new_i   = new_j + new_nz; \
 77:  \
 78:         /* copy over old data into new slots */ \
 79:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
 80:         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
 81:         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
 82:         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
 83:         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
 84:                                                            len*sizeof(int)); \
 85:         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
 86:         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
 87:                                                            len*sizeof(Scalar));  \
 88:         /* free up old matrix storage */ \
 89:  \
 90:         PetscFree(a->a);  \
 91:         if (!a->singlemalloc) { \
 92:            PetscFree(a->i); \
 93:            PetscFree(a->j); \
 94:         } \
 95:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
 96:         a->singlemalloc = PETSC_TRUE; \
 97:  \
 98:         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
 99:         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
100:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
101:         a->maxnz += CHUNKSIZE; \
102:         a->reallocs++; \
103:       } \
104:       N = nrow++ - 1; a->nz++; \
105:       /* shift up all the later entries in this row */ \
106:       for (ii=N; ii>=_i; ii--) { \
107:         rp[ii+1] = rp[ii]; \
108:         ap[ii+1] = ap[ii]; \
109:       } \
110:       rp[_i] = col1;  \
111:       ap[_i] = value;  \
112:       a_noinsert: ; \
113:       ailen[row] = nrow; \
114: }

116: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
117: { \
118:  \
119:     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
120:     rmax = bimax[row]; nrow = bilen[row];  \
121:     col1 = col - shift; \
122:      \
123:     low = 0; high = nrow; \
124:     while (high-low > 5) { \
125:       t = (low+high)/2; \
126:       if (rp[t] > col) high = t; \
127:       else             low  = t; \
128:     } \
129:        for (_i=low; _i<high; _i++) { \
130:         if (rp[_i] > col1) break; \
131:         if (rp[_i] == col1) { \
132:           if (addv == ADD_VALUES) ap[_i] += value;   \
133:           else                  ap[_i] = value; \
134:           goto b_noinsert; \
135:         } \
136:       }  \
137:       if (nonew == 1) goto b_noinsert; \
138:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
139:       if (nrow >= rmax) { \
140:         /* there is no extra room in row, therefore enlarge */ \
141:         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
142:         Scalar *new_a; \
143:  \
144:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
145:  \
146:         /* malloc new storage space */ \
147:         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \
148:         ierr    = PetscMalloc(len,&new_a); \
149:         new_j   = (int*)(new_a + new_nz); \
150:         new_i   = new_j + new_nz; \
151:  \
152:         /* copy over old data into new slots */ \
153:         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
154:         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
155:         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
156:         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
157:         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
158:                                                            len*sizeof(int)); \
159:         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
160:         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
161:                                                            len*sizeof(Scalar));  \
162:         /* free up old matrix storage */ \
163:  \
164:         PetscFree(b->a);  \
165:         if (!b->singlemalloc) { \
166:           PetscFree(b->i); \
167:           PetscFree(b->j); \
168:         } \
169:         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
170:         b->singlemalloc = PETSC_TRUE; \
171:  \
172:         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
173:         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
174:         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
175:         b->maxnz += CHUNKSIZE; \
176:         b->reallocs++; \
177:       } \
178:       N = nrow++ - 1; b->nz++; \
179:       /* shift up all the later entries in this row */ \
180:       for (ii=N; ii>=_i; ii--) { \
181:         rp[ii+1] = rp[ii]; \
182:         ap[ii+1] = ap[ii]; \
183:       } \
184:       rp[_i] = col1;  \
185:       ap[_i] = value;  \
186:       b_noinsert: ; \
187:       bilen[row] = nrow; \
188: }

190: int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
191: {
192:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
193:   Scalar     value;
194:   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
195:   int        cstart = aij->cstart,cend = aij->cend,row,col;
196:   PetscTruth roworiented = aij->roworiented;

198:   /* Some Variables required in the macro */
199:   Mat        A = aij->A;
200:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
201:   int        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
202:   Scalar     *aa = a->a;
203:   PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
204:   Mat        B = aij->B;
205:   Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
206:   int        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
207:   Scalar     *ba = b->a;

209:   int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
210:   int        nonew = a->nonew,shift = a->indexshift;
211:   Scalar     *ap;

214:   for (i=0; i<m; i++) {
215:     if (im[i] < 0) continue;
216: #if defined(PETSC_USE_BOPT_g)
217:     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
218: #endif
219:     if (im[i] >= rstart && im[i] < rend) {
220:       row = im[i] - rstart;
221:       for (j=0; j<n; j++) {
222:         if (in[j] >= cstart && in[j] < cend){
223:           col = in[j] - cstart;
224:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
225:           if (ignorezeroentries && value == 0.0) continue;
226:           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
227:           /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
228:         } else if (in[j] < 0) continue;
229: #if defined(PETSC_USE_BOPT_g)
230:         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
231: #endif
232:         else {
233:           if (mat->was_assembled) {
234:             if (!aij->colmap) {
235:               CreateColmap_MPIAIJ_Private(mat);
236:             }
237: #if defined (PETSC_USE_CTABLE)
238:             PetscTableFind(aij->colmap,in[j]+1,&col);
239:             col--;
240: #else
241:             col = aij->colmap[in[j]] - 1;
242: #endif
243:             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
244:               DisAssemble_MPIAIJ(mat);
245:               col =  in[j];
246:               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
247:               B = aij->B;
248:               b = (Mat_SeqAIJ*)B->data;
249:               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
250:               ba = b->a;
251:             }
252:           } else col = in[j];
253:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254:           if (ignorezeroentries && value == 0.0) continue;
255:           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
256:           /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
257:         }
258:       }
259:     } else {
260:       if (!aij->donotstash) {
261:         if (roworiented) {
262:           if (ignorezeroentries && v[i*n] == 0.0) continue;
263:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
264:         } else {
265:           if (ignorezeroentries && v[i] == 0.0) continue;
266:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
267:         }
268:       }
269:     }
270:   }
271:   return(0);
272: }
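
/*
   Usage sketch (illustrative, not part of the original source): callers reach the
   routine above through the public MatSetValues() interface.  In this sketch each
   process assembles only the rows it owns; entries may also be set for rows owned
   by other processors, in which case they are stashed above and exchanged during
   assembly.  The tridiagonal stencil, the global size 100 and the names used here
   are assumptions made only for this example.
*/
int ExampleAssembleMPIAIJ(MPI_Comm comm)
{
  Mat    A;
  int    i,rstart,rend,col[3];
  Scalar v[3] = {-1.0,2.0,-1.0};

  MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,3,PETSC_NULL,1,PETSC_NULL,&A);
  MatGetOwnershipRange(A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    col[0] = i-1; col[1] = i; col[2] = i+1;
    if (i == 0)       MatSetValues(A,1,&i,2,col+1,v+1,INSERT_VALUES);
    else if (i == 99) MatSetValues(A,1,&i,2,col,v,INSERT_VALUES);
    else              MatSetValues(A,1,&i,3,col,v,INSERT_VALUES);
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatDestroy(A);
  return(0);
}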

274: int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
275: {
276:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
277:   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
278:   int        cstart = aij->cstart,cend = aij->cend,row,col;

281:   for (i=0; i<m; i++) {
282:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
283:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
284:     if (idxm[i] >= rstart && idxm[i] < rend) {
285:       row = idxm[i] - rstart;
286:       for (j=0; j<n; j++) {
287:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
288:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
289:         if (idxn[j] >= cstart && idxn[j] < cend){
290:           col = idxn[j] - cstart;
291:           MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
292:         } else {
293:           if (!aij->colmap) {
294:             CreateColmap_MPIAIJ_Private(mat);
295:           }
296: #if defined (PETSC_USE_CTABLE)
297:           PetscTableFind(aij->colmap,idxn[j]+1,&col);
298:           col --;
299: #else
300:           col = aij->colmap[idxn[j]] - 1;
301: #endif
302:           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303:           else {
304:             MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
305:           }
306:         }
307:       }
308:     } else {
309:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310:     }
311:   }
312:   return(0);
313: }

315: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
316: {
317:   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
318:   int         ierr,nstash,reallocs;
319:   InsertMode  addv;

322:   if (aij->donotstash) {
323:     return(0);
324:   }

326:   /* make sure all processors are either in INSERTMODE or ADDMODE */
327:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
328:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
329:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
330:   }
331:   mat->insertmode = addv; /* in case this processor had no cache */

333:   MatStashScatterBegin_Private(&mat->stash,aij->rowners);
334:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
335:   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
336:   return(0);
337: }


340: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
341: {
342:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
343:   int         i,j,rstart,ncols,n,ierr,flg;
344:   int         *row,*col,other_disassembled;
345:   Scalar      *val;
346:   InsertMode  addv = mat->insertmode;

349:   if (!aij->donotstash) {
350:     while (1) {
351:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
352:       if (!flg) break;

354:       for (i=0; i<n;) {
355:         /* Now identify the consecutive vals belonging to the same row */
356:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
357:         if (j < n) ncols = j-i;
358:         else       ncols = n-i;
359:         /* Now assemble all these values with a single function call */
360:         MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
361:         i = j;
362:       }
363:     }
364:     MatStashScatterEnd_Private(&mat->stash);
365:   }
366: 
367:   MatAssemblyBegin(aij->A,mode);
368:   MatAssemblyEnd(aij->A,mode);

370:   /* determine if any processor has disassembled, if so we must 
 371:      also disassemble ourselves, in order that we may reassemble. */
372:   /*
373:      if nonzero structure of submatrix B cannot change then we know that
374:      no processor disassembled thus we can skip this stuff
375:   */
376:   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
377:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
378:     if (mat->was_assembled && !other_disassembled) {
379:       DisAssemble_MPIAIJ(mat);
380:     }
381:   }

383:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
384:     MatSetUpMultiply_MPIAIJ(mat);
385:   }
386:   MatAssemblyBegin(aij->B,mode);
387:   MatAssemblyEnd(aij->B,mode);

389:   if (aij->rowvalues) {
390:     PetscFree(aij->rowvalues);
391:     aij->rowvalues = 0;
392:   }
393:   return(0);
394: }
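
/*
   Sketch (illustrative, not part of the original source): MatAssemblyBegin_MPIAIJ()
   above checks that all processors use the same insert mode, so a caller that wants
   to switch from INSERT_VALUES to ADD_VALUES must flush-assemble in between.  The
   matrix A and the indices row,col are assumed to be set up elsewhere; the function
   name is hypothetical.
*/
int ExampleFlushAssembly(Mat A,int row,int col)
{
  Scalar one = 1.0;

  MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);
  MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);
  MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  return(0);
}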

396: int MatZeroEntries_MPIAIJ(Mat A)
397: {
398:   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
399:   int        ierr;

402:   MatZeroEntries(l->A);
403:   MatZeroEntries(l->B);
404:   return(0);
405: }

407: int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
408: {
409:   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
410:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
411:   int            *procs,*nprocs,j,idx,nsends,*work,row;
412:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
413:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
414:   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
415:   MPI_Comm       comm = A->comm;
416:   MPI_Request    *send_waits,*recv_waits;
417:   MPI_Status     recv_status,*send_status;
418:   IS             istmp;
419:   PetscTruth     found;

422:   ISGetLocalSize(is,&N);
423:   ISGetIndices(is,&rows);

425:   /*  first count number of contributors to each processor */
426:   PetscMalloc(2*size*sizeof(int),&nprocs);
427:   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));
428:   procs  = nprocs + size;
429:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
430:   for (i=0; i<N; i++) {
431:     idx = rows[i];
432:     found = PETSC_FALSE;
433:     for (j=0; j<size; j++) {
434:       if (idx >= owners[j] && idx < owners[j+1]) {
435:         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
436:       }
437:     }
438:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
439:   }
440:   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

442:   /* inform other processors of number of messages and max length*/
443:   PetscMalloc(2*size*sizeof(int),&work);
444:   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
445:   nrecvs = work[size+rank];
446:   nmax   = work[rank];
447:   ierr   = PetscFree(work);

449:   /* post receives:   */
450:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
451:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
452:   for (i=0; i<nrecvs; i++) {
453:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
454:   }

456:   /* do sends:
457:       1) starts[i] gives the starting index in svalues for stuff going to 
458:          the ith processor
459:   */
460:   PetscMalloc((N+1)*sizeof(int),&svalues);
461:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
462:   PetscMalloc((size+1)*sizeof(int),&starts);
463:   starts[0] = 0;
464:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
465:   for (i=0; i<N; i++) {
466:     svalues[starts[owner[i]]++] = rows[i];
467:   }
468:   ISRestoreIndices(is,&rows);

470:   starts[0] = 0;
471:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
472:   count = 0;
473:   for (i=0; i<size; i++) {
474:     if (procs[i]) {
475:       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
476:     }
477:   }
478:   PetscFree(starts);

480:   base = owners[rank];

482:   /*  wait on receives */
483:   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
484:   source = lens + nrecvs;
485:   count  = nrecvs; slen = 0;
486:   while (count) {
487:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
488:     /* unpack receives into our local space */
489:     MPI_Get_count(&recv_status,MPI_INT,&n);
490:     source[imdex]  = recv_status.MPI_SOURCE;
491:     lens[imdex]    = n;
492:     slen          += n;
493:     count--;
494:   }
495:   PetscFree(recv_waits);
496: 
497:   /* move the data into the send scatter */
498:   PetscMalloc((slen+1)*sizeof(int),&lrows);
499:   count = 0;
500:   for (i=0; i<nrecvs; i++) {
501:     values = rvalues + i*nmax;
502:     for (j=0; j<lens[i]; j++) {
503:       lrows[count++] = values[j] - base;
504:     }
505:   }
506:   PetscFree(rvalues);
507:   PetscFree(lens);
508:   PetscFree(owner);
509:   PetscFree(nprocs);
510: 
511:   /* actually zap the local rows */
512:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
513:   PetscLogObjectParent(A,istmp);

515:   /*
516:         Zero the required rows. If the "diagonal block" of the matrix
 517:      is square and the user wishes to set the diagonal we use separate
 518:      code so that MatSetValues() is not called for each diagonal entry,
 519:      which would allocate new memory, triggering many mallocs and slowing things down.

521:        Contributed by: Mathew Knepley
522:   */
523:   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
524:   MatZeroRows(l->B,istmp,0);
525:   if (diag && (l->A->M == l->A->N)) {
526:     ierr      = MatZeroRows(l->A,istmp,diag);
527:   } else if (diag) {
528:     MatZeroRows(l->A,istmp,0);
529:     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
530:       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
531: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
532:     }
533:     for (i = 0; i < slen; i++) {
534:       row  = lrows[i] + rstart;
535:       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
536:     }
537:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
538:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
539:   } else {
540:     MatZeroRows(l->A,istmp,0);
541:   }
542:   ISDestroy(istmp);
543:   PetscFree(lrows);

545:   /* wait on sends */
546:   if (nsends) {
547:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
548:     MPI_Waitall(nsends,send_waits,send_status);
549:     PetscFree(send_status);
550:   }
551:   PetscFree(send_waits);
552:   PetscFree(svalues);

554:   return(0);
555: }
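
/*
   Usage sketch (illustrative, not part of the original source): the routine above is
   reached through MatZeroRows() with an index set of global row numbers; each
   processor may list any rows, and the rows are routed to their owners as done above.
   Passing a pointer to a Scalar places that value on the diagonal of the zeroed rows;
   passing PETSC_NULL leaves the diagonal zero.  The function name is hypothetical.
*/
int ExampleZeroRows(Mat A,int nrows,int *rows)
{
  IS     is;
  Scalar one = 1.0;

  ISCreateGeneral(PETSC_COMM_SELF,nrows,rows,&is);
  MatZeroRows(A,is,&one);
  ISDestroy(is);
  return(0);
}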

557: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
558: {
559:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
560:   int        ierr,nt;

563:   VecGetLocalSize(xx,&nt);
564:   if (nt != A->n) {
565:     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
566:   }
567:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
568:   (*a->A->ops->mult)(a->A,xx,yy);
569:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
570:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
571:   return(0);
572: }

574: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
575: {
576:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
577:   int        ierr;

580:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
581:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
582:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
583:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
584:   return(0);
585: }

587: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
588: {
589:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
590:   int        ierr;

593:   /* do nondiagonal part */
594:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
595:   /* send it on its way */
596:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
597:   /* do local part */
598:   (*a->A->ops->multtranspose)(a->A,xx,yy);
599:   /* receive remote parts: note this assumes the values are not actually */
600:   /* inserted in yy until the next line, which is true for my implementation*/
 601:   /* but may not always be true. */
602:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
603:   return(0);
604: }

606: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
607: {
608:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609:   int        ierr;

612:   /* do nondiagonal part */
613:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
614:   /* send it on its way */
615:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
616:   /* do local part */
617:   (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
618:   /* receive remote parts: note this assumes the values are not actually */
 619:   /* inserted in zz until the next line, which is true for my implementation*/
 620:   /* but may not always be true. */
621:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
622:   return(0);
623: }
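
/*
   Usage sketch (illustrative, not part of the original source): the multiply routines
   above require the input vector layout to match the matrix column layout and the
   output layout to match the row layout, which is what the VecGetLocalSize() check in
   MatMult_MPIAIJ() enforces.  The vector names and the use of VecSet() to fill x are
   assumptions made only for this example.
*/
int ExampleMultMPIAIJ(Mat A)
{
  MPI_Comm comm;
  Vec      x,y;
  int      m,n,M,N;
  Scalar   one = 1.0;

  PetscObjectGetComm((PetscObject)A,&comm);
  MatGetLocalSize(A,&m,&n);
  MatGetSize(A,&M,&N);
  VecCreateMPI(comm,n,N,&x);     /* layout of x must match the columns of A */
  VecCreateMPI(comm,m,M,&y);     /* layout of y must match the rows of A */
  VecSet(&one,x);
  MatMult(A,x,y);                /* y = A x  */
  MatMultTranspose(A,y,x);       /* x = A' y */
  VecDestroy(x);
  VecDestroy(y);
  return(0);
}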

625: /*
626:   This only works correctly for square matrices where the subblock A->A is the 
627:    diagonal block
628: */
629: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
630: {
631:   int        ierr;
632:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

635:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
636:   if (a->rstart != a->cstart || a->rend != a->cend) {
637:     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
638:   }
639:   MatGetDiagonal(a->A,v);
640:   return(0);
641: }

643: int MatScale_MPIAIJ(Scalar *aa,Mat A)
644: {
645:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
646:   int        ierr;

649:   MatScale(aa,a->A);
650:   MatScale(aa,a->B);
651:   return(0);
652: }

654: int MatDestroy_MPIAIJ(Mat mat)
655: {
656:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
657:   int        ierr;

660: #if defined(PETSC_USE_LOG)
661:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
662: #endif
663:   MatStashDestroy_Private(&mat->stash);
664:   PetscFree(aij->rowners);
665:   MatDestroy(aij->A);
666:   MatDestroy(aij->B);
667: #if defined (PETSC_USE_CTABLE)
668:   if (aij->colmap) {PetscTableDelete(aij->colmap);}
669: #else
670:   if (aij->colmap) {PetscFree(aij->colmap);}
671: #endif
672:   if (aij->garray) {PetscFree(aij->garray);}
673:   if (aij->lvec)   {VecDestroy(aij->lvec);}
674:   if (aij->Mvctx)  {VecScatterDestroy(aij->Mvctx);}
675:   if (aij->rowvalues) {PetscFree(aij->rowvalues);}
676:   PetscFree(aij);
677:   return(0);
678: }

680: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
681: {
682:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
683:   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
684:   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
685:   PetscTruth        isdraw,isascii,flg;
686:   PetscViewer       sviewer;
687:   PetscViewerFormat format;

690:   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
691:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
692:   if (isascii) {
693:     PetscViewerGetFormat(viewer,&format);
694:     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
695:       MatInfo info;
696:       MPI_Comm_rank(mat->comm,&rank);
697:       MatGetInfo(mat,MAT_LOCAL,&info);
698:       PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
699:       if (flg) {
700:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
701:                                               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
702:       } else {
703:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
704:                     rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
705:       }
706:       MatGetInfo(aij->A,MAT_LOCAL,&info);
707:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
708:       MatGetInfo(aij->B,MAT_LOCAL,&info);
709:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
710:       PetscViewerFlush(viewer);
711:       VecScatterView(aij->Mvctx,viewer);
712:       return(0);
713:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
714:       return(0);
715:     }
716:   } else if (isdraw) {
717:     PetscDraw       draw;
718:     PetscTruth isnull;
719:     PetscViewerDrawGetDraw(viewer,0,&draw);
720:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
721:   }

723:   if (size == 1) {
724:     MatView(aij->A,viewer);
725:   } else {
726:     /* assemble the entire matrix onto first processor. */
727:     Mat         A;
728:     Mat_SeqAIJ *Aloc;
729:     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
730:     Scalar      *a;

732:     if (!rank) {
733:       MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
734:     } else {
735:       MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
736:     }
737:     PetscLogObjectParent(mat,A);

739:     /* copy over the A part */
740:     Aloc = (Mat_SeqAIJ*)aij->A->data;
741:     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
742:     row = aij->rstart;
743:     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
744:     for (i=0; i<m; i++) {
745:       MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
746:       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
747:     }
748:     aj = Aloc->j;
749:     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

751:     /* copy over the B part */
752:     Aloc = (Mat_SeqAIJ*)aij->B->data;
753:     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
754:     row  = aij->rstart;
755:     PetscMalloc((ai[m]+1)*sizeof(int),&cols);
756:     ct   = cols;
757:     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
758:     for (i=0; i<m; i++) {
759:       MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
760:       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
761:     }
762:     PetscFree(ct);
763:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
764:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
765:     /* 
766:        Everyone has to call to draw the matrix since the graphics waits are
767:        synchronized across all processors that share the PetscDraw object
768:     */
769:     PetscViewerGetSingleton(viewer,&sviewer);
770:     if (!rank) {
771:       MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
772:     }
773:     PetscViewerRestoreSingleton(viewer,&sviewer);
774:     MatDestroy(A);
775:   }
776:   return(0);
777: }

779: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
780: {
781:   int        ierr;
782:   PetscTruth isascii,isdraw,issocket,isbinary;
783: 
785:   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
786:   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
787:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
788:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
789:   if (isascii || isdraw || isbinary || issocket) {
790:     MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
791:   } else {
792:     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
793:   }
794:   return(0);
795: }

797: /*
798:     This has to provide several versions.

800:      2) a) use only local smoothing updating outer values only once.
801:         b) local smoothing updating outer values each inner iteration
802:      3) color updating outer values between colors.
803: */
804: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
805: {
806:   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
807:   Mat        AA = mat->A,BB = mat->B;
808:   Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
809:   Scalar     *b,*x,*xs,*ls,d,*v,sum;
810:   int        ierr,*idx,*diag;
811:   int        n = matin->n,m = matin->m,i,shift = A->indexshift;

814:   if (!A->diag) {MatMarkDiagonal_SeqAIJ(AA);}
815:   diag = A->diag;
816:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
817:     if (flag & SOR_ZERO_INITIAL_GUESS) {
818:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);
819:       return(0);
820:     }
821:     VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
822:     VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
823:     VecGetArray(xx,&x);
824:     if (xx != bb) {
825:       VecGetArray(bb,&b);
826:     } else {
827:       b = x;
828:     }
829:     VecGetArray(mat->lvec,&ls);
830:     xs = x + shift; /* shift by one for index start of 1 */
831:     ls = ls + shift;
832:     while (its--) {
833:       /* go down through the rows */
834:       for (i=0; i<m; i++) {
835:         n    = A->i[i+1] - A->i[i];
836:         PetscLogFlops(4*n+3);
837:         idx  = A->j + A->i[i] + shift;
838:         v    = A->a + A->i[i] + shift;
839:         sum  = b[i];
840:         SPARSEDENSEMDOT(sum,xs,v,idx,n);
841:         d    = fshift + A->a[diag[i]+shift];
842:         n    = B->i[i+1] - B->i[i];
843:         idx  = B->j + B->i[i] + shift;
844:         v    = B->a + B->i[i] + shift;
845:         SPARSEDENSEMDOT(sum,ls,v,idx,n);
846:         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
847:       }
848:       /* come up through the rows */
849:       for (i=m-1; i>-1; i--) {
850:         n    = A->i[i+1] - A->i[i];
851:         PetscLogFlops(4*n+3);
852:         idx  = A->j + A->i[i] + shift;
853:         v    = A->a + A->i[i] + shift;
854:         sum  = b[i];
855:         SPARSEDENSEMDOT(sum,xs,v,idx,n);
856:         d    = fshift + A->a[diag[i]+shift];
857:         n    = B->i[i+1] - B->i[i];
858:         idx  = B->j + B->i[i] + shift;
859:         v    = B->a + B->i[i] + shift;
860:         SPARSEDENSEMDOT(sum,ls,v,idx,n);
861:         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
862:       }
863:     }
864:     VecRestoreArray(xx,&x);
865:     if (bb != xx) {VecRestoreArray(bb,&b); }
866:     VecRestoreArray(mat->lvec,&ls);
867:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
868:     if (flag & SOR_ZERO_INITIAL_GUESS) {
869:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);
870:       return(0);
871:     }
872:     VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
873:     VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
874:     VecGetArray(xx,&x);
875:     if (xx != bb) {
876:       VecGetArray(bb,&b);
877:     } else {
878:       b = x;
879:     }
880:     VecGetArray(mat->lvec,&ls);
881:     xs = x + shift; /* shift by one for index start of 1 */
882:     ls = ls + shift;
883:     while (its--) {
884:       for (i=0; i<m; i++) {
885:         n    = A->i[i+1] - A->i[i];
886:         PetscLogFlops(4*n+3);
887:         idx  = A->j + A->i[i] + shift;
888:         v    = A->a + A->i[i] + shift;
889:         sum  = b[i];
890:         SPARSEDENSEMDOT(sum,xs,v,idx,n);
891:         d    = fshift + A->a[diag[i]+shift];
892:         n    = B->i[i+1] - B->i[i];
893:         idx  = B->j + B->i[i] + shift;
894:         v    = B->a + B->i[i] + shift;
895:         SPARSEDENSEMDOT(sum,ls,v,idx,n);
896:         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
897:       }
898:     }
899:     VecRestoreArray(xx,&x);
900:     if (bb != xx) {VecRestoreArray(bb,&b); }
901:     VecRestoreArray(mat->lvec,&ls);
902:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
903:     if (flag & SOR_ZERO_INITIAL_GUESS) {
904:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);
905:       return(0);
906:     }
907:     VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
908:     VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
909:     VecGetArray(xx,&x);
910:     if (xx != bb) {
911:       VecGetArray(bb,&b);
912:     } else {
913:       b = x;
914:     }
915:     VecGetArray(mat->lvec,&ls);
916:     xs = x + shift; /* shift by one for index start of 1 */
917:     ls = ls + shift;
918:     while (its--) {
919:       for (i=m-1; i>-1; i--) {
920:         n    = A->i[i+1] - A->i[i];
921:         PetscLogFlops(4*n+3);
922:         idx  = A->j + A->i[i] + shift;
923:         v    = A->a + A->i[i] + shift;
924:         sum  = b[i];
925:         SPARSEDENSEMDOT(sum,xs,v,idx,n);
926:         d    = fshift + A->a[diag[i]+shift];
927:         n    = B->i[i+1] - B->i[i];
928:         idx  = B->j + B->i[i] + shift;
929:         v    = B->a + B->i[i] + shift;
930:         SPARSEDENSEMDOT(sum,ls,v,idx,n);
931:         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
932:       }
933:     }
934:     VecRestoreArray(xx,&x);
935:     if (bb != xx) {VecRestoreArray(bb,&b); }
936:     VecRestoreArray(mat->lvec,&ls);
937:   } else {
938:     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
939:   }
940:   return(0);
941: }
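
/*
   Usage sketch (illustrative, not part of the original source): the relaxation above
   is normally reached through the SOR preconditioner, but it can also be invoked
   directly; this assumes the public MatRelax() wrapper with the same calling sequence
   as MatRelax_MPIAIJ() above.  Only the local (processor-block) sweeps are supported,
   as the final SETERRQ indicates.
*/
int ExampleLocalSOR(Mat A,Vec b,Vec x)
{
  /* one symmetric local sweep with omega = 1.0 and a zero initial guess */
  MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,0.0,1,x);
  return(0);
}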

943: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
944: {
945:   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
946:   Mat        A = mat->A,B = mat->B;
947:   int        ierr;
948:   PetscReal  isend[5],irecv[5];

951:   info->block_size     = 1.0;
952:   MatGetInfo(A,MAT_LOCAL,info);
953:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
954:   isend[3] = info->memory;  isend[4] = info->mallocs;
955:   MatGetInfo(B,MAT_LOCAL,info);
956:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
957:   isend[3] += info->memory;  isend[4] += info->mallocs;
958:   if (flag == MAT_LOCAL) {
959:     info->nz_used      = isend[0];
960:     info->nz_allocated = isend[1];
961:     info->nz_unneeded  = isend[2];
962:     info->memory       = isend[3];
963:     info->mallocs      = isend[4];
964:   } else if (flag == MAT_GLOBAL_MAX) {
965:     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
966:     info->nz_used      = irecv[0];
967:     info->nz_allocated = irecv[1];
968:     info->nz_unneeded  = irecv[2];
969:     info->memory       = irecv[3];
970:     info->mallocs      = irecv[4];
971:   } else if (flag == MAT_GLOBAL_SUM) {
972:     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
973:     info->nz_used      = irecv[0];
974:     info->nz_allocated = irecv[1];
975:     info->nz_unneeded  = irecv[2];
976:     info->memory       = irecv[3];
977:     info->mallocs      = irecv[4];
978:   }
979:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
980:   info->fill_ratio_needed = 0;
981:   info->factor_mallocs    = 0;
982:   info->rows_global       = (double)matin->M;
983:   info->columns_global    = (double)matin->N;
984:   info->rows_local        = (double)matin->m;
985:   info->columns_local     = (double)matin->n;

987:   return(0);
988: }
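
/*
   Usage sketch (illustrative, not part of the original source): querying the assembled
   parallel matrix for global storage statistics, which the routine above computes by
   reducing the per-processor numbers from the diagonal (A) and off-diagonal (B) blocks.
   The function name is hypothetical.
*/
int ExampleGetInfo(Mat A)
{
  MatInfo info;

  MatGetInfo(A,MAT_GLOBAL_SUM,&info);
  PetscPrintf(A->comm,"nonzeros used %d, allocated %d, mallocs during assembly %d\n",
              (int)info.nz_used,(int)info.nz_allocated,(int)info.mallocs);
  return(0);
}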

990: int MatSetOption_MPIAIJ(Mat A,MatOption op)
991: {
992:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
993:   int        ierr;

996:   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
997:       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
998:       op == MAT_COLUMNS_UNSORTED ||
999:       op == MAT_COLUMNS_SORTED ||
1000:       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1001:       op == MAT_KEEP_ZEROED_ROWS ||
1002:       op == MAT_NEW_NONZERO_LOCATION_ERR ||
1003:       op == MAT_USE_INODES ||
1004:       op == MAT_DO_NOT_USE_INODES ||
1005:       op == MAT_IGNORE_ZERO_ENTRIES) {
1006:         MatSetOption(a->A,op);
1007:         MatSetOption(a->B,op);
1008:   } else if (op == MAT_ROW_ORIENTED) {
1009:     a->roworiented = PETSC_TRUE;
1010:     MatSetOption(a->A,op);
1011:     MatSetOption(a->B,op);
1012:   } else if (op == MAT_ROWS_SORTED ||
1013:              op == MAT_ROWS_UNSORTED ||
1014:              op == MAT_YES_NEW_DIAGONALS) {
1015:     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1016:   } else if (op == MAT_COLUMN_ORIENTED) {
1017:     a->roworiented = PETSC_FALSE;
1018:     MatSetOption(a->A,op);
1019:     MatSetOption(a->B,op);
1020:   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1021:     a->donotstash = PETSC_TRUE;
1022:   } else if (op == MAT_NO_NEW_DIAGONALS){
1023:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1024:   } else {
1025:     SETERRQ(PETSC_ERR_SUP,"unknown option");
1026:   }
1027:   return(0);
1028: }

1030: int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1031: {
1032:   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;

1035:   if (m) *m = mat->rstart;
1036:   if (n) *n = mat->rend;
1037:   return(0);
1038: }

1040: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1041: {
1042:   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1043:   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1044:   int        i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1045:   int        nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1046:   int        *cmap,*idx_p;

1049:   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1050:   mat->getrowactive = PETSC_TRUE;

1052:   if (!mat->rowvalues && (idx || v)) {
1053:     /*
1054:         allocate enough space to hold information from the longest row.
1055:     */
1056:     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1057:     int     max = 1,tmp;
1058:     for (i=0; i<matin->m; i++) {
1059:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1060:       if (max < tmp) { max = tmp; }
1061:     }
1062:     PetscMalloc(max*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);
1063:     mat->rowindices = (int*)(mat->rowvalues + max);
1064:   }

1066:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1067:   lrow = row - rstart;

1069:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1070:   if (!v)   {pvA = 0; pvB = 0;}
1071:   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1072:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1073:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1074:   nztot = nzA + nzB;

1076:   cmap  = mat->garray;
1077:   if (v  || idx) {
1078:     if (nztot) {
1079:       /* Sort by increasing column numbers, assuming A and B already sorted */
1080:       int imark = -1;
1081:       if (v) {
1082:         *v = v_p = mat->rowvalues;
1083:         for (i=0; i<nzB; i++) {
1084:           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1085:           else break;
1086:         }
1087:         imark = i;
1088:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1089:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1090:       }
1091:       if (idx) {
1092:         *idx = idx_p = mat->rowindices;
1093:         if (imark > -1) {
1094:           for (i=0; i<imark; i++) {
1095:             idx_p[i] = cmap[cworkB[i]];
1096:           }
1097:         } else {
1098:           for (i=0; i<nzB; i++) {
1099:             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1100:             else break;
1101:           }
1102:           imark = i;
1103:         }
1104:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1105:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1106:       }
1107:     } else {
1108:       if (idx) *idx = 0;
1109:       if (v)   *v   = 0;
1110:     }
1111:   }
1112:   *nz = nztot;
1113:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1114:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1115:   return(0);
1116: }

1118: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1119: {
1120:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

1123:   if (aij->getrowactive == PETSC_FALSE) {
1124:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1125:   }
1126:   aij->getrowactive = PETSC_FALSE;
1127:   return(0);
1128: }
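
/*
   Usage sketch (illustrative, not part of the original source): MatGetRow() on an
   MPIAIJ matrix only serves locally owned rows (see the SETERRQ above), so a caller
   first asks for its ownership range; each MatGetRow() must be paired with
   MatRestoreRow() before the next row is requested.  The function name is hypothetical.
*/
int ExampleGetRows(Mat A)
{
  int    i,rstart,rend,ncols,*cols;
  Scalar *vals;

  MatGetOwnershipRange(A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    MatGetRow(A,i,&ncols,&cols,&vals);
    /* ... use the global column indices cols[] and values vals[] of row i ... */
    MatRestoreRow(A,i,&ncols,&cols,&vals);
  }
  return(0);
}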

1130: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1131: {
1132:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1133:   Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1134:   int        ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1135:   PetscReal  sum = 0.0;
1136:   Scalar     *v;

1139:   if (aij->size == 1) {
1140:      MatNorm(aij->A,type,norm);
1141:   } else {
1142:     if (type == NORM_FROBENIUS) {
1143:       v = amat->a;
1144:       for (i=0; i<amat->nz; i++) {
1145: #if defined(PETSC_USE_COMPLEX)
1146:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1147: #else
1148:         sum += (*v)*(*v); v++;
1149: #endif
1150:       }
1151:       v = bmat->a;
1152:       for (i=0; i<bmat->nz; i++) {
1153: #if defined(PETSC_USE_COMPLEX)
1154:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1155: #else
1156:         sum += (*v)*(*v); v++;
1157: #endif
1158:       }
1159:       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1160:       *norm = sqrt(*norm);
1161:     } else if (type == NORM_1) { /* max column norm */
1162:       PetscReal *tmp,*tmp2;
1163:       int    *jj,*garray = aij->garray;
1164:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1165:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1166:       PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1167:       *norm = 0.0;
1168:       v = amat->a; jj = amat->j;
1169:       for (j=0; j<amat->nz; j++) {
1170:         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1171:       }
1172:       v = bmat->a; jj = bmat->j;
1173:       for (j=0; j<bmat->nz; j++) {
1174:         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1175:       }
1176:       MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1177:       for (j=0; j<mat->N; j++) {
1178:         if (tmp2[j] > *norm) *norm = tmp2[j];
1179:       }
1180:       PetscFree(tmp);
1181:       PetscFree(tmp2);
1182:     } else if (type == NORM_INFINITY) { /* max row norm */
1183:       PetscReal ntemp = 0.0;
1184:       for (j=0; j<aij->A->m; j++) {
1185:         v = amat->a + amat->i[j] + shift;
1186:         sum = 0.0;
1187:         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1188:           sum += PetscAbsScalar(*v); v++;
1189:         }
1190:         v = bmat->a + bmat->i[j] + shift;
1191:         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1192:           sum += PetscAbsScalar(*v); v++;
1193:         }
1194:         if (sum > ntemp) ntemp = sum;
1195:       }
1196:       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1197:     } else {
1198:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1199:     }
1200:   }
1201:   return(0);
1202: }
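
/*
   Usage sketch (illustrative, not part of the original source): the Frobenius, one
   and infinity norms are supported by the routine above; NORM_2 is not, as the final
   SETERRQ indicates.  The function name is hypothetical.
*/
int ExampleNorms(Mat A)
{
  PetscReal nrmf,nrm1,nrminf;

  MatNorm(A,NORM_FROBENIUS,&nrmf);
  MatNorm(A,NORM_1,&nrm1);          /* maximum column sum */
  MatNorm(A,NORM_INFINITY,&nrminf); /* maximum row sum */
  return(0);
}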

1204: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1205: {
1206:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1207:   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1208:   int        ierr,shift = Aloc->indexshift;
1209:   int        M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1210:   Mat        B;
1211:   Scalar     *array;

1214:   if (!matout && M != N) {
1215:     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1216:   }

1218:   MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);

1220:   /* copy over the A part */
1221:   Aloc = (Mat_SeqAIJ*)a->A->data;
1222:   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1223:   row = a->rstart;
1224:   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1225:   for (i=0; i<m; i++) {
1226:     MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1227:     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1228:   }
1229:   aj = Aloc->j;
1230:   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

1232:   /* copy over the B part */
1233:   Aloc = (Mat_SeqAIJ*)a->B->data;
1234:   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1235:   row  = a->rstart;
1236:   PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);
1237:   ct   = cols;
1238:   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1239:   for (i=0; i<m; i++) {
1240:     MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1241:     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1242:   }
1243:   PetscFree(ct);
1244:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1245:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1246:   if (matout) {
1247:     *matout = B;
1248:   } else {
1249:     MatHeaderCopy(A,B);
1250:   }
1251:   return(0);
1252: }
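
/*
   Usage sketch (illustrative, not part of the original source): out-of-place and
   in-place transposes.  As the check at the top of the routine above shows, the
   in-place form (a PETSC_NULL second argument) is only allowed for square matrices;
   the two calls below are shown independently.
*/
int ExampleTranspose(Mat A,Mat *At)
{
  MatTranspose(A,At);          /* creates a new parallel matrix holding A' */
  MatTranspose(A,PETSC_NULL);  /* replaces A by A' (square matrices only) */
  return(0);
}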

1254: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1255: {
1256:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1257:   Mat        a = aij->A,b = aij->B;
1258:   int        ierr,s1,s2,s3;

1261:   MatGetLocalSize(mat,&s2,&s3);
1262:   if (rr) {
1263:     VecGetLocalSize(rr,&s1);
1264:     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1265:     /* Overlap communication with computation. */
1266:     VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1267:   }
1268:   if (ll) {
1269:     VecGetLocalSize(ll,&s1);
1270:     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1271:     (*b->ops->diagonalscale)(b,ll,0);
1272:   }
1273:   /* scale  the diagonal block */
1274:   (*a->ops->diagonalscale)(a,ll,rr);

1276:   if (rr) {
1277:     /* Do a scatter end and then right scale the off-diagonal block */
1278:     VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1279:     (*b->ops->diagonalscale)(b,0,aij->lvec);
1280:   }
1281: 
1282:   return(0);
1283: }
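
/*
   Usage sketch (illustrative, not part of the original source): computing
   diag(l) A diag(r).  As the size checks above require, the left vector must be
   distributed like the rows of A and the right vector like its columns; either
   argument may be PETSC_NULL to scale on one side only.  The names and the use of
   VecSet() are assumptions made only for this example.
*/
int ExampleDiagonalScale(Mat A)
{
  MPI_Comm comm;
  Vec      l,r;
  int      m,n,M,N;
  Scalar   two = 2.0;

  PetscObjectGetComm((PetscObject)A,&comm);
  MatGetLocalSize(A,&m,&n);
  MatGetSize(A,&M,&N);
  VecCreateMPI(comm,m,M,&l);  /* left vector: row layout */
  VecCreateMPI(comm,n,N,&r);  /* right vector: column layout */
  VecSet(&two,l);
  VecSet(&two,r);
  MatDiagonalScale(A,l,r);
  VecDestroy(l);
  VecDestroy(r);
  return(0);
}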


1286: int MatPrintHelp_MPIAIJ(Mat A)
1287: {
1288:   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1289:   int        ierr;

1292:   if (!a->rank) {
1293:     MatPrintHelp_SeqAIJ(a->A);
1294:   }
1295:   return(0);
1296: }

1298: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1299: {
1301:   *bs = 1;
1302:   return(0);
1303: }
1304: int MatSetUnfactored_MPIAIJ(Mat A)
1305: {
1306:   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1307:   int        ierr;

1310:   MatSetUnfactored(a->A);
1311:   return(0);
1312: }

1314: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1315: {
1316:   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1317:   Mat        a,b,c,d;
1318:   PetscTruth flg;
1319:   int        ierr;

1322:   PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1323:   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1324:   a = matA->A; b = matA->B;
1325:   c = matB->A; d = matB->B;

1327:   MatEqual(a,c,&flg);
1328:   if (flg == PETSC_TRUE) {
1329:     MatEqual(b,d,&flg);
1330:   }
1331:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1332:   return(0);
1333: }

1335: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1336: {
1337:   int        ierr;
1338:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1339:   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1340:   PetscTruth flg;

1343:   PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1344:   if (str != SAME_NONZERO_PATTERN || !flg) {
1345:     /* because of the column compression in the off-processor part of the matrix a->B,
1346:        the number of columns in a->B and b->B may be different, hence we cannot call
1347:        the MatCopy() directly on the two parts. If need be, we can provide a more 
1348:        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1349:        then copying the submatrices */
1350:     MatCopy_Basic(A,B,str);
1351:   } else {
1352:     MatCopy(a->A,b->A,str);
1353:     MatCopy(a->B,b->B,str);
1354:   }
1355:   return(0);
1356: }

1358: int MatSetUpPreallocation_MPIAIJ(Mat A)
1359: {
1360:   int        ierr;

1363:    MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1364:   return(0);
1365: }
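
/*
   Usage sketch (illustrative, not part of the original source): the routine above
   supplies a default preallocation when the user gave none; for good assembly
   performance the caller should instead preallocate explicitly, either through
   MatCreateMPIAIJ() or, after MatCreate()/MatSetType(), through
   MatMPIAIJSetPreallocation().  The per-row estimates below (5 diagonal-block and
   2 off-diagonal-block nonzeros) and the function name are assumptions made only
   for this example.
*/
int ExamplePreallocate(MPI_Comm comm,int m,int n,int M,int N,Mat *A)
{
  MatCreate(comm,m,n,M,N,A);
  MatSetType(*A,MATMPIAIJ);
  MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);
  return(0);
}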

1367: EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1368: EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1369: EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1370: EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1371: EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1372: #if !defined(PETSC_USE_COMPLEX)
1373: EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1374: #endif

1376: /* -------------------------------------------------------------------*/
1377: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1378:        MatGetRow_MPIAIJ,
1379:        MatRestoreRow_MPIAIJ,
1380:        MatMult_MPIAIJ,
1381:        MatMultAdd_MPIAIJ,
1382:        MatMultTranspose_MPIAIJ,
1383:        MatMultTransposeAdd_MPIAIJ,
1384:        0,
1385:        0,
1386:        0,
1387:        0,
1388:        0,
1389:        0,
1390:        MatRelax_MPIAIJ,
1391:        MatTranspose_MPIAIJ,
1392:        MatGetInfo_MPIAIJ,
1393:        MatEqual_MPIAIJ,
1394:        MatGetDiagonal_MPIAIJ,
1395:        MatDiagonalScale_MPIAIJ,
1396:        MatNorm_MPIAIJ,
1397:        MatAssemblyBegin_MPIAIJ,
1398:        MatAssemblyEnd_MPIAIJ,
1399:        0,
1400:        MatSetOption_MPIAIJ,
1401:        MatZeroEntries_MPIAIJ,
1402:        MatZeroRows_MPIAIJ,
1403: #if !defined(PETSC_USE_COMPLEX)
1404:                                        MatLUFactorSymbolic_MPIAIJ_TFS,
1405: #else
1406:        0,
1407: #endif
1408:        0,
1409:        0,
1410:        0,
1411:        MatSetUpPreallocation_MPIAIJ,
1412:        0,
1413:        MatGetOwnershipRange_MPIAIJ,
1414:        0,
1415:        0,
1416:        0,
1417:        0,
1418:        MatDuplicate_MPIAIJ,
1419:        0,
1420:        0,
1421:        0,
1422:        0,
1423:        0,
1424:        MatGetSubMatrices_MPIAIJ,
1425:        MatIncreaseOverlap_MPIAIJ,
1426:        MatGetValues_MPIAIJ,
1427:        MatCopy_MPIAIJ,
1428:        MatPrintHelp_MPIAIJ,
1429:        MatScale_MPIAIJ,
1430:        0,
1431:        0,
1432:        0,
1433:        MatGetBlockSize_MPIAIJ,
1434:        0,
1435:        0,
1436:        0,
1437:        0,
1438:        MatFDColoringCreate_MPIAIJ,
1439:        0,
1440:        MatSetUnfactored_MPIAIJ,
1441:        0,
1442:        0,
1443:        MatGetSubMatrix_MPIAIJ,
1444:        MatDestroy_MPIAIJ,
1445:        MatView_MPIAIJ,
1446:        MatGetMaps_Petsc};

1448: /* ----------------------------------------------------------------------------------------*/

1450: EXTERN_C_BEGIN
1451: int MatStoreValues_MPIAIJ(Mat mat)
1452: {
1453:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1454:   int        ierr;

1457:   MatStoreValues(aij->A);
1458:   MatStoreValues(aij->B);
1459:   return(0);
1460: }
1461: EXTERN_C_END

1463: EXTERN_C_BEGIN
1464: int MatRetrieveValues_MPIAIJ(Mat mat)
1465: {
1466:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1467:   int        ierr;

1470:   MatRetrieveValues(aij->A);
1471:   MatRetrieveValues(aij->B);
1472:   return(0);
1473: }
1474: EXTERN_C_END

1476: #include "petscpc.h"
1477: EXTERN_C_BEGIN
1478: EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1479: EXTERN_C_END

1481: EXTERN_C_BEGIN
1482: int MatCreate_MPIAIJ(Mat B)
1483: {
1484:   Mat_MPIAIJ   *b;
1485:   int          ierr,i,size;

1488:   MPI_Comm_size(B->comm,&size);

1490:   ierr            = PetscNew(Mat_MPIAIJ,&b);
1491:   B->data         = (void*)b;
1492:   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));
1493:   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1494:   B->factor       = 0;
1495:   B->assembled    = PETSC_FALSE;
1496:   B->mapping      = 0;

1498:   B->insertmode      = NOT_SET_VALUES;
1499:   b->size            = size;
1500:   MPI_Comm_rank(B->comm,&b->rank);

1502:   PetscSplitOwnership(B->comm,&B->m,&B->M);
1503:   PetscSplitOwnership(B->comm,&B->n,&B->N);

1505:   /* the information in the maps duplicates the information computed below, eventually 
1506:      we should remove the duplicate information that is not contained in the maps */
1507:   MapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1508:   MapCreateMPI(B->comm,B->n,B->N,&B->cmap);

1510:   /* build local table of row and column ownerships */
1511:   PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1512:   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1513:   b->cowners = b->rowners + b->size + 2;
1514:   MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1515:   b->rowners[0] = 0;
1516:   for (i=2; i<=b->size; i++) {
1517:     b->rowners[i] += b->rowners[i-1];
1518:   }
1519:   b->rstart = b->rowners[b->rank];
1520:   b->rend   = b->rowners[b->rank+1];
1521:   MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1522:   b->cowners[0] = 0;
1523:   for (i=2; i<=b->size; i++) {
1524:     b->cowners[i] += b->cowners[i-1];
1525:   }
1526:   b->cstart = b->cowners[b->rank];
1527:   b->cend   = b->cowners[b->rank+1];

1529:   /* build cache for off array entries formed */
1530:   MatStashCreate_Private(B->comm,1,&B->stash);
1531:   b->donotstash  = PETSC_FALSE;
1532:   b->colmap      = 0;
1533:   b->garray      = 0;
1534:   b->roworiented = PETSC_TRUE;

1536:   /* stuff used for matrix vector multiply */
1537:   b->lvec      = PETSC_NULL;
1538:   b->Mvctx     = PETSC_NULL;

1540:   /* stuff for MatGetRow() */
1541:   b->rowindices   = 0;
1542:   b->rowvalues    = 0;
1543:   b->getrowactive = PETSC_FALSE;

1545:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1546:                                      "MatStoreValues_MPIAIJ",
1547:                                      MatStoreValues_MPIAIJ);
1548:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1549:                                      "MatRetrieveValues_MPIAIJ",
1550:                                      MatRetrieveValues_MPIAIJ);
1551:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1552:                                      "MatGetDiagonalBlock_MPIAIJ",
1553:                                      MatGetDiagonalBlock_MPIAIJ);
1554:   return(0);
1555: }
1556: EXTERN_C_END

1558: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1559: {
1560:   Mat        mat;
1561:   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1562:   int        ierr;

1565:   *newmat       = 0;
1566:   MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1567:   MatSetType(mat,MATMPIAIJ);
1568:   a    = (Mat_MPIAIJ*)mat->data;
1569:   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1570:   mat->factor       = matin->factor;
1571:   mat->assembled    = PETSC_TRUE;
1572:   mat->insertmode   = NOT_SET_VALUES;
1573:   mat->preallocated = PETSC_TRUE;

1575:   a->rstart       = oldmat->rstart;
1576:   a->rend         = oldmat->rend;
1577:   a->cstart       = oldmat->cstart;
1578:   a->cend         = oldmat->cend;
1579:   a->size         = oldmat->size;
1580:   a->rank         = oldmat->rank;
1581:   a->donotstash   = oldmat->donotstash;
1582:   a->roworiented  = oldmat->roworiented;
1583:   a->rowindices   = 0;
1584:   a->rowvalues    = 0;
1585:   a->getrowactive = PETSC_FALSE;

1587:   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1588:   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);
1589:   if (oldmat->colmap) {
1590: #if defined (PETSC_USE_CTABLE)
1591:     PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1592: #else
1593:     PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1594:     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1595:     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1596: #endif
1597:   } else a->colmap = 0;
1598:   if (oldmat->garray) {
1599:     int len;
1600:     len  = oldmat->B->n;
1601:     PetscMalloc((len+1)*sizeof(int),&a->garray);
1602:     PetscLogObjectMemory(mat,len*sizeof(int));
1603:     if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1604:   } else a->garray = 0;
1605: 
1606:   VecDuplicate(oldmat->lvec,&a->lvec);
1607:   PetscLogObjectParent(mat,a->lvec);
1608:   VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1609:   PetscLogObjectParent(mat,a->Mvctx);
1610:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1611:   PetscLogObjectParent(mat,a->A);
1612:   MatDuplicate(oldmat->B,cpvalues,&a->B);
1613:   PetscLogObjectParent(mat,a->B);
1614:   PetscFListDuplicate(matin->qlist,&mat->qlist);
1615:   *newmat = mat;
1616:   return(0);
1617: }

1619: #include "petscsys.h"

1621: EXTERN_C_BEGIN
1622: int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1623: {
1624:   Mat          A;
1625:   Scalar       *vals,*svals;
1626:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1627:   MPI_Status   status;
1628:   int          i,nz,ierr,j,rstart,rend,fd;
1629:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1630:   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1631:   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

1634:   MPI_Comm_size(comm,&size);
1635:   MPI_Comm_rank(comm,&rank);
1636:   if (!rank) {
1637:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1638:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1639:     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1640:     if (header[3] < 0) {
1641:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1642:     }
1643:   }

1645:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1646:   M = header[1]; N = header[2];
1647:   /* determine ownership of all rows */
1648:   m = M/size + ((M % size) > rank);
1649:   PetscMalloc((size+2)*sizeof(int),&rowners);
1650:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1651:   rowners[0] = 0;
1652:   for (i=2; i<=size; i++) {
1653:     rowners[i] += rowners[i-1];
1654:   }
1655:   rstart = rowners[rank];
1656:   rend   = rowners[rank+1];

1658:   /* distribute row lengths to all processors */
1659:   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1660:   offlens = ourlens + (rend-rstart);
1661:   if (!rank) {
1662:     PetscMalloc(M*sizeof(int),&rowlengths);
1663:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1664:     PetscMalloc(size*sizeof(int),&sndcounts);
1665:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1666:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1667:     PetscFree(sndcounts);
1668:   } else {
1669:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1670:   }

1672:   if (!rank) {
1673:     /* calculate the number of nonzeros on each processor */
1674:     PetscMalloc(size*sizeof(int),&procsnz);
1675:     PetscMemzero(procsnz,size*sizeof(int));
1676:     for (i=0; i<size; i++) {
1677:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1678:         procsnz[i] += rowlengths[j];
1679:       }
1680:     }
1681:     PetscFree(rowlengths);

1683:     /* determine max buffer needed and allocate it */
1684:     maxnz = 0;
1685:     for (i=0; i<size; i++) {
1686:       maxnz = PetscMax(maxnz,procsnz[i]);
1687:     }
1688:     PetscMalloc(maxnz*sizeof(int),&cols);

1690:     /* read in my part of the matrix column indices  */
1691:     nz   = procsnz[0];
1692:     PetscMalloc(nz*sizeof(int),&mycols);
1693:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1695:     /* read in everyone else's part and ship it off */
1696:     for (i=1; i<size; i++) {
1697:       nz   = procsnz[i];
1698:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1699:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1700:     }
1701:     PetscFree(cols);
1702:   } else {
1703:     /* determine buffer space needed for message */
1704:     nz = 0;
1705:     for (i=0; i<m; i++) {
1706:       nz += ourlens[i];
1707:     }
1708:     PetscMalloc((nz+1)*sizeof(int),&mycols);

1710:     /* receive message of column indices */
1711:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1712:     MPI_Get_count(&status,MPI_INT,&maxnz);
1713:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1714:   }

1716:   /* determine column ownership if matrix is not square */
1717:   if (N != M) {
1718:     n      = N/size + ((N % size) > rank);
1719:     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
1720:     cstart = cend - n;
1721:   } else {
1722:     cstart = rstart;
1723:     cend   = rend;
1724:     n      = cend - cstart;
1725:   }

1727:   /* loop over local rows, determining number of off diagonal entries */
1728:   PetscMemzero(offlens,m*sizeof(int));
1729:   jj = 0;
1730:   for (i=0; i<m; i++) {
1731:     for (j=0; j<ourlens[i]; j++) {
1732:       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1733:       jj++;
1734:     }
1735:   }

1737:   /* create our matrix */
1738:   for (i=0; i<m; i++) {
1739:     ourlens[i] -= offlens[i];
1740:   }
1741:   MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);
1742:   A = *newmat;
1743:   MatSetOption(A,MAT_COLUMNS_SORTED);
1744:   for (i=0; i<m; i++) {
1745:     ourlens[i] += offlens[i];
1746:   }

1748:   if (!rank) {
1749:     PetscMalloc(maxnz*sizeof(Scalar),&vals);

1751:     /* read in my part of the matrix numerical values  */
1752:     nz   = procsnz[0];
1753:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1754: 
1755:     /* insert into matrix */
1756:     jj      = rstart;
1757:     smycols = mycols;
1758:     svals   = vals;
1759:     for (i=0; i<m; i++) {
1760:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1761:       smycols += ourlens[i];
1762:       svals   += ourlens[i];
1763:       jj++;
1764:     }

1766:     /* read in other processors and ship out */
1767:     for (i=1; i<size; i++) {
1768:       nz   = procsnz[i];
1769:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1770:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1771:     }
1772:     PetscFree(procsnz);
1773:   } else {
1774:     /* receive numeric values */
1775:     PetscMalloc((nz+1)*sizeof(Scalar),&vals);

1777:     /* receive message of values */
1778:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1779:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1780:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1782:     /* insert into matrix */
1783:     jj      = rstart;
1784:     smycols = mycols;
1785:     svals   = vals;
1786:     for (i=0; i<m; i++) {
1787:       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1788:       smycols += ourlens[i];
1789:       svals   += ourlens[i];
1790:       jj++;
1791:     }
1792:   }
1793:   PetscFree(ourlens);
1794:   PetscFree(vals);
1795:   PetscFree(mycols);
1796:   PetscFree(rowners);

1798:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1799:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1800:   return(0);
1801: }
1802: EXTERN_C_END
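
   Example usage of the loader above (a minimal sketch, not part of the original source):
   read a matrix that was written with MatView() to a binary viewer back in as an MPIAIJ
   matrix.  The viewer-creation call and its read-only flag name are assumptions about this
   PETSc generation's API, and error checking (ierr/CHKERRQ) is omitted.

     PetscViewer viewer;
     Mat         A;

     /* open the binary file holding the matrix; flag name assumed for this PETSc version */
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);

     /* MatLoad() dispatches to MatLoad_MPIAIJ() when MATMPIAIJ is requested */
     MatLoad(viewer,MATMPIAIJ,&A);

     PetscViewerDestroy(viewer);
     MatDestroy(A);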

1804: /*
1805:     Not great since it makes two copies of the submatrix: first a sequential (SeqAIJ)
1806:   copy on each process, and then the final result formed by concatenating those local
1807:   matrices.  Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
1808: */
1809: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1810: {
1811:   int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1812:   int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1813:   Mat        *local,M,Mreuse;
1814:   Scalar     *vwork,*aa;
1815:   MPI_Comm   comm = mat->comm;
1816:   Mat_SeqAIJ *aij;


1820:   MPI_Comm_rank(comm,&rank);
1821:   MPI_Comm_size(comm,&size);

1823:   if (call ==  MAT_REUSE_MATRIX) {
1824:     PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
1825:     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1826:     local = &Mreuse;
1827:     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
1828:   } else {
1829:     MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
1830:     Mreuse = *local;
1831:     PetscFree(local);
1832:   }

1834:   /* 
1835:       m - number of local rows
1836:       n - number of columns (same on all processors)
1837:       rstart - first row in new global matrix generated
1838:   */
1839:   MatGetSize(Mreuse,&m,&n);
1840:   if (call == MAT_INITIAL_MATRIX) {
1841:     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1842:     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1843:     ii  = aij->i;
1844:     jj  = aij->j;

1846:     /*
1847:         Determine the number of non-zeros in the diagonal and off-diagonal 
1848:         portions of the matrix in order to do correct preallocation
1849:     */

1851:     /* first get start and end of "diagonal" columns */
1852:     if (csize == PETSC_DECIDE) {
1853:       nlocal = n/size + ((n % size) > rank);
1854:     } else {
1855:       nlocal = csize;
1856:     }
1857:     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
1858:     rstart = rend - nlocal;
1859:     if (rank == size - 1 && rend != n) {
1860:       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1861:     }

1863:     /* next, compute all the lengths */
1864:     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);
1865:     olens = dlens + m;
1866:     for (i=0; i<m; i++) {
1867:       jend = ii[i+1] - ii[i];
1868:       olen = 0;
1869:       dlen = 0;
1870:       for (j=0; j<jend; j++) {
1871:         if (*jj < rstart || *jj >= rend) olen++;
1872:         else dlen++;
1873:         jj++;
1874:       }
1875:       olens[i] = olen;
1876:       dlens[i] = dlen;
1877:     }
1878:     MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);
1879:     PetscFree(dlens);
1880:   } else {
1881:     int ml,nl;

1883:     M = *newmat;
1884:     MatGetLocalSize(M,&ml,&nl);
1885:     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1886:     MatZeroEntries(M);
1887:     /*
1888:          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1889:        rather than the slower MatSetValues().
1890:     */
1891:     M->was_assembled = PETSC_TRUE;
1892:     M->assembled     = PETSC_FALSE;
1893:   }
1894:   MatGetOwnershipRange(M,&rstart,&rend);
1895:   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1896:   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1897:   ii  = aij->i;
1898:   jj  = aij->j;
1899:   aa  = aij->a;
1900:   for (i=0; i<m; i++) {
1901:     row   = rstart + i;
1902:     nz    = ii[i+1] - ii[i];
1903:     cwork = jj;     jj += nz;
1904:     vwork = aa;     aa += nz;
1905:     MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
1906:   }

1908:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
1909:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
1910:   *newmat = M;

1912:   /* save submatrix used in processor for next request */
1913:   if (call ==  MAT_INITIAL_MATRIX) {
1914:     PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
1915:     PetscObjectDereference((PetscObject)Mreuse);
1916:   }

1918:   return(0);
1919: }
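
   Usage sketch for the routine above (not part of the original source): extract a parallel
   submatrix through the public MatGetSubMatrix() interface, which ends up in
   MatGetSubMatrix_MPIAIJ() for this matrix type.  The public calling sequence and
   ISCreateStride() are assumed to match this PETSc generation; nlocal and first are
   placeholders for the rows/columns requested by this process, and error checking is omitted.

     IS  isrow,iscol;
     Mat sub;

     ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&isrow);
     ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&iscol);

     /* PETSC_DECIDE lets the routine choose the local column size (csize) */
     MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);

     /* a later call with MAT_REUSE_MATRIX reuses the composed "SubMatrix" object */
     MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);

     ISDestroy(isrow); ISDestroy(iscol); MatDestroy(sub);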

1921: /*@C
1922:    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1923:    (the default parallel PETSc format).  For good matrix assembly performance
1924:    the user should preallocate the matrix storage by setting the parameters 
1925:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1926:    performance can be increased by more than a factor of 50.

1928:    Collective on MPI_Comm

1930:    Input Parameters:
1931: +  A - the matrix 
1932: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1933:            (same value is used for all local rows)
1934: .  d_nnz - array containing the number of nonzeros in the various rows of the 
1935:            DIAGONAL portion of the local submatrix (possibly different for each row)
1936:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
1937:            The size of this array is equal to the number of local rows, i.e. 'm'. 
1938:            You must leave room for the diagonal entry even if it is zero.
1939: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1940:            submatrix (same value is used for all local rows).
1941: -  o_nnz - array containing the number of nonzeros in the various rows of the
1942:            OFF-DIAGONAL portion of the local submatrix (possibly different for
1943:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
1944:            structure. The size of this array is equal to the number 
1945:            of local rows, i.e. 'm'. 

1947:    The AIJ format (also called the Yale sparse matrix format or
1948:    compressed row storage), is fully compatible with standard Fortran 77
1949:    storage.  That is, the stored row and column indices can begin at
1950:    either one (as in Fortran) or zero.  See the users manual for details.

1952:    The user MUST specify either the local or global matrix dimensions
1953:    (possibly both).

1955:    The parallel matrix is partitioned such that the first m0 rows belong to 
1956:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
1957:    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.

1959:    The DIAGONAL portion of the local submatrix of a processor can be defined 
1960:    as the submatrix which is obtained by extracting the part corresponding 
1961:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
1962:    first row that belongs to the processor, and r2 is the last row belonging 
1963:    to this processor. This is a square mxm matrix. The remaining portion 
1964:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

1966:    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

1968:    By default, this format uses inodes (identical nodes) when possible.
1969:    We search for consecutive rows with the same nonzero structure, thereby
1970:    reusing matrix information to achieve increased efficiency.

1972:    Options Database Keys:
1973: +  -mat_aij_no_inode  - Do not use inodes
1974: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1975: -  -mat_aij_oneindex - Internally use indexing starting at 1
1976:         rather than 0.  Note that when calling MatSetValues(),
1977:         the user still MUST index entries starting at 0!

1979:    Example usage:
1980:   
1981:    Consider the following 8x8 matrix with 34 non-zero values, that is 
1982:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1983:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
1984:    as follows:

1986: .vb
1987:             1  2  0  |  0  3  0  |  0  4
1988:     Proc0   0  5  6  |  7  0  0  |  8  0
1989:             9  0 10  | 11  0  0  | 12  0
1990:     -------------------------------------
1991:            13  0 14  | 15 16 17  |  0  0
1992:     Proc1   0 18  0  | 19 20 21  |  0  0 
1993:             0  0  0  | 22 23  0  | 24  0
1994:     -------------------------------------
1995:     Proc2  25 26 27  |  0  0 28  | 29  0
1996:            30  0  0  | 31 32 33  |  0 34
1997: .ve

1999:    This can be represented as a collection of submatrices as:

2001: .vb
2002:       A B C
2003:       D E F
2004:       G H I
2005: .ve

2007:    Where the submatrices A,B,C are owned by proc0, D,E,F are
2008:    owned by proc1, G,H,I are owned by proc2.

2010:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2011:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2012:    The 'M','N' parameters are 8,8, and have the same values on all procs.

2014:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2015:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2016:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2017:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2018:    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2019:    matrix, and [DF] as another SeqAIJ matrix.

2021:    When d_nz, o_nz parameters are specified, d_nz storage elements are
2022:    allocated for every row of the local diagonal submatrix, and o_nz
2023:    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2024:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 
2025:    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 
2026:    In this case, the values of d_nz,o_nz are:
2027: .vb
2028:      proc0 : d_nz = 2, o_nz = 2
2029:      proc1 : d_nz = 3, o_nz = 2
2030:      proc2 : d_nz = 1, o_nz = 4
2031: .ve
2032:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2033:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2034:    for proc2, i.e. we are using 12+15+10=37 storage locations to store 
2035:    34 values.

2037:    When d_nnz, o_nnz parameters are specified, the storage is specified
2038:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2039:    In the above case the values for d_nnz,o_nnz are:
2040: .vb
2041:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2042:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2043:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2044: .ve
2045:    Here the space allocated is the sum of all the above values, i.e. 34, and
2046:    hence preallocation is perfect.

2048:    Level: intermediate

2050: .keywords: matrix, aij, compressed row, sparse, parallel

2052: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2053: @*/
2054: int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2055: {
2056:   Mat_MPIAIJ   *b;
2057:   int          ierr,i;
2058:   PetscTruth   flg2;

2061:   PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);
2062:   if (!flg2) return(0);
2063:   B->preallocated = PETSC_TRUE;
2064:   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2065:   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2066:   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2067:   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2068:   if (d_nnz) {
2069:     for (i=0; i<B->m; i++) {
2070:       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2071:     }
2072:   }
2073:   if (o_nnz) {
2074:     for (i=0; i<B->m; i++) {
2075:       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2076:     }
2077:   }
2078:   b = (Mat_MPIAIJ*)B->data;

2080:   MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);
2081:   PetscLogObjectParent(B,b->A);
2082:   MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);
2083:   PetscLogObjectParent(B,b->B);

2085:   return(0);
2086: }
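
   Example usage of MatMPIAIJSetPreallocation() above (a minimal sketch, not part of the
   original source): create a MATMPIAIJ matrix and preallocate it row by row.  The d_nnz/o_nnz
   values are the proc0 entries from the 8x8 example in the manual page; each process passes
   the values for its own rows, and error checking is omitted.

     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* proc0 rows of the 8x8 example */

     MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);

     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */

     MatDestroy(A);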

2088: /*@C
2089:    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2090:    (the default parallel PETSc format).  For good matrix assembly performance
2091:    the user should preallocate the matrix storage by setting the parameters 
2092:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2093:    performance can be increased by more than a factor of 50.

2095:    Collective on MPI_Comm

2097:    Input Parameters:
2098: +  comm - MPI communicator
2099: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
2100:            This value should be the same as the local size used in creating the 
2101:            y vector for the matrix-vector product y = Ax.
2102: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
2103:        This value should be the same as the local size used in creating the 
2104:        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2105: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2106: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2107: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2108:            (same value is used for all local rows)
2109: .  d_nnz - array containing the number of nonzeros in the various rows of the 
2110:            DIAGONAL portion of the local submatrix (possibly different for each row)
2111:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
2112:            The size of this array is equal to the number of local rows, i.e. 'm'. 
2113:            You must leave room for the diagonal entry even if it is zero.
2114: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2115:            submatrix (same value is used for all local rows).
2116: -  o_nnz - array containing the number of nonzeros in the various rows of the
2117:            OFF-DIAGONAL portion of the local submatrix (possibly different for
2118:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
2119:            structure. The size of this array is equal to the number 
2120:            of local rows, i.e. 'm'. 

2122:    Output Parameter:
2123: .  A - the matrix 

2125:    Notes:
2126:    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2127:    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2128:    storage requirements for this matrix.

2130:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
2131:    processor then it must be used on all processors that share the object for 
2132:    that argument.

2134:    The AIJ format (also called the Yale sparse matrix format or
2135:    compressed row storage), is fully compatible with standard Fortran 77
2136:    storage.  That is, the stored row and column indices can begin at
2137:    either one (as in Fortran) or zero.  See the users manual for details.

2139:    The user MUST specify either the local or global matrix dimensions
2140:    (possibly both).

2142:    The parallel matrix is partitioned such that the first m0 rows belong to 
2143:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
2144:    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.

2146:    The DIAGONAL portion of the local submatrix of a processor can be defined 
2147:    as the submatrix which is obtained by extracting the part corresponding 
2148:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
2149:    first row that belongs to the processor, and r2 is the last row belonging 
2150:    to this processor. This is a square mxm matrix. The remaining portion 
2151:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

2153:    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

2155:    By default, this format uses inodes (identical nodes) when possible.
2156:    We search for consecutive rows with the same nonzero structure, thereby
2157:    reusing matrix information to achieve increased efficiency.

2159:    Options Database Keys:
2160: +  -mat_aij_no_inode  - Do not use inodes
2161: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2162: -  -mat_aij_oneindex - Internally use indexing starting at 1
2163:         rather than 0.  Note that when calling MatSetValues(),
2164:         the user still MUST index entries starting at 0!


2167:    Example usage:
2168:   
2169:    Consider the following 8x8 matrix with 34 non-zero values, that is 
2170:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2171:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
2172:    as follows:

2174: .vb
2175:             1  2  0  |  0  3  0  |  0  4
2176:     Proc0   0  5  6  |  7  0  0  |  8  0
2177:             9  0 10  | 11  0  0  | 12  0
2178:     -------------------------------------
2179:            13  0 14  | 15 16 17  |  0  0
2180:     Proc1   0 18  0  | 19 20 21  |  0  0 
2181:             0  0  0  | 22 23  0  | 24  0
2182:     -------------------------------------
2183:     Proc2  25 26 27  |  0  0 28  | 29  0
2184:            30  0  0  | 31 32 33  |  0 34
2185: .ve

2187:    This can be represented as a collection of submatrices as:

2189: .vb
2190:       A B C
2191:       D E F
2192:       G H I
2193: .ve

2195:    Where the submatrices A,B,C are owned by proc0, D,E,F are
2196:    owned by proc1, G,H,I are owned by proc2.

2198:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2199:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2200:    The 'M','N' parameters are 8,8, and have the same values on all procs.

2202:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2203:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2204:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2205:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2206:    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2207:    matrix, and [DF] as another SeqAIJ matrix.

2209:    When d_nz, o_nz parameters are specified, d_nz storage elements are
2210:    allocated for every row of the local diagonal submatrix, and o_nz
2211:    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2212:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 
2213:    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 
2214:    In this case, the values of d_nz,o_nz are:
2215: .vb
2216:      proc0 : d_nz = 2, o_nz = 2
2217:      proc1 : d_nz = 3, o_nz = 2
2218:      proc2 : d_nz = 1, o_nz = 4
2219: .ve
2220:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2221:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2222:    for proc2, i.e. we are using 12+15+10=37 storage locations to store 
2223:    34 values.

2225:    When d_nnz, o_nnz parameters are specified, the storage is specified
2226:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2227:    In the above case the values for d_nnz,o_nnz are:
2228: .vb
2229:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2230:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2231:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2232: .ve
2233:    Here the space allocated is the sum of all the above values, i.e. 34, and
2234:    hence preallocation is perfect.

2236:    Level: intermediate

2238: .keywords: matrix, aij, compressed row, sparse, parallel

2240: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2241: @*/
2242: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2243: {
2244:   int ierr,size;

2247:   MatCreate(comm,m,n,M,N,A);
2248:   MPI_Comm_size(comm,&size);
2249:   if (size > 1) {
2250:     MatSetType(*A,MATMPIAIJ);
2251:     MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2252:   } else {
2253:     MatSetType(*A,MATSEQAIJ);
2254:     MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2255:   }
2256:   return(0);
2257: }
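
   Example usage of MatCreateMPIAIJ() above (a minimal sketch, not part of the original
   source): create an 8x8 parallel matrix with scalar preallocation, fill the locally owned
   rows with a toy pattern (a diagonal entry plus one wrap-around off-diagonal entry), and
   assemble.  Error checking is omitted.

     Mat    A;
     int    i,rstart,rend,cols[2];
     Scalar vals[2];

     MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,
                     3,PETSC_NULL,2,PETSC_NULL,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       cols[0] = i;          vals[0] = 2.0;
       cols[1] = (i+1) % 8;  vals[1] = -1.0;
       MatSetValues(A,1,&i,2,cols,vals,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(A);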

2259: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2260: {
2261:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2263:   *Ad     = a->A;
2264:   *Ao     = a->B;
2265:   *colmap = a->garray;
2266:   return(0);
2267: }
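
   Usage sketch for the accessor above (not part of the original source): obtain the two
   sequential blocks that an assembled MATMPIAIJ matrix A stores locally.  Ad is the diagonal
   block (a->A), Ao the off-diagonal block (a->B), and colmap is the garray mapping Ao's
   compressed column numbers to global column numbers.  The routine returns references to the
   internal objects, so the caller should not destroy them; error checking is omitted.

     Mat Ad,Ao;
     int *garray,m,n;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
     MatGetSize(Ad,&m,&n);     /* size of the local diagonal block */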