Actual source code: mpibdiag.c

  1: /*$Id: mpibdiag.c,v 1.205 2001/08/10 03:31:02 bsmith Exp $*/
  2: /*
  3:    The basic matrix operations for the Block diagonal parallel 
  4:   matrices.
  5: */
 6:  #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
 7:  #include "src/vec/vecimpl.h"

 11: int MatSetValues_MPIBDiag(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
 12: {
 13:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 14:   int          ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
 15:   PetscTruth   roworiented = mbd->roworiented;

 18:   for (i=0; i<m; i++) {
 19:     if (idxm[i] < 0) continue;
 20:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 21:     if (idxm[i] >= rstart && idxm[i] < rend) {
 22:       row = idxm[i] - rstart;
 23:       for (j=0; j<n; j++) {
 24:         if (idxn[j] < 0) continue;
 25:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 26:         if (roworiented) {
 27:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
 28:         } else {
 29:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 30:         }
 31:       }
 32:     } else {
 33:       if (!mbd->donotstash) {
 34:         if (roworiented) {
 35:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 36:         } else {
 37:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 38:         }
 39:       }
 40:     }
 41:   }
 42:   return(0);
 43: }

 47: int MatGetValues_MPIBDiag(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
 48: {
 49:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 50:   int          ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;

 53:   for (i=0; i<m; i++) {
 54:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 55:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 56:     if (idxm[i] >= rstart && idxm[i] < rend) {
 57:       row = idxm[i] - rstart;
 58:       for (j=0; j<n; j++) {
 59:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 60:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 61:         MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
 62:       }
 63:     } else {
 64:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 65:     }
 66:   }
 67:   return(0);
 68: }

 72: int MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
 73: {
 74:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 75:   MPI_Comm     comm = mat->comm;
 76:   int          ierr,nstash,reallocs;
 77:   InsertMode   addv;

 80:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
 81:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
 82:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
 83:   }
 84:   mat->insertmode = addv; /* in case this processor had no cache */
 85:   MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
 86:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 87:   PetscLogInfo(0,"MatAssemblyBegin_MPIBDiag:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
 88:   return(0);
 89: }

 93: int MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
 94: {
 95:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 96:   Mat_SeqBDiag *mlocal;
 97:   int          i,n,*row,*col;
 98:   int          *tmp1,*tmp2,ierr,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
 99:   PetscScalar  *val;
100:   InsertMode   addv = mat->insertmode;


104:   while (1) {
105:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
106:     if (!flg) break;
107: 
108:     for (i=0; i<n;) {
109:       /* Now identify the consecutive vals belonging to the same row */
110:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
111:       if (j < n) ncols = j-i;
112:       else       ncols = n-i;
113:       /* Now assemble all these values with a single function call */
114:       MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
115:       i = j;
116:     }
117:   }
118:   MatStashScatterEnd_Private(&mat->stash);

120:   MatAssemblyBegin(mbd->A,mode);
121:   MatAssemblyEnd(mbd->A,mode);

123:   /* Fix main diagonal location and determine global diagonals */
124:   mlocal         = (Mat_SeqBDiag*)mbd->A->data;
125:   Mblock         = mat->M/mlocal->bs; Nblock = mat->N/mlocal->bs;
126:   len            = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
127:   PetscMalloc(2*len*sizeof(int),&tmp1);
128:   tmp2           = tmp1 + len;
129:   PetscMemzero(tmp1,2*len*sizeof(int));
130:   mlocal->mainbd = -1;
131:   for (i=0; i<mlocal->nd; i++) {
132:     if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
133:     tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
134:   }
135:   MPI_Allreduce(tmp1,tmp2,len,MPI_INT,MPI_SUM,mat->comm);
136:   ict  = 0;
137:   for (i=0; i<len; i++) {
138:     if (tmp2[i]) {
139:       mbd->gdiag[ict] = i - Mblock;
140:       ict++;
141:     }
142:   }
143:   mbd->gnd = ict;
144:   PetscFree(tmp1);

146:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
147:     MatSetUpMultiply_MPIBDiag(mat);
148:   }
149:   return(0);
150: }

154: int MatGetBlockSize_MPIBDiag(Mat mat,int *bs)
155: {
156:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
157:   Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;

160:   *bs = dmat->bs;
161:   return(0);
162: }

166: int MatZeroEntries_MPIBDiag(Mat A)
167: {
168:   Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
169:   int          ierr;

172:   MatZeroEntries(l->A);
173:   return(0);
174: }

 176: /* Again, this uses the same basic strategy as in the assembly and 
 177:    scatter create routines; we should try to do it systematically 
 178:    if we can figure out the proper level of generality. */

180: /* the code does not do the diagonal entries correctly unless the 
 181:    matrix is square and the column and row ownerships are identical.
182:    This is a BUG. The only way to fix it seems to be to access 
183:    aij->A and aij->B directly and not through the MatZeroRows() 
184:    routine. 
185: */

189: int MatZeroRows_MPIBDiag(Mat A,IS is,const PetscScalar *diag)
190: {
191:   Mat_MPIBDiag   *l = (Mat_MPIBDiag*)A->data;
192:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
193:   int            *nprocs,j,idx,nsends;
194:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
195:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
196:   int            *lens,imdex,*lrows,*values;
197:   MPI_Comm       comm = A->comm;
198:   MPI_Request    *send_waits,*recv_waits;
199:   MPI_Status     recv_status,*send_status;
200:   IS             istmp;
201:   PetscTruth     found;

204:   ISGetLocalSize(is,&N);
205:   ISGetIndices(is,&rows);

207:   /*  first count number of contributors to each processor */
208:   PetscMalloc(2*size*sizeof(int),&nprocs);
209:   PetscMemzero(nprocs,2*size*sizeof(int));
210:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
211:   for (i=0; i<N; i++) {
212:     idx = rows[i];
213:     found = PETSC_FALSE;
214:     for (j=0; j<size; j++) {
215:       if (idx >= owners[j] && idx < owners[j+1]) {
216:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
217:       }
218:     }
219:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
220:   }
221:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

223:   /* inform other processors of number of messages and max length*/
224:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

226:   /* post receives:   */
227:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
228:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
229:   for (i=0; i<nrecvs; i++) {
230:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
231:   }

233:   /* do sends:
234:       1) starts[i] gives the starting index in svalues for stuff going to 
235:          the ith processor
236:   */
237:   PetscMalloc((N+1)*sizeof(int),&svalues);
238:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
239:   PetscMalloc((size+1)*sizeof(int),&starts);
240:   starts[0] = 0;
241:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
242:   for (i=0; i<N; i++) {
243:     svalues[starts[owner[i]]++] = rows[i];
244:   }
245:   ISRestoreIndices(is,&rows);

247:   starts[0] = 0;
248:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
249:   count = 0;
250:   for (i=0; i<size; i++) {
251:     if (nprocs[2*i+1]) {
252:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
253:     }
254:   }
255:   PetscFree(starts);

257:   base = owners[rank];

259:   /*  wait on receives */
260:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
261:   source = lens + nrecvs;
262:   count  = nrecvs;
263:   slen   = 0;
264:   while (count) {
265:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
266:     /* unpack receives into our local space */
267:     MPI_Get_count(&recv_status,MPI_INT,&n);
268:     source[imdex]  = recv_status.MPI_SOURCE;
269:     lens[imdex]  = n;
270:     slen += n;
271:     count--;
272:   }
273:   PetscFree(recv_waits);
274: 
275:   /* move the data into the send scatter */
276:   PetscMalloc((slen+1)*sizeof(int),&lrows);
277:   count = 0;
278:   for (i=0; i<nrecvs; i++) {
279:     values = rvalues + i*nmax;
280:     for (j=0; j<lens[i]; j++) {
281:       lrows[count++] = values[j] - base;
282:     }
283:   }
284:   PetscFree(rvalues);
285:   PetscFree(lens);
286:   PetscFree(owner);
287:   PetscFree(nprocs);
288: 
289:   /* actually zap the local rows */
290:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
291:   PetscLogObjectParent(A,istmp);
292:   PetscFree(lrows);
293:   MatZeroRows(l->A,istmp,diag);
294:   ISDestroy(istmp);

296:   /* wait on sends */
297:   if (nsends) {
298:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
299:     MPI_Waitall(nsends,send_waits,send_status);
300:     PetscFree(send_status);
301:   }
302:   PetscFree(send_waits);
303:   PetscFree(svalues);

305:   return(0);
306: }

310: int MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
311: {
312:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
313:   int          ierr;

316:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
317:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
318:   (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
319:   return(0);
320: }

324: int MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
325: {
326:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
327:   int          ierr;

330:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
331:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
332:   (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
333:   return(0);
334: }

338: int MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
339: {
340:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
341:   int          ierr;
342:   PetscScalar  zero = 0.0;

345:   VecSet(&zero,yy);
346:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
347:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
348:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
349:   return(0);
350: }

354: int MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
355: {
356:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
357:   int          ierr;

360:   VecCopy(yy,zz);
361:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
362:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
363:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
364:   return(0);
365: }

369: int MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
370: {
371:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
372:   Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mat->A->data;
373:   int          ierr;
374:   PetscReal    isend[5],irecv[5];

377:   info->block_size     = (PetscReal)dmat->bs;
378:   MatGetInfo(mat->A,MAT_LOCAL,info);
379:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
380:   isend[3] = info->memory;  isend[4] = info->mallocs;
381:   if (flag == MAT_LOCAL) {
382:     info->nz_used      = isend[0];
383:     info->nz_allocated = isend[1];
384:     info->nz_unneeded  = isend[2];
385:     info->memory       = isend[3];
386:     info->mallocs      = isend[4];
387:   } else if (flag == MAT_GLOBAL_MAX) {
388:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
389:     info->nz_used      = irecv[0];
390:     info->nz_allocated = irecv[1];
391:     info->nz_unneeded  = irecv[2];
392:     info->memory       = irecv[3];
393:     info->mallocs      = irecv[4];
394:   } else if (flag == MAT_GLOBAL_SUM) {
395:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
396:     info->nz_used      = irecv[0];
397:     info->nz_allocated = irecv[1];
398:     info->nz_unneeded  = irecv[2];
399:     info->memory       = irecv[3];
400:     info->mallocs      = irecv[4];
401:   }
402:   info->rows_global    = (double)matin->M;
403:   info->columns_global = (double)matin->N;
404:   info->rows_local     = (double)matin->m;
405:   info->columns_local  = (double)matin->N;
406:   return(0);
407: }

411: int MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
412: {
413:   int          ierr;
414:   Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;

417:   MatGetDiagonal(A->A,v);
418:   return(0);
419: }

423: int MatDestroy_MPIBDiag(Mat mat)
424: {
425:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
426:   int          ierr;
427: #if defined(PETSC_USE_LOG)
428:   Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;

431:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d, BSize=%d, NDiag=%d",mat->M,mat->N,ms->bs,ms->nd);
432: #else
434: #endif
435:   MatStashDestroy_Private(&mat->stash);
436:   PetscFree(mbd->rowners);
437:   PetscFree(mbd->gdiag);
438:   MatDestroy(mbd->A);
439:   if (mbd->lvec) {VecDestroy(mbd->lvec);}
440:   if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
441:   PetscFree(mbd);
442:   return(0);
443: }


448: static int MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
449: {
450:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
451:   int          ierr;

454:   if (mbd->size == 1) {
455:     MatView(mbd->A,viewer);
456:   } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
457:   return(0);
458: }

462: static int MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
463: {
464:   Mat_MPIBDiag      *mbd = (Mat_MPIBDiag*)mat->data;
465:   Mat_SeqBDiag      *dmat = (Mat_SeqBDiag*)mbd->A->data;
466:   int               ierr,i,size = mbd->size,rank = mbd->rank;
467:   PetscTruth        isascii,isdraw;
468:   PetscViewer       sviewer;
469:   PetscViewerFormat format;

472:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
473:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
474:   if (isascii) {
475:     PetscViewerGetFormat(viewer,&format);
476:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
477:       int nline = PetscMin(10,mbd->gnd),k,nk,np;
478:       PetscViewerASCIIPrintf(viewer,"  block size=%d, total number of diagonals=%d\n",dmat->bs,mbd->gnd);
479:       nk = (mbd->gnd-1)/nline + 1;
480:       for (k=0; k<nk; k++) {
481:         PetscViewerASCIIPrintf(viewer,"  global diag numbers:");
482:         np = PetscMin(nline,mbd->gnd - nline*k);
483:         for (i=0; i<np; i++) {
484:           PetscViewerASCIIPrintf(viewer,"  %d",mbd->gdiag[i+nline*k]);
485:         }
486:         PetscViewerASCIIPrintf(viewer,"\n");
487:       }
488:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
489:         MatInfo info;
490:         MPI_Comm_rank(mat->comm,&rank);
491:         MatGetInfo(mat,MAT_LOCAL,&info);
492:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
493:             (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
494:         PetscViewerFlush(viewer);
495:         VecScatterView(mbd->Mvctx,viewer);
496:       }
497:       return(0);
498:     }
499:   }

501:   if (isdraw) {
502:     PetscDraw       draw;
503:     PetscTruth isnull;
504:     PetscViewerDrawGetDraw(viewer,0,&draw);
505:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
506:   }

508:   if (size == 1) {
509:     MatView(mbd->A,viewer);
510:   } else {
511:     /* assemble the entire matrix onto first processor. */
512:     Mat          A;
513:     int          M = mat->M,N = mat->N,m,row,nz,*cols;
514:     PetscScalar  *vals;
515:     Mat_SeqBDiag *Ambd = (Mat_SeqBDiag*)mbd->A->data;

517:     if (!rank) {
518:       MatCreateMPIBDiag(mat->comm,M,M,N,mbd->gnd,Ambd->bs,mbd->gdiag,PETSC_NULL,&A);
519:     } else {
520:       MatCreateMPIBDiag(mat->comm,0,M,N,0,Ambd->bs,PETSC_NULL,PETSC_NULL,&A);
521:     }
522:     PetscLogObjectParent(mat,A);

524:     /* Copy the matrix ... This isn't the most efficient means,
525:        but it's quick for now */
526:     row = mbd->rstart;
527:     m = mbd->A->m;
528:     for (i=0; i<m; i++) {
529:       MatGetRow(mat,row,&nz,&cols,&vals);
530:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
531:       MatRestoreRow(mat,row,&nz,&cols,&vals);
532:       row++;
533:     }
534:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
535:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
536:     PetscViewerGetSingleton(viewer,&sviewer);
537:     if (!rank) {
538:       MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
539:     }
540:     PetscViewerRestoreSingleton(viewer,&sviewer);
541:     PetscViewerFlush(viewer);
542:     MatDestroy(A);
543:   }
544:   return(0);
545: }

549: int MatView_MPIBDiag(Mat mat,PetscViewer viewer)
550: {
551:   int        ierr;
552:   PetscTruth isascii,isdraw,isbinary;

555:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
556:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
557:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
558:   if (isascii || isdraw) {
559:     MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
560:   } else if (isbinary) {
561:     MatView_MPIBDiag_Binary(mat,viewer);
562:   } else {
563:     SETERRQ1(1,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
564:   }
565:   return(0);
566: }

570: int MatSetOption_MPIBDiag(Mat A,MatOption op)
571: {
572:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
573:   int          ierr;

575:   switch (op) {
576:   case MAT_NO_NEW_NONZERO_LOCATIONS:
577:   case MAT_YES_NEW_NONZERO_LOCATIONS:
578:   case MAT_NEW_NONZERO_LOCATION_ERR:
579:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
580:   case MAT_NO_NEW_DIAGONALS:
581:   case MAT_YES_NEW_DIAGONALS:
582:     MatSetOption(mbd->A,op);
583:     break;
584:   case MAT_ROW_ORIENTED:
585:     mbd->roworiented = PETSC_TRUE;
586:     MatSetOption(mbd->A,op);
587:     break;
588:   case MAT_COLUMN_ORIENTED:
589:     mbd->roworiented = PETSC_FALSE;
590:     MatSetOption(mbd->A,op);
591:     break;
592:   case MAT_IGNORE_OFF_PROC_ENTRIES:
593:     mbd->donotstash = PETSC_TRUE;
594:     break;
595:   case MAT_ROWS_SORTED:
596:   case MAT_ROWS_UNSORTED:
597:   case MAT_COLUMNS_SORTED:
598:   case MAT_COLUMNS_UNSORTED:
599:     PetscLogInfo(A,"MatSetOption_MPIBDiag:Option ignored\n");
600:     break;
601:   default:
602:     SETERRQ(PETSC_ERR_SUP,"unknown option");
603:   }
604:   return(0);
605: }

609: int MatGetRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
610: {
611:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
612:   int          lrow,ierr;

 615:   if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows");
616:   lrow = row - mat->rstart;
617:   MatGetRow(mat->A,lrow,nz,idx,v);
618:   return(0);
619: }

623: int MatRestoreRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,
624:                                   PetscScalar **v)
625: {
626:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
627:   int          lrow,ierr;

630:   lrow = row - mat->rstart;
631:   MatRestoreRow(mat->A,lrow,nz,idx,v);
632:   return(0);
633: }


638: int MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
639: {
640:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
641:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
642:   PetscReal    sum = 0.0;
643:   int          ierr,d,i,nd = a->nd,bs = a->bs,len;
644:   PetscScalar  *dv;

647:   if (type == NORM_FROBENIUS) {
648:     for (d=0; d<nd; d++) {
649:       dv   = a->diagv[d];
650:       len  = a->bdlen[d]*bs*bs;
651:       for (i=0; i<len; i++) {
652: #if defined(PETSC_USE_COMPLEX)
653:         sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
654: #else
655:         sum += dv[i]*dv[i];
656: #endif
657:       }
658:     }
659:     MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
660:     *nrm = sqrt(*nrm);
661:     PetscLogFlops(2*A->n*A->m);
662:   } else if (type == NORM_1) { /* max column norm */
663:     PetscReal *tmp,*tmp2;
664:     int    j;
665:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
666:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
667:     MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
668:     *nrm = 0.0;
669:     MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
670:     for (j=0; j<mbd->A->n; j++) {
671:       if (tmp2[j] > *nrm) *nrm = tmp2[j];
672:     }
673:     PetscFree(tmp);
674:     PetscFree(tmp2);
675:   } else if (type == NORM_INFINITY) { /* max row norm */
676:     PetscReal normtemp;
677:     MatNorm(mbd->A,type,&normtemp);
678:     MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
679:   }
680:   return(0);
681: }

685: int MatPrintHelp_MPIBDiag(Mat A)
686: {
687:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
688:   int          ierr;

691:   if (!a->rank) {
692:     MatPrintHelp_SeqBDiag(a->A);
693:   }
694:   return(0);
695: }

699: int MatScale_MPIBDiag(const PetscScalar *alpha,Mat A)
700: {
701:   int          ierr;
702:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;

705:   MatScale_SeqBDiag(alpha,a->A);
706:   return(0);
707: }

711: int MatSetUpPreallocation_MPIBDiag(Mat A)
712: {
713:   int        ierr;

716:    MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
717:   return(0);
718: }

720: /* -------------------------------------------------------------------*/

722: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
723:        MatGetRow_MPIBDiag,
724:        MatRestoreRow_MPIBDiag,
725:        MatMult_MPIBDiag,
726: /* 4*/ MatMultAdd_MPIBDiag,
727:        MatMultTranspose_MPIBDiag,
728:        MatMultTransposeAdd_MPIBDiag,
729:        0,
730:        0,
731:        0,
732: /*10*/ 0,
733:        0,
734:        0,
735:        0,
736:        0,
737: /*15*/ MatGetInfo_MPIBDiag,
738:        0,
739:        MatGetDiagonal_MPIBDiag,
740:        0,
741:        MatNorm_MPIBDiag,
742: /*20*/ MatAssemblyBegin_MPIBDiag,
743:        MatAssemblyEnd_MPIBDiag,
744:        0,
745:        MatSetOption_MPIBDiag,
746:        MatZeroEntries_MPIBDiag,
747: /*25*/ MatZeroRows_MPIBDiag,
748:        0,
749:        0,
750:        0,
751:        0,
752: /*30*/ MatSetUpPreallocation_MPIBDiag,
753:        0,
754:        0,
755:        0,
756:        0,
757: /*35*/ 0,
758:        0,
759:        0,
760:        0,
761:        0,
762: /*40*/ 0,
763:        0,
764:        0,
765:        MatGetValues_MPIBDiag,
766:        0,
767: /*45*/ MatPrintHelp_MPIBDiag,
768:        MatScale_MPIBDiag,
769:        0,
770:        0,
771:        0,
772: /*50*/ MatGetBlockSize_MPIBDiag,
773:        0,
774:        0,
775:        0,
776:        0,
777: /*55*/ 0,
778:        0,
779:        0,
780:        0,
781:        0,
782: /*60*/ 0,
783:        MatDestroy_MPIBDiag,
784:        MatView_MPIBDiag,
785:        MatGetPetscMaps_Petsc,
786:        0,
787: /*65*/ 0,
788:        0,
789:        0,
790:        0,
791:        0,
792: /*70*/ 0,
793:        0,
794:        0,
795:        0,
796:        0,
797: /*75*/ 0,
798:        0,
799:        0,
800:        0,
801:        0,
802: /*80*/ 0,
803:        0,
804:        0,
805:        0,
806:        0,
807: /*85*/ MatLoad_MPIBDiag
808: };

810: EXTERN_C_BEGIN
813: int MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
814: {
815:   Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
816:   int          ierr,lrows,lcols,rstart,rend;
817:   IS           localc,localr;

820:   MatGetLocalSize(A,&lrows,&lcols);
821:   MatGetOwnershipRange(A,&rstart,&rend);
822:   ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
823:   ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
824:   MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
825:   ISDestroy(localr);
826:   ISDestroy(localc);

828:   *iscopy = PETSC_TRUE;
829:   return(0);
830: }
831: EXTERN_C_END

833: EXTERN_C_BEGIN
836: int MatMPIBDiagSetPreallocation_MPIBDiag(Mat B,int nd,int bs,int *diag,PetscScalar **diagv)
837: {
838:   Mat_MPIBDiag *b;
839:   int          ierr,i,k,*ldiag,len,nd2;
840:   PetscScalar  **ldiagv = 0;
841:   PetscTruth   flg2;

844:   B->preallocated = PETSC_TRUE;
845:   if (bs == PETSC_DEFAULT) bs = 1;
846:   if (nd == PETSC_DEFAULT) nd = 0;
847:   PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
848:   PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
849:   PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
850:   if (nd && !diag) {
851:     PetscMalloc(nd*sizeof(int),&diag);
852:     nd2  = nd;
853:     PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
854:     if (nd2 != nd) {
855:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
856:     }
857:   } else if (flg2) {
858:     SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
859:   }

861:   if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");

863:   PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
864:   B->n = B->N = PetscMax(B->n,B->N);

866:   if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
867:   if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
868:   if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");

 870:   /* the information in the maps duplicates the information computed below; eventually 
871:      we should remove the duplicate information that is not contained in the maps */
872:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
873:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);


876:   b          = (Mat_MPIBDiag*)B->data;
877:   b->gnd     = nd;

879:   MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
880:   b->rowners[0] = 0;
881:   for (i=2; i<=b->size; i++) {
882:     b->rowners[i] += b->rowners[i-1];
883:   }
884:   b->rstart  = b->rowners[b->rank];
885:   b->rend    = b->rowners[b->rank+1];
886:   b->brstart = (b->rstart)/bs;
887:   b->brend   = (b->rend)/bs;


890:   /* Determine local diagonals; for now, assume global rows = global cols */
891:   /* These are sorted in MatCreateSeqBDiag */
892:   PetscMalloc((nd+1)*sizeof(int),&ldiag);
893:   len  = B->M/bs + B->N/bs + 1;
894:   PetscMalloc(len*sizeof(int),&b->gdiag);
895:   k    = 0;
896:   PetscLogObjectMemory(B,(nd+1)*sizeof(int) + (b->size+2)*sizeof(int)
897:                         + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
898:   if (diagv) {
899:     PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
900:   }
901:   for (i=0; i<nd; i++) {
902:     b->gdiag[i] = diag[i];
903:     if (diag[i] > 0) { /* lower triangular */
904:       if (diag[i] < b->brend) {
905:         ldiag[k] = diag[i] - b->brstart;
906:         if (diagv) ldiagv[k] = diagv[i];
907:         k++;
908:       }
909:     } else { /* upper triangular */
910:       if (B->M/bs - diag[i] > B->N/bs) {
911:         if (B->M/bs + diag[i] > b->brstart) {
912:           ldiag[k] = diag[i] - b->brstart;
913:           if (diagv) ldiagv[k] = diagv[i];
914:           k++;
915:         }
916:       } else {
917:         if (B->M/bs > b->brstart) {
918:           ldiag[k] = diag[i] - b->brstart;
919:           if (diagv) ldiagv[k] = diagv[i];
920:           k++;
921:         }
922:       }
923:     }
924:   }

926:   /* Form local matrix */
927:   MatCreateSeqBDiag(PETSC_COMM_SELF,B->m,B->n,k,bs,ldiag,ldiagv,&b->A);
928:   PetscLogObjectParent(B,b->A);
929:   PetscFree(ldiag);
930:   if (ldiagv) {PetscFree(ldiagv);}

932:   return(0);
933: }
934: EXTERN_C_END

936: /*MC
937:    MATMPIBDIAG - MATMPIBDIAG = "mpibdiag" - A matrix type to be used for distributed block diagonal matrices.

939:    Options Database Keys:
940: . -mat_type mpibdiag - sets the matrix type to "mpibdiag" during a call to MatSetFromOptions()

942:   Level: beginner

944: .seealso: MatCreateMPIBDiag
945: M*/
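
/*
   Example (illustrative sketch): selecting the "mpibdiag" type explicitly.
   The global size 100 and the default preallocation are arbitrary choices
   for demonstration.

     Mat A;
     int ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBDIAG);CHKERRQ(ierr);
     ierr = MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, call MatSetFromOptions(A) and run with -mat_type mpibdiag.
*/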

947: EXTERN_C_BEGIN
950: int MatCreate_MPIBDiag(Mat B)
951: {
952:   Mat_MPIBDiag *b;
953:   int          ierr;

956:   PetscNew(Mat_MPIBDiag,&b);
957:   B->data         = (void*)b;
958:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
959:   B->factor       = 0;
960:   B->mapping      = 0;

962:   B->insertmode = NOT_SET_VALUES;
963:   MPI_Comm_rank(B->comm,&b->rank);
964:   MPI_Comm_size(B->comm,&b->size);

966:   /* build local table of row ownerships */
967:   PetscMalloc((b->size+2)*sizeof(int),&b->rowners);

 969:   /* build cache for off-processor entries formed */
970:   MatStashCreate_Private(B->comm,1,&B->stash);
971:   b->donotstash = PETSC_FALSE;

973:   /* stuff used for matrix-vector multiply */
974:   b->lvec        = 0;
975:   b->Mvctx       = 0;

977:   /* used for MatSetValues() input */
978:   b->roworiented = PETSC_TRUE;

980:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
981:                                      "MatGetDiagonalBlock_MPIBDiag",
982:                                       MatGetDiagonalBlock_MPIBDiag);
983:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBDiagSetPreallocation_C",
984:                                      "MatMPIBDiagSetPreallocation_MPIBDiag",
985:                                       MatMPIBDiagSetPreallocation_MPIBDiag);
986:   return(0);
987: }
988: EXTERN_C_END

990: /*MC
991:    MATBDIAG - MATBDIAG = "bdiag" - A matrix type to be used for block diagonal matrices.

993:    This matrix type is identical to MATSEQBDIAG when constructed with a single process communicator,
994:    and MATMPIBDIAG otherwise.

996:    Options Database Keys:
997: . -mat_type bdiag - sets the matrix type to "bdiag" during a call to MatSetFromOptions()

999:   Level: beginner

1001: .seealso: MatCreateMPIBDiag,MATSEQBDIAG,MATMPIBDIAG
1002: M*/
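
/*
   Example (illustrative sketch): with the generic "bdiag" type the sequential
   or parallel variant is chosen from the communicator size, so the same code
   runs on one or many processes.  The global size 64 is an arbitrary choice
   for demonstration.

     Mat A;
     int ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,64,64,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   together with the runtime option -mat_type bdiag; MatCreate_BDiag() below
   then forwards to MATSEQBDIAG or MATMPIBDIAG.
*/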

1004: EXTERN_C_BEGIN
1007: int MatCreate_BDiag(Mat A) {
1008:   int ierr,size;

1011:   PetscObjectChangeTypeName((PetscObject)A,MATBDIAG);
1012:   MPI_Comm_size(A->comm,&size);
1013:   if (size == 1) {
1014:     MatSetType(A,MATSEQBDIAG);
1015:   } else {
1016:     MatSetType(A,MATMPIBDIAG);
1017:   }
1018:   return(0);
1019: }
1020: EXTERN_C_END

1024: /*@C
 1025:    MatMPIBDiagSetPreallocation - Sets the block diagonal structure and preallocates memory for a MATMPIBDIAG matrix.

1027:    Collective on Mat

1029:    Input Parameters:
1030: +  A - the matrix 
1031: .  nd - number of block diagonals (global) (optional)
 1032: .  bs - each element of a diagonal is a bs x bs dense matrix
1033: .  diag - optional array of block diagonal numbers (length nd).
1034:    For a matrix element A[i,j], where i=row and j=column, the
1035:    diagonal number is
1036: $     diag = i/bs - j/bs  (integer division)
1037:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
1038:    needed (expensive).
1039: -  diagv  - pointer to actual diagonals (in same order as diag array), 
1040:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1041:    to control memory allocation.


1044:    Options Database Keys:
1045: .  -mat_block_size <bs> - Sets blocksize
1046: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

1048:    Notes:
 1049:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
 1050:    then it must be used on all processors that share the object for that argument.

1052:    The parallel matrix is partitioned across the processors by rows, where
1053:    each local rectangular matrix is stored in the uniprocessor block 
1054:    diagonal format.  See the users manual for further details.

1056:    The user MUST specify either the local or global numbers of rows
1057:    (possibly both).

1059:    The case bs=1 (conventional diagonal storage) is implemented as
1060:    a special case.

1062:    Fortran Notes:
1063:    Fortran programmers cannot set diagv; this variable is ignored.

1065:    Level: intermediate

1067: .keywords: matrix, block, diagonal, parallel, sparse

1069: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1070: @*/
1071: int MatMPIBDiagSetPreallocation(Mat B,int nd,int bs,const int diag[],PetscScalar *diagv[])
1072: {
1073:   int ierr,(*f)(Mat,int,int,const int[],PetscScalar*[]);

1076:   PetscObjectQueryFunction((PetscObject)B,"MatMPIBDiagSetPreallocation_C",(void (**)(void))&f);
1077:   if (f) {
1078:     (*f)(B,nd,bs,diag,diagv);
1079:   }
1080:   return(0);
1081: }
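
/*
   Example (illustrative sketch): preallocating a block tridiagonal matrix
   with block size 2 and the three block diagonals -1, 0, 1 (recall that the
   diagonal number of element A[i,j] is i/bs - j/bs).  The global size 64 is
   an arbitrary choice for demonstration.

     Mat A;
     int ierr,diags[3] = {-1,0,1};

     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,64,64,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBDIAG);CHKERRQ(ierr);
     ierr = MatMPIBDiagSetPreallocation(A,3,2,diags,PETSC_NULL);CHKERRQ(ierr);
*/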

1085: /*@C
1086:    MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.

1088:    Collective on MPI_Comm

1090:    Input Parameters:
1091: +  comm - MPI communicator
 1092: +  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
 1093: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1094: .  N - number of columns (local and global)
1095: .  nd - number of block diagonals (global) (optional)
 1096: .  bs - each element of a diagonal is a bs x bs dense matrix
1097: .  diag - optional array of block diagonal numbers (length nd).
1098:    For a matrix element A[i,j], where i=row and j=column, the
1099:    diagonal number is
1100: $     diag = i/bs - j/bs  (integer division)
1101:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
1102:    needed (expensive).
1103: -  diagv  - pointer to actual diagonals (in same order as diag array), 
1104:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1105:    to control memory allocation.

1107:    Output Parameter:
1108: .  A - the matrix 

1110:    Options Database Keys:
1111: .  -mat_block_size <bs> - Sets blocksize
1112: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

1114:    Notes:
 1115:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
 1116:    then it must be used on all processors that share the object for that argument.

1118:    The parallel matrix is partitioned across the processors by rows, where
1119:    each local rectangular matrix is stored in the uniprocessor block 
1120:    diagonal format.  See the users manual for further details.

1122:    The user MUST specify either the local or global numbers of rows
1123:    (possibly both).

1125:    The case bs=1 (conventional diagonal storage) is implemented as
1126:    a special case.

1128:    Fortran Notes:
1129:    Fortran programmers cannot set diagv; this variable is ignored.

1131:    Level: intermediate

1133: .keywords: matrix, block, diagonal, parallel, sparse

1135: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1136: @*/
1137: int MatCreateMPIBDiag(MPI_Comm comm,int m,int M,int N,int nd,int bs,const int diag[],PetscScalar *diagv[],Mat *A)
1138: {
1139:   int ierr,size;

1142:   MatCreate(comm,m,m,M,N,A);
1143:   MPI_Comm_size(comm,&size);
1144:   if (size > 1) {
1145:     MatSetType(*A,MATMPIBDIAG);
1146:     MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1147:   } else {
1148:     MatSetType(*A,MATSEQBDIAG);
1149:     MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1150:   }
1151:   return(0);
1152: }
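
/*
   Example (illustrative sketch): creating a 100 x 100 tridiagonal matrix
   (bs = 1, diagonals -1, 0, 1) and filling the locally owned rows.  The
   size and numerical values are arbitrary choices for demonstration.

     Mat         A;
     int         ierr,i,rstart,rend,cols[3],diags[3] = {-1,0,1};
     PetscScalar vals[3];

     ierr = MatCreateMPIBDiag(PETSC_COMM_WORLD,PETSC_DECIDE,100,100,3,1,diags,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       int ncols = 0;
       if (i > 0)  {cols[ncols] = i-1; vals[ncols] = -1.0; ncols++;}
       cols[ncols] = i; vals[ncols] = 2.0; ncols++;
       if (i < 99) {cols[ncols] = i+1; vals[ncols] = -1.0; ncols++;}
       ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/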

1156: /*@C
1157:    MatBDiagGetData - Gets the data for the block diagonal matrix format.
1158:    For the parallel case, this returns information for the local submatrix.

1160:    Input Parameters:
1161: .  mat - the matrix, stored in block diagonal format.

1163:    Not Collective

 1165:    Output Parameters:
 1168: +  nd - number of block diagonals
 1169: .  bs - each element of a diagonal is a bs x bs dense matrix
 1170: .  bdlen - array of total block lengths of block diagonals
 1171: .  diag - array of block diagonal numbers (length nd).
 1172:    For a matrix element A[i,j], where i=row and j=column, the
 1173:    diagonal number is
 1174: $     diag = i/bs - j/bs  (integer division)
 1177: -  diagv - pointer to the actual diagonals (in same order as the diag array)

1179:    Level: advanced

1181:    Notes:
1182:    See the users manual for further details regarding this storage format.

1184: .keywords: matrix, block, diagonal, get, data

1186: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1187: @*/
1188: int MatBDiagGetData(Mat mat,int *nd,int *bs,int *diag[],int *bdlen[],PetscScalar ***diagv)
1189: {
1190:   Mat_MPIBDiag *pdmat;
1191:   Mat_SeqBDiag *dmat = 0;
1192:   PetscTruth   isseq,ismpi;
1193:   int          ierr;

1197:   PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1198:   PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1199:   if (isseq) {
1200:     dmat = (Mat_SeqBDiag*)mat->data;
1201:   } else if (ismpi) {
1202:     pdmat = (Mat_MPIBDiag*)mat->data;
1203:     dmat = (Mat_SeqBDiag*)pdmat->A->data;
1204:   } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1205:   *nd    = dmat->nd;
1206:   *bs    = dmat->bs;
1207:   *diag  = dmat->diag;
1208:   *bdlen = dmat->bdlen;
1209:   *diagv = dmat->diagv;
1210:   return(0);
1211: }
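
/*
   Example (illustrative sketch): inspecting the local diagonal structure of
   an assembled block diagonal matrix A.

     int         nd,bs,*diag,*bdlen,i,ierr;
     PetscScalar **diagv;

     ierr = MatBDiagGetData(A,&nd,&bs,&diag,&bdlen,&diagv);CHKERRQ(ierr);
     for (i=0; i<nd; i++) {
       ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal %d: number %d, block length %d\n",i,diag[i],bdlen[i]);CHKERRQ(ierr);
     }
*/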

 1213:  #include "petscsys.h"

1217: int MatLoad_MPIBDiag(PetscViewer viewer,MatType type,Mat *newmat)
1218: {
1219:   Mat          A;
1220:   PetscScalar  *vals,*svals;
1221:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1222:   MPI_Status   status;
1223:   int          bs,i,nz,ierr,j,rstart,rend,fd,*rowners,maxnz,*cols;
1224:   int          header[4],rank,size,*rowlengths = 0,M,N,m,Mbs;
1225:   int          *ourlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*smycols;
1226:   int          tag = ((PetscObject)viewer)->tag,extra_rows;

1229:   MPI_Comm_size(comm,&size);
1230:   MPI_Comm_rank(comm,&rank);
1231:   if (!rank) {
1232:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1233:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1234:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1235:     if (header[3] < 0) {
1236:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format,cannot load as MPIBDiag");
1237:     }
1238:   }
1239:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1240:   M = header[1]; N = header[2];

1242:   bs = 1;   /* uses a block size of 1 by default; */
1243:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);

1245:   /* 
1246:      This code adds extra rows to make sure the number of rows is 
1247:      divisible by the blocksize
1248:   */
1249:   Mbs        = M/bs;
1250:   extra_rows = bs - M + bs*(Mbs);
1251:   if (extra_rows == bs) extra_rows = 0;
1252:   else                  Mbs++;
1253:   if (extra_rows && !rank) {
1254:     PetscLogInfo(0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n");
1255:   }

1257:   /* determine ownership of all rows */
1258:   m          = bs*(Mbs/size + ((Mbs % size) > rank));
1259:   PetscMalloc((size+2)*sizeof(int),&rowners);
1260:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1261:   rowners[0] = 0;
1262:   for (i=2; i<=size; i++) {
1263:     rowners[i] += rowners[i-1];
1264:   }
1265:   rstart = rowners[rank];
1266:   rend   = rowners[rank+1];

1268:   /* distribute row lengths to all processors */
1269:   PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1270:   if (!rank) {
1271:     PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1272:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1273:     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1274:     PetscMalloc(size*sizeof(int),&sndcounts);
1275:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1276:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1277:     PetscFree(sndcounts);
1278:   } else {
1279:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1280:   }

1282:   if (!rank) {
1283:     /* calculate the number of nonzeros on each processor */
1284:     PetscMalloc(size*sizeof(int),&procsnz);
1285:     PetscMemzero(procsnz,size*sizeof(int));
1286:     for (i=0; i<size; i++) {
1287:       for (j=rowners[i]; j<rowners[i+1]; j++) {
1288:         procsnz[i] += rowlengths[j];
1289:       }
1290:     }
1291:     PetscFree(rowlengths);

1293:     /* determine max buffer needed and allocate it */
1294:     maxnz = 0;
1295:     for (i=0; i<size; i++) {
1296:       maxnz = PetscMax(maxnz,procsnz[i]);
1297:     }
1298:     PetscMalloc(maxnz*sizeof(int),&cols);

1300:     /* read in my part of the matrix column indices  */
1301:     nz   = procsnz[0];
1302:     PetscMalloc(nz*sizeof(int),&mycols);
1303:     if (size == 1)  nz -= extra_rows;
1304:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1305:     if (size == 1)  for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

 1307:     /* read in everyone else's rows and ship them off */
1308:     for (i=1; i<size-1; i++) {
1309:       nz   = procsnz[i];
1310:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1311:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1312:     }
1313:     /* read in the stuff for the last proc */
1314:     if (size != 1) {
1315:       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1316:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1317:       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1318:       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1319:     }
1320:     PetscFree(cols);
1321:   } else {
1322:     /* determine buffer space needed for message */
1323:     nz = 0;
1324:     for (i=0; i<m; i++) {
1325:       nz += ourlens[i];
1326:     }
1327:     PetscMalloc(nz*sizeof(int),&mycols);

1329:     /* receive message of column indices*/
1330:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1331:     MPI_Get_count(&status,MPI_INT,&maxnz);
1332:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1333:   }

1335:   MatCreateMPIBDiag(comm,m,M+extra_rows,N+extra_rows,PETSC_NULL,bs,PETSC_NULL,PETSC_NULL,newmat);
1336:   A = *newmat;

1338:   if (!rank) {
1339:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1341:     /* read in my part of the matrix numerical values  */
1342:     nz = procsnz[0];
1343:     if (size == 1)  nz -= extra_rows;
1344:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1345:     if (size == 1)  for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

1347:     /* insert into matrix */
1348:     jj      = rstart;
1349:     smycols = mycols;
1350:     svals   = vals;
1351:     for (i=0; i<m; i++) {
1352:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1353:       smycols += ourlens[i];
1354:       svals   += ourlens[i];
1355:       jj++;
1356:     }

1358:     /* read in other processors (except the last one) and ship out */
1359:     for (i=1; i<size-1; i++) {
1360:       nz   = procsnz[i];
1361:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1362:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1363:     }
1364:     /* the last proc */
1365:     if (size != 1){
1366:       nz   = procsnz[i] - extra_rows;
1367:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1368:       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1369:       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1370:     }
1371:     PetscFree(procsnz);
1372:   } else {
1373:     /* receive numeric values */
1374:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1376:     /* receive message of values*/
1377:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1378:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1379:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1381:     /* insert into matrix */
1382:     jj      = rstart;
1383:     smycols = mycols;
1384:     svals   = vals;
1385:     for (i=0; i<m; i++) {
1386:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1387:       smycols += ourlens[i];
1388:       svals   += ourlens[i];
1389:       jj++;
1390:     }
1391:   }
1392:   PetscFree(ourlens);
1393:   PetscFree(vals);
1394:   PetscFree(mycols);
1395:   PetscFree(rowners);

1397:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1398:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1399:   return(0);
1400: }