Actual source code: mpidense.c

  1: /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

  3: /*
  4:    Basic functions for parallel dense matrices.
  5: */
  6: 
 7:  #include "src/mat/impls/dense/mpi/mpidense.h"
 8:  #include "src/vec/vecimpl.h"

 10: EXTERN_C_BEGIN
 13: int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
 14: {
 15:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
 16:   int          m = A->m,rstart = mdn->rstart,ierr;
 17:   PetscScalar  *array;
 18:   MPI_Comm     comm;

 21:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

 23:   /* The reuse aspect is not implemented efficiently */
 24:   if (reuse) { MatDestroy(*B);}

 26:   PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 27:   MatGetArray(mdn->A,&array);
 28:   MatCreateSeqDense(comm,m,m,array+m*rstart,B);
 29:   MatRestoreArray(mdn->A,&array);
 30:   MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
 31:   MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
 32: 
 33:   *iscopy = PETSC_TRUE;
 34:   return(0);
 35: }
 36: EXTERN_C_END

 40: int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
 41: {
 42:   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
 43:   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
 44:   PetscTruth   roworiented = A->roworiented;

 47:   for (i=0; i<m; i++) {
 48:     if (idxm[i] < 0) continue;
 49:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 50:     if (idxm[i] >= rstart && idxm[i] < rend) {
 51:       row = idxm[i] - rstart;
 52:       if (roworiented) {
 53:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
 54:       } else {
 55:         for (j=0; j<n; j++) {
 56:           if (idxn[j] < 0) continue;
 57:           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 58:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 59:         }
 60:       }
 61:     } else {
 62:       if (!A->donotstash) {
 63:         if (roworiented) {
 64:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 65:         } else {
 66:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 67:         }
 68:       }
 69:     }
 70:   }
 71:   return(0);
 72: }

 76: int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
 77: {
 78:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
 79:   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

 82:   for (i=0; i<m; i++) {
 83:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 84:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 85:     if (idxm[i] >= rstart && idxm[i] < rend) {
 86:       row = idxm[i] - rstart;
 87:       for (j=0; j<n; j++) {
 88:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 89:         if (idxn[j] >= mat->N) {
 90:           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 91:         }
 92:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
 93:       }
 94:     } else {
 95:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 96:     }
 97:   }
 98:   return(0);
 99: }

103: int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
104: {
105:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
106:   int          ierr;

109:   MatGetArray(a->A,array);
110:   return(0);
111: }

115: static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
116: {
117:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
118:   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
119:   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
120:   PetscScalar  *av,*bv,*v = lmat->v;
121:   Mat          newmat;

124:   ISGetIndices(isrow,&irow);
125:   ISGetIndices(iscol,&icol);
126:   ISGetLocalSize(isrow,&nrows);
127:   ISGetLocalSize(iscol,&ncols);

 129:   /* No parallel redistribution currently supported! Should really check each index set
 130:      to confirm that it is OK.  ... Currently supports only submatrices with the same
 131:      partitioning as the original matrix! */

133:   MatGetLocalSize(A,&nlrows,&nlcols);
134:   MatGetOwnershipRange(A,&rstart,&rend);
135: 
136:   /* Check submatrix call */
137:   if (scall == MAT_REUSE_MATRIX) {
138:     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
 139:     /* Really need to test the row and column sizes! */
140:     newmat = *B;
141:   } else {
142:     /* Create and fill new matrix */
143:     MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);
144:   }

146:   /* Now extract the data pointers and do the copy, column at a time */
147:   newmatd = (Mat_MPIDense*)newmat->data;
148:   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
149: 
150:   for (i=0; i<ncols; i++) {
151:     av = v + nlrows*icol[i];
152:     for (j=0; j<nrows; j++) {
153:       *bv++ = av[irow[j] - rstart];
154:     }
155:   }

157:   /* Assemble the matrices so that the correct flags are set */
158:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
159:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

161:   /* Free work space */
162:   ISRestoreIndices(isrow,&irow);
163:   ISRestoreIndices(iscol,&icol);
164:   *B = newmat;
165:   return(0);
166: }

170: int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
171: {
173:   return(0);
174: }

178: int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
179: {
180:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
181:   MPI_Comm     comm = mat->comm;
182:   int          ierr,nstash,reallocs;
183:   InsertMode   addv;

 186:   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
187:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
188:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
189:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
190:   }
191:   mat->insertmode = addv; /* in case this processor had no cache */

193:   MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
194:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
195:   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
196:   return(0);
197: }

201: int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
202: {
203:   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
204:   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
205:   PetscScalar  *val;
206:   InsertMode   addv=mat->insertmode;

209:   /*  wait on receives */
210:   while (1) {
211:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
212:     if (!flg) break;
213: 
214:     for (i=0; i<n;) {
215:       /* Now identify the consecutive vals belonging to the same row */
216:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
217:       if (j < n) ncols = j-i;
218:       else       ncols = n-i;
219:       /* Now assemble all these values with a single function call */
220:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
221:       i = j;
222:     }
223:   }
224:   MatStashScatterEnd_Private(&mat->stash);
225: 
226:   MatAssemblyBegin(mdn->A,mode);
227:   MatAssemblyEnd(mdn->A,mode);

229:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
230:     MatSetUpMultiply_MPIDense(mat);
231:   }
232:   return(0);
233: }

237: int MatZeroEntries_MPIDense(Mat A)
238: {
239:   int          ierr;
240:   Mat_MPIDense *l = (Mat_MPIDense*)A->data;

243:   MatZeroEntries(l->A);
244:   return(0);
245: }

249: int MatGetBlockSize_MPIDense(Mat A,int *bs)
250: {
252:   *bs = 1;
253:   return(0);
254: }

 256: /* The code does not handle the diagonal entries correctly unless the 
 257:    matrix is square and the column and row ownerships are identical.
 258:    This is a BUG. The only way to fix it seems to be to access 
 259:    mdn->A and mdn->B directly and not through the MatZeroRows() 
 260:    routine. 
 261: */
264: int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
265: {
266:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
267:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
268:   int            *nprocs,j,idx,nsends;
269:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
270:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
271:   int            *lens,imdex,*lrows,*values;
272:   MPI_Comm       comm = A->comm;
273:   MPI_Request    *send_waits,*recv_waits;
274:   MPI_Status     recv_status,*send_status;
275:   IS             istmp;
276:   PetscTruth     found;

279:   ISGetLocalSize(is,&N);
280:   ISGetIndices(is,&rows);

282:   /*  first count number of contributors to each processor */
283:   PetscMalloc(2*size*sizeof(int),&nprocs);
284:   PetscMemzero(nprocs,2*size*sizeof(int));
285:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
286:   for (i=0; i<N; i++) {
287:     idx = rows[i];
288:     found = PETSC_FALSE;
289:     for (j=0; j<size; j++) {
290:       if (idx >= owners[j] && idx < owners[j+1]) {
291:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
292:       }
293:     }
294:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
295:   }
296:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

298:   /* inform other processors of number of messages and max length*/
299:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

301:   /* post receives:   */
302:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
303:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
304:   for (i=0; i<nrecvs; i++) {
305:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
306:   }

308:   /* do sends:
309:       1) starts[i] gives the starting index in svalues for stuff going to 
310:          the ith processor
311:   */
312:   PetscMalloc((N+1)*sizeof(int),&svalues);
313:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
314:   PetscMalloc((size+1)*sizeof(int),&starts);
315:   starts[0]  = 0;
316:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
317:   for (i=0; i<N; i++) {
318:     svalues[starts[owner[i]]++] = rows[i];
319:   }
320:   ISRestoreIndices(is,&rows);

322:   starts[0] = 0;
323:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
324:   count = 0;
325:   for (i=0; i<size; i++) {
326:     if (nprocs[2*i+1]) {
327:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
328:     }
329:   }
330:   PetscFree(starts);

332:   base = owners[rank];

334:   /*  wait on receives */
335:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
336:   source = lens + nrecvs;
337:   count  = nrecvs; slen = 0;
338:   while (count) {
339:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
340:     /* unpack receives into our local space */
341:     MPI_Get_count(&recv_status,MPI_INT,&n);
342:     source[imdex]  = recv_status.MPI_SOURCE;
343:     lens[imdex]  = n;
344:     slen += n;
345:     count--;
346:   }
347:   PetscFree(recv_waits);
348: 
349:   /* move the data into the send scatter */
350:   PetscMalloc((slen+1)*sizeof(int),&lrows);
351:   count = 0;
352:   for (i=0; i<nrecvs; i++) {
353:     values = rvalues + i*nmax;
354:     for (j=0; j<lens[i]; j++) {
355:       lrows[count++] = values[j] - base;
356:     }
357:   }
358:   PetscFree(rvalues);
359:   PetscFree(lens);
360:   PetscFree(owner);
361:   PetscFree(nprocs);
362: 
363:   /* actually zap the local rows */
364:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
365:   PetscLogObjectParent(A,istmp);
366:   PetscFree(lrows);
367:   MatZeroRows(l->A,istmp,diag);
368:   ISDestroy(istmp);

370:   /* wait on sends */
371:   if (nsends) {
372:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
373:     MPI_Waitall(nsends,send_waits,send_status);
374:     PetscFree(send_status);
375:   }
376:   PetscFree(send_waits);
377:   PetscFree(svalues);

379:   return(0);
380: }

384: int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
385: {
386:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
387:   int          ierr;

390:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
391:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
392:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
393:   return(0);
394: }

398: int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
399: {
400:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
401:   int          ierr;

404:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
405:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
406:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
407:   return(0);
408: }

412: int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
413: {
414:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
415:   int          ierr;
416:   PetscScalar  zero = 0.0;

419:   VecSet(&zero,yy);
420:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
421:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
422:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
423:   return(0);
424: }

428: int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
429: {
430:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
431:   int          ierr;

434:   VecCopy(yy,zz);
435:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
436:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
437:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
438:   return(0);
439: }

443: int MatGetDiagonal_MPIDense(Mat A,Vec v)
444: {
445:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
446:   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
447:   int          ierr,len,i,n,m = A->m,radd;
448:   PetscScalar  *x,zero = 0.0;
449: 
451:   VecSet(&zero,v);
452:   VecGetArray(v,&x);
453:   VecGetSize(v,&n);
454:   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
455:   len  = PetscMin(a->A->m,a->A->n);
456:   radd = a->rstart*m;
457:   for (i=0; i<len; i++) {
458:     x[i] = aloc->v[radd + i*m + i];
459:   }
460:   VecRestoreArray(v,&x);
461:   return(0);
462: }

466: int MatDestroy_MPIDense(Mat mat)
467: {
468:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
469:   int          ierr;


473: #if defined(PETSC_USE_LOG)
474:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
475: #endif
476:   MatStashDestroy_Private(&mat->stash);
477:   PetscFree(mdn->rowners);
478:   MatDestroy(mdn->A);
479:   if (mdn->lvec)   VecDestroy(mdn->lvec);
480:   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
481:   if (mdn->factor) {
482:     if (mdn->factor->temp)   {PetscFree(mdn->factor->temp);}
483:     if (mdn->factor->tag)    {PetscFree(mdn->factor->tag);}
484:     if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
485:     PetscFree(mdn->factor);
486:   }
487:   PetscFree(mdn);
488:   return(0);
489: }

493: static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
494: {
495:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
496:   int          ierr;

499:   if (mdn->size == 1) {
500:     MatView(mdn->A,viewer);
501:   }
502:   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
503:   return(0);
504: }

508: static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
509: {
510:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
511:   int               ierr,size = mdn->size,rank = mdn->rank;
512:   PetscViewerType   vtype;
513:   PetscTruth        isascii,isdraw;
514:   PetscViewer       sviewer;
515:   PetscViewerFormat format;

518:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
519:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
520:   if (isascii) {
521:     PetscViewerGetType(viewer,&vtype);
522:     PetscViewerGetFormat(viewer,&format);
523:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
524:       MatInfo info;
525:       MatGetInfo(mat,MAT_LOCAL,&info);
526:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
527:                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
528:       PetscViewerFlush(viewer);
529:       VecScatterView(mdn->Mvctx,viewer);
530:       return(0);
531:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
532:       return(0);
533:     }
534:   } else if (isdraw) {
535:     PetscDraw       draw;
536:     PetscTruth isnull;

538:     PetscViewerDrawGetDraw(viewer,0,&draw);
539:     PetscDrawIsNull(draw,&isnull);
540:     if (isnull) return(0);
541:   }

543:   if (size == 1) {
544:     MatView(mdn->A,viewer);
545:   } else {
 546:     /* assemble the entire matrix onto the first processor. */
547:     Mat          A;
548:     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
549:     PetscScalar  *vals;

551:     if (!rank) {
552:       MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);
553:     } else {
554:       MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);
555:     }
556:     PetscLogObjectParent(mat,A);

558:     /* Copy the matrix ... This isn't the most efficient means,
559:        but it's quick for now */
560:     row = mdn->rstart; m = mdn->A->m;
561:     for (i=0; i<m; i++) {
562:       MatGetRow(mat,row,&nz,&cols,&vals);
563:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
564:       MatRestoreRow(mat,row,&nz,&cols,&vals);
565:       row++;
566:     }

568:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
569:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
570:     PetscViewerGetSingleton(viewer,&sviewer);
571:     if (!rank) {
572:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
573:     }
574:     PetscViewerRestoreSingleton(viewer,&sviewer);
575:     PetscViewerFlush(viewer);
576:     MatDestroy(A);
577:   }
578:   return(0);
579: }

583: int MatView_MPIDense(Mat mat,PetscViewer viewer)
584: {
585:   int        ierr;
586:   PetscTruth isascii,isbinary,isdraw,issocket;
587: 
589: 
590:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
591:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
592:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
593:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);

595:   if (isascii || issocket || isdraw) {
596:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
597:   } else if (isbinary) {
598:     MatView_MPIDense_Binary(mat,viewer);
599:   } else {
600:     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
601:   }
602:   return(0);
603: }

607: int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
608: {
609:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
610:   Mat          mdn = mat->A;
611:   int          ierr;
612:   PetscReal    isend[5],irecv[5];

615:   info->rows_global    = (double)A->M;
616:   info->columns_global = (double)A->N;
617:   info->rows_local     = (double)A->m;
618:   info->columns_local  = (double)A->N;
619:   info->block_size     = 1.0;
620:   MatGetInfo(mdn,MAT_LOCAL,info);
621:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
622:   isend[3] = info->memory;  isend[4] = info->mallocs;
623:   if (flag == MAT_LOCAL) {
624:     info->nz_used      = isend[0];
625:     info->nz_allocated = isend[1];
626:     info->nz_unneeded  = isend[2];
627:     info->memory       = isend[3];
628:     info->mallocs      = isend[4];
629:   } else if (flag == MAT_GLOBAL_MAX) {
630:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
631:     info->nz_used      = irecv[0];
632:     info->nz_allocated = irecv[1];
633:     info->nz_unneeded  = irecv[2];
634:     info->memory       = irecv[3];
635:     info->mallocs      = irecv[4];
636:   } else if (flag == MAT_GLOBAL_SUM) {
637:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
638:     info->nz_used      = irecv[0];
639:     info->nz_allocated = irecv[1];
640:     info->nz_unneeded  = irecv[2];
641:     info->memory       = irecv[3];
642:     info->mallocs      = irecv[4];
643:   }
644:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
645:   info->fill_ratio_needed = 0;
646:   info->factor_mallocs    = 0;
647:   return(0);
648: }

652: int MatSetOption_MPIDense(Mat A,MatOption op)
653: {
654:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
655:   int          ierr;

658:   switch (op) {
659:   case MAT_NO_NEW_NONZERO_LOCATIONS:
660:   case MAT_YES_NEW_NONZERO_LOCATIONS:
661:   case MAT_NEW_NONZERO_LOCATION_ERR:
662:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
663:   case MAT_COLUMNS_SORTED:
664:   case MAT_COLUMNS_UNSORTED:
665:     MatSetOption(a->A,op);
666:     break;
667:   case MAT_ROW_ORIENTED:
668:     a->roworiented = PETSC_TRUE;
669:     MatSetOption(a->A,op);
670:     break;
671:   case MAT_ROWS_SORTED:
672:   case MAT_ROWS_UNSORTED:
673:   case MAT_YES_NEW_DIAGONALS:
674:   case MAT_USE_HASH_TABLE:
675:     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
676:     break;
677:   case MAT_COLUMN_ORIENTED:
678:     a->roworiented = PETSC_FALSE;
679:     MatSetOption(a->A,op);
680:     break;
681:   case MAT_IGNORE_OFF_PROC_ENTRIES:
682:     a->donotstash = PETSC_TRUE;
683:     break;
684:   case MAT_NO_NEW_DIAGONALS:
685:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
686:   default:
687:     SETERRQ(PETSC_ERR_SUP,"unknown option");
688:   }
689:   return(0);
690: }

694: int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
695: {
696:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
697:   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

 700:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
701:   lrow = row - rstart;
702:   MatGetRow(mat->A,lrow,nz,idx,v);
703:   return(0);
704: }

708: int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
709: {

713:   if (idx) {PetscFree(*idx);}
714:   if (v) {PetscFree(*v);}
715:   return(0);
716: }

720: int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
721: {
722:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
723:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
724:   PetscScalar  *l,*r,x,*v;
725:   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

728:   MatGetLocalSize(A,&s2,&s3);
729:   if (ll) {
730:     VecGetLocalSize(ll,&s2a);
731:     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
732:     VecGetArray(ll,&l);
733:     for (i=0; i<m; i++) {
734:       x = l[i];
735:       v = mat->v + i;
736:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
737:     }
738:     VecRestoreArray(ll,&l);
739:     PetscLogFlops(n*m);
740:   }
741:   if (rr) {
 742:     VecGetLocalSize(rr,&s3a);
743:     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
744:     VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
745:     VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
746:     VecGetArray(mdn->lvec,&r);
747:     for (i=0; i<n; i++) {
748:       x = r[i];
749:       v = mat->v + i*m;
750:       for (j=0; j<m; j++) { (*v++) *= x;}
751:     }
752:     VecRestoreArray(mdn->lvec,&r);
753:     PetscLogFlops(n*m);
754:   }
755:   return(0);
756: }

760: int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
761: {
762:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
763:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
764:   int          ierr,i,j;
765:   PetscReal    sum = 0.0;
766:   PetscScalar  *v = mat->v;

769:   if (mdn->size == 1) {
770:      MatNorm(mdn->A,type,nrm);
771:   } else {
772:     if (type == NORM_FROBENIUS) {
773:       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
774: #if defined(PETSC_USE_COMPLEX)
775:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
776: #else
777:         sum += (*v)*(*v); v++;
778: #endif
779:       }
780:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
781:       *nrm = sqrt(*nrm);
782:       PetscLogFlops(2*mdn->A->n*mdn->A->m);
783:     } else if (type == NORM_1) {
784:       PetscReal *tmp,*tmp2;
785:       PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
786:       tmp2 = tmp + A->N;
787:       PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
788:       *nrm = 0.0;
789:       v = mat->v;
790:       for (j=0; j<mdn->A->n; j++) {
791:         for (i=0; i<mdn->A->m; i++) {
792:           tmp[j] += PetscAbsScalar(*v);  v++;
793:         }
794:       }
795:       MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
796:       for (j=0; j<A->N; j++) {
797:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
798:       }
799:       PetscFree(tmp);
800:       PetscLogFlops(A->n*A->m);
801:     } else if (type == NORM_INFINITY) { /* max row norm */
802:       PetscReal ntemp;
803:       MatNorm(mdn->A,type,&ntemp);
804:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
805:     } else {
806:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
807:     }
808:   }
809:   return(0);
810: }

814: int MatTranspose_MPIDense(Mat A,Mat *matout)
815: {
816:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
817:   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
818:   Mat          B;
819:   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
820:   int          j,i,ierr;
821:   PetscScalar  *v;

824:   if (!matout && M != N) {
 825:     SETERRQ(PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
826:   }
827:   MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);

829:   m = a->A->m; n = a->A->n; v = Aloc->v;
830:   PetscMalloc(n*sizeof(int),&rwork);
831:   for (j=0; j<n; j++) {
832:     for (i=0; i<m; i++) rwork[i] = rstart + i;
833:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
834:     v   += m;
835:   }
836:   PetscFree(rwork);
837:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
838:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
839:   if (matout) {
840:     *matout = B;
841:   } else {
842:     MatHeaderCopy(A,B);
843:   }
844:   return(0);
845: }

 847:  #include "petscblaslapack.h"
850: int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
851: {
852:   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
853:   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
854:   int          one = 1,nz;

857:   nz = inA->m*inA->N;
858:   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
859:   PetscLogFlops(nz);
860:   return(0);
861: }

863: static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

867: int MatSetUpPreallocation_MPIDense(Mat A)
868: {
869:   int        ierr;

872:    MatMPIDenseSetPreallocation(A,0);
873:   return(0);
874: }

876: /* -------------------------------------------------------------------*/
877: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
878:        MatGetRow_MPIDense,
879:        MatRestoreRow_MPIDense,
880:        MatMult_MPIDense,
881: /* 4*/ MatMultAdd_MPIDense,
882:        MatMultTranspose_MPIDense,
883:        MatMultTransposeAdd_MPIDense,
884:        0,
885:        0,
886:        0,
887: /*10*/ 0,
888:        0,
889:        0,
890:        0,
891:        MatTranspose_MPIDense,
892: /*15*/ MatGetInfo_MPIDense,
893:        0,
894:        MatGetDiagonal_MPIDense,
895:        MatDiagonalScale_MPIDense,
896:        MatNorm_MPIDense,
897: /*20*/ MatAssemblyBegin_MPIDense,
898:        MatAssemblyEnd_MPIDense,
899:        0,
900:        MatSetOption_MPIDense,
901:        MatZeroEntries_MPIDense,
902: /*25*/ MatZeroRows_MPIDense,
903:        0,
904:        0,
905:        0,
906:        0,
907: /*30*/ MatSetUpPreallocation_MPIDense,
908:        0,
909:        0,
910:        MatGetArray_MPIDense,
911:        MatRestoreArray_MPIDense,
912: /*35*/ MatDuplicate_MPIDense,
913:        0,
914:        0,
915:        0,
916:        0,
917: /*40*/ 0,
918:        MatGetSubMatrices_MPIDense,
919:        0,
920:        MatGetValues_MPIDense,
921:        0,
922: /*45*/ 0,
923:        MatScale_MPIDense,
924:        0,
925:        0,
926:        0,
927: /*50*/ MatGetBlockSize_MPIDense,
928:        0,
929:        0,
930:        0,
931:        0,
932: /*55*/ 0,
933:        0,
934:        0,
935:        0,
936:        0,
937: /*60*/ MatGetSubMatrix_MPIDense,
938:        MatDestroy_MPIDense,
939:        MatView_MPIDense,
940:        MatGetPetscMaps_Petsc,
941:        0,
942: /*65*/ 0,
943:        0,
944:        0,
945:        0,
946:        0,
947: /*70*/ 0,
948:        0,
949:        0,
950:        0,
951:        0,
952: /*75*/ 0,
953:        0,
954:        0,
955:        0,
956:        0,
957: /*80*/ 0,
958:        0,
959:        0,
960:        0,
961:        0,
962: /*85*/ MatLoad_MPIDense};

964: EXTERN_C_BEGIN
967: int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
968: {
969:   Mat_MPIDense *a;
970:   int          ierr;

973:   mat->preallocated = PETSC_TRUE;
 974:   /* Note:  For now, when data is specified, this assumes that the user has correctly
 975:      allocated the local dense storage space.  We should add error checking. */

977:   a    = (Mat_MPIDense*)mat->data;
978:   MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);
979:   PetscLogObjectParent(mat,a->A);
980:   return(0);
981: }
982: EXTERN_C_END

984: /*MC
985:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

987:    Options Database Keys:
988: . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

990:   Level: beginner

992: .seealso: MatCreateMPIDense
993: M*/
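/*
   Illustrative sketch (not part of the original source; the function name is hypothetical):
   requesting the "mpidense" type explicitly and letting PETSc allocate the storage.
*/
int ExampleCreateMPIDenseType(MPI_Comm comm,int M,int N,Mat *A)
{
  MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,A);
  MatSetType(*A,MATMPIDENSE);                 /* same effect as -mat_type mpidense with MatSetFromOptions() */
  MatMPIDenseSetPreallocation(*A,PETSC_NULL); /* PETSC_NULL: PETSc controls the matrix memory */
  return(0);
}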

995: EXTERN_C_BEGIN
998: int MatCreate_MPIDense(Mat mat)
999: {
1000:   Mat_MPIDense *a;
1001:   int          ierr,i;

1004:   PetscNew(Mat_MPIDense,&a);
1005:   mat->data         = (void*)a;
1006:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1007:   mat->factor       = 0;
1008:   mat->mapping      = 0;

1010:   a->factor       = 0;
1011:   mat->insertmode = NOT_SET_VALUES;
1012:   MPI_Comm_rank(mat->comm,&a->rank);
1013:   MPI_Comm_size(mat->comm,&a->size);

1015:   PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
1016:   PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
1017:   a->nvec = mat->n;

1019:   /* the information in the maps duplicates the information computed below, eventually 
1020:      we should remove the duplicate information that is not contained in the maps */
1021:   PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
1022:   PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);

1024:   /* build local table of row and column ownerships */
1025:   PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);
1026:   a->cowners = a->rowners + a->size + 1;
1027:   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1028:   MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);
1029:   a->rowners[0] = 0;
1030:   for (i=2; i<=a->size; i++) {
1031:     a->rowners[i] += a->rowners[i-1];
1032:   }
1033:   a->rstart = a->rowners[a->rank];
1034:   a->rend   = a->rowners[a->rank+1];
1035:   MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);
1036:   a->cowners[0] = 0;
1037:   for (i=2; i<=a->size; i++) {
1038:     a->cowners[i] += a->cowners[i-1];
1039:   }

1041:   /* build cache for off-processor entries */
1042:   a->donotstash = PETSC_FALSE;
1043:   MatStashCreate_Private(mat->comm,1,&mat->stash);

1045:   /* stuff used for matrix vector multiply */
1046:   a->lvec        = 0;
1047:   a->Mvctx       = 0;
1048:   a->roworiented = PETSC_TRUE;

1050:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1051:                                      "MatGetDiagonalBlock_MPIDense",
1052:                                      MatGetDiagonalBlock_MPIDense);
1053:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1054:                                      "MatMPIDenseSetPreallocation_MPIDense",
1055:                                      MatMPIDenseSetPreallocation_MPIDense);
1056:   return(0);
1057: }
1058: EXTERN_C_END

1060: /*MC
1061:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1063:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1064:    and MATMPIDENSE otherwise.

1066:    Options Database Keys:
1067: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1069:   Level: beginner

1071: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1072: M*/
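/*
   Illustrative sketch (not part of the original source; the function name is hypothetical):
   with the generic MATDENSE type the communicator size selects the concrete implementation,
   exactly as MatCreate_Dense() below does.
*/
int ExampleCreateDense(MPI_Comm comm,int M,int N,Mat *A)
{
  MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,A);
  MatSetType(*A,MATDENSE);   /* one process -> MATSEQDENSE, otherwise MATMPIDENSE;
                                -mat_type dense with MatSetFromOptions() selects the same path */
  return(0);
}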

1074: EXTERN_C_BEGIN
1077: int MatCreate_Dense(Mat A) {
1078:   int ierr,size;

1081:   PetscObjectChangeTypeName((PetscObject)A,MATDENSE);
1082:   MPI_Comm_size(A->comm,&size);
1083:   if (size == 1) {
1084:     MatSetType(A,MATSEQDENSE);
1085:   } else {
1086:     MatSetType(A,MATMPIDENSE);
1087:   }
1088:   return(0);
1089: }
1090: EXTERN_C_END

1094: /*@C
1095:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1097:    Not collective

1099:    Input Parameters:
1100: +  A - the matrix
1101: -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1102:    to control all matrix memory allocation.

1104:    Notes:
1105:    The dense format is fully compatible with standard Fortran 77
1106:    storage by columns.

1108:    The data input variable is intended primarily for Fortran programmers
1109:    who wish to allocate their own matrix memory space.  Most users should
1110:    set data=PETSC_NULL.

1112:    Level: intermediate

1114: .keywords: matrix,dense, parallel

1116: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1117: @*/
1118: int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1119: {
1120:   int ierr,(*f)(Mat,PetscScalar *);

1123:   PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1124:   if (f) {
1125:     (*f)(mat,data);
1126:   }
1127:   return(0);
1128: }
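/*
   Illustrative sketch (not part of the original source; the function and variable names are
   hypothetical): a caller supplying its own column-major storage of size (local rows) x
   (global columns), as described in the manual page above.  PETSc does not free
   user-provided storage, so the caller remains responsible for it.
*/
int ExampleUserProvidedStorage(MPI_Comm comm,int m,int M,int N,Mat *A)
{
  PetscScalar *data;

  PetscMalloc(m*N*sizeof(PetscScalar),&data);   /* local rows by global columns, column-major */
  PetscMemzero(data,m*N*sizeof(PetscScalar));
  MatCreate(comm,m,PETSC_DECIDE,M,N,A);
  MatSetType(*A,MATMPIDENSE);
  MatMPIDenseSetPreallocation(*A,data);
  return(0);
}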

1132: /*@C
1133:    MatCreateMPIDense - Creates a parallel matrix in dense format.

1135:    Collective on MPI_Comm

1137:    Input Parameters:
1138: +  comm - MPI communicator
1139: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1140: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1141: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1142: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1143: -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1144:    to control all matrix memory allocation.

1146:    Output Parameter:
1147: .  A - the matrix

1149:    Notes:
1150:    The dense format is fully compatible with standard Fortran 77
1151:    storage by columns.

1153:    The data input variable is intended primarily for Fortran programmers
1154:    who wish to allocate their own matrix memory space.  Most users should
1155:    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

1157:    The user MUST specify either the local or global matrix dimensions
1158:    (possibly both).

1160:    Level: intermediate

1162: .keywords: matrix,dense, parallel

1164: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1165: @*/
1166: int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1167: {
1168:   int ierr,size;

1171:   MatCreate(comm,m,n,M,N,A);
1172:   MPI_Comm_size(comm,&size);
1173:   if (size > 1) {
1174:     MatSetType(*A,MATMPIDENSE);
1175:     MatMPIDenseSetPreallocation(*A,data);
1176:   } else {
1177:     MatSetType(*A,MATSEQDENSE);
1178:     MatSeqDenseSetPreallocation(*A,data);
1179:   }
1180:   return(0);
1181: }
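/*
   Illustrative sketch (not part of the original source; the function name and the constant
   value set are hypothetical): typical use of MatCreateMPIDense() with PETSc-managed storage,
   letting PETSc choose the row distribution.
*/
int ExampleMatCreateMPIDenseUsage(MPI_Comm comm,int M,int N,Mat *A)
{
  int         i,j,rstart,rend;
  PetscScalar one = 1.0;

  MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,A);
  MatGetOwnershipRange(*A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {                /* each process sets only its own rows */
    for (j=0; j<N; j++) {
      MatSetValues(*A,1,&i,1,&j,&one,INSERT_VALUES);
    }
  }
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return(0);
}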

1185: static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1186: {
1187:   Mat          mat;
1188:   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1189:   int          ierr;

1192:   *newmat       = 0;
1193:   MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1194:   MatSetType(mat,MATMPIDENSE);
1195:   PetscNew(Mat_MPIDense,&a);
1196:   mat->data         = (void*)a;
1197:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1198:   mat->factor       = A->factor;
1199:   mat->assembled    = PETSC_TRUE;
1200:   mat->preallocated = PETSC_TRUE;

1202:   a->rstart       = oldmat->rstart;
1203:   a->rend         = oldmat->rend;
1204:   a->size         = oldmat->size;
1205:   a->rank         = oldmat->rank;
1206:   mat->insertmode = NOT_SET_VALUES;
1207:   a->nvec         = oldmat->nvec;
1208:   a->donotstash   = oldmat->donotstash;
1209:   PetscMalloc((a->size+1)*sizeof(int),&a->rowners);
1210:   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1211:   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1212:   MatStashCreate_Private(A->comm,1,&mat->stash);

1214:   MatSetUpMultiply_MPIDense(mat);
1215:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1216:   PetscLogObjectParent(mat,a->A);
1217:   *newmat = mat;
1218:   return(0);
1219: }

1221:  #include "petscsys.h"

1225: int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1226: {
1227:   int          *rowners,i,size,rank,m,ierr,nz,j;
1228:   PetscScalar  *array,*vals,*vals_ptr;
1229:   MPI_Status   status;

1232:   MPI_Comm_rank(comm,&rank);
1233:   MPI_Comm_size(comm,&size);

1235:   /* determine ownership of all rows */
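        /* each process gets M/size rows; the first M%size processes (rank < M%size) get one extra row */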
1236:   m          = M/size + ((M % size) > rank);
1237:   PetscMalloc((size+2)*sizeof(int),&rowners);
1238:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1239:   rowners[0] = 0;
1240:   for (i=2; i<=size; i++) {
1241:     rowners[i] += rowners[i-1];
1242:   }

1244:   MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1245:   MatGetArray(*newmat,&array);

1247:   if (!rank) {
1248:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1250:     /* read in my part of the matrix numerical values  */
1251:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1252: 
1253:     /* insert into matrix by row (this is why we cannot read directly into the array) */
1254:     vals_ptr = vals;
1255:     for (i=0; i<m; i++) {
1256:       for (j=0; j<N; j++) {
1257:         array[i + j*m] = *vals_ptr++;
1258:       }
1259:     }

1261:     /* read in the other processors' parts and ship them out */
1262:     for (i=1; i<size; i++) {
1263:       nz   = (rowners[i+1] - rowners[i])*N;
1264:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1265:       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1266:     }
1267:   } else {
1268:     /* receive numeric values */
1269:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1271:     /* receive message of values*/
1272:     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

1274:     /* insert into matrix by row (this is why we cannot read directly into the array) */
1275:     vals_ptr = vals;
1276:     for (i=0; i<m; i++) {
1277:       for (j=0; j<N; j++) {
1278:         array[i + j*m] = *vals_ptr++;
1279:       }
1280:     }
1281:   }
1282:   PetscFree(rowners);
1283:   PetscFree(vals);
1284:   MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1285:   MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1286:   return(0);
1287: }

1291: int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1292: {
1293:   Mat          A;
1294:   PetscScalar  *vals,*svals;
1295:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1296:   MPI_Status   status;
1297:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1298:   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1299:   int          tag = ((PetscObject)viewer)->tag;
1300:   int          i,nz,ierr,j,rstart,rend,fd;

1303:   MPI_Comm_size(comm,&size);
1304:   MPI_Comm_rank(comm,&rank);
1305:   if (!rank) {
1306:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1307:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1308:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1309:   }

1311:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1312:   M = header[1]; N = header[2]; nz = header[3];

1314:   /*
1315:        Handle case where matrix is stored on disk as a dense matrix 
1316:   */
1317:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1318:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1319:     return(0);
1320:   }

1322:   /* determine ownership of all rows */
1323:   m          = M/size + ((M % size) > rank);
1324:   PetscMalloc((size+2)*sizeof(int),&rowners);
1325:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1326:   rowners[0] = 0;
1327:   for (i=2; i<=size; i++) {
1328:     rowners[i] += rowners[i-1];
1329:   }
1330:   rstart = rowners[rank];
1331:   rend   = rowners[rank+1];

1333:   /* distribute row lengths to all processors */
1334:   PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);
1335:   offlens = ourlens + (rend-rstart);
1336:   if (!rank) {
1337:     PetscMalloc(M*sizeof(int),&rowlengths);
1338:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1339:     PetscMalloc(size*sizeof(int),&sndcounts);
1340:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1341:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1342:     PetscFree(sndcounts);
1343:   } else {
1344:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1345:   }

1347:   if (!rank) {
1348:     /* calculate the number of nonzeros on each processor */
1349:     PetscMalloc(size*sizeof(int),&procsnz);
1350:     PetscMemzero(procsnz,size*sizeof(int));
1351:     for (i=0; i<size; i++) {
1352:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1353:         procsnz[i] += rowlengths[j];
1354:       }
1355:     }
1356:     PetscFree(rowlengths);

1358:     /* determine max buffer needed and allocate it */
1359:     maxnz = 0;
1360:     for (i=0; i<size; i++) {
1361:       maxnz = PetscMax(maxnz,procsnz[i]);
1362:     }
1363:     PetscMalloc(maxnz*sizeof(int),&cols);

1365:     /* read in my part of the matrix column indices  */
1366:     nz = procsnz[0];
1367:     PetscMalloc(nz*sizeof(int),&mycols);
1368:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1370:     /* read in everyone else's part and ship it off */
1371:     for (i=1; i<size; i++) {
1372:       nz   = procsnz[i];
1373:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1374:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1375:     }
1376:     PetscFree(cols);
1377:   } else {
1378:     /* determine buffer space needed for message */
1379:     nz = 0;
1380:     for (i=0; i<m; i++) {
1381:       nz += ourlens[i];
1382:     }
1383:     PetscMalloc(nz*sizeof(int),&mycols);

1385:     /* receive message of column indices*/
1386:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1387:     MPI_Get_count(&status,MPI_INT,&maxnz);
1388:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1389:   }

1391:   /* loop over local rows, determining number of off diagonal entries */
1392:   PetscMemzero(offlens,m*sizeof(int));
1393:   jj = 0;
1394:   for (i=0; i<m; i++) {
1395:     for (j=0; j<ourlens[i]; j++) {
1396:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1397:       jj++;
1398:     }
1399:   }

1401:   /* create our matrix */
1402:   for (i=0; i<m; i++) {
1403:     ourlens[i] -= offlens[i];
1404:   }
1405:   MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1406:   A = *newmat;
1407:   for (i=0; i<m; i++) {
1408:     ourlens[i] += offlens[i];
1409:   }

1411:   if (!rank) {
1412:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1414:     /* read in my part of the matrix numerical values  */
1415:     nz = procsnz[0];
1416:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1417: 
1418:     /* insert into matrix */
1419:     jj      = rstart;
1420:     smycols = mycols;
1421:     svals   = vals;
1422:     for (i=0; i<m; i++) {
1423:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1424:       smycols += ourlens[i];
1425:       svals   += ourlens[i];
1426:       jj++;
1427:     }

1429:     /* read in the other processors' parts and ship them out */
1430:     for (i=1; i<size; i++) {
1431:       nz   = procsnz[i];
1432:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1433:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1434:     }
1435:     PetscFree(procsnz);
1436:   } else {
1437:     /* receive numeric values */
1438:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1440:     /* receive message of values*/
1441:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1442:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1443:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1445:     /* insert into matrix */
1446:     jj      = rstart;
1447:     smycols = mycols;
1448:     svals   = vals;
1449:     for (i=0; i<m; i++) {
1450:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1451:       smycols += ourlens[i];
1452:       svals   += ourlens[i];
1453:       jj++;
1454:     }
1455:   }
1456:   PetscFree(ourlens);
1457:   PetscFree(vals);
1458:   PetscFree(mycols);
1459:   PetscFree(rowners);

1461:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1462:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1463:   return(0);
1464: }