Actual source code: mpidense.c
1: /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
3: /*
4: Basic functions for parallel dense matrices.
5: */
6:
7: #include "src/mat/impls/dense/mpi/mpidense.h"
8: #include "src/vec/vecimpl.h"
10: EXTERN_C_BEGIN
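/*
   MatGetDiagonalBlock_MPIDense - returns the square diagonal block owned by this
   process as a sequential dense matrix. The local block is stored by columns with
   leading dimension m, so the diagonal block is simply columns rstart .. rstart+m-1
   of the local m x N array (offset m*rstart).
*/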
13: int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14: {
15: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16: int m = A->m,rstart = mdn->rstart,ierr;
17: PetscScalar *array;
18: MPI_Comm comm;
21: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
23: /* The reuse aspect is not implemented efficiently */
24: if (reuse) { MatDestroy(*B);}
26: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
27: MatGetArray(mdn->A,&array);
28: MatCreateSeqDense(comm,m,m,array+m*rstart,B);
29: MatRestoreArray(mdn->A,&array);
30: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
31: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
32:
33: *iscopy = PETSC_TRUE;
34: return(0);
35: }
36: EXTERN_C_END
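/*
   MatSetValues_MPIDense - values destined for locally owned rows go directly into
   the local sequential dense matrix; values for rows owned by other processes are
   placed in the stash (unless MAT_IGNORE_OFF_PROC_ENTRIES was set) and are
   communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/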
40: int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
41: {
42: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
43: int ierr,i,j,rstart = A->rstart,rend = A->rend,row;
44: PetscTruth roworiented = A->roworiented;
47: for (i=0; i<m; i++) {
48: if (idxm[i] < 0) continue;
49: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
50: if (idxm[i] >= rstart && idxm[i] < rend) {
51: row = idxm[i] - rstart;
52: if (roworiented) {
53: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
54: } else {
55: for (j=0; j<n; j++) {
56: if (idxn[j] < 0) continue;
57: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
58: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
59: }
60: }
61: } else {
62: if (!A->donotstash) {
63: if (roworiented) {
64: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
65: } else {
66: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
67: }
68: }
69: }
70: }
71: return(0);
72: }
76: int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
77: {
78: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
79: int ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82: for (i=0; i<m; i++) {
83: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
84: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
85: if (idxm[i] >= rstart && idxm[i] < rend) {
86: row = idxm[i] - rstart;
87: for (j=0; j<n; j++) {
88: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
89: if (idxn[j] >= mat->N) {
90: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
91: }
92: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
93: }
94: } else {
95: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
96: }
97: }
98: return(0);
99: }
103: int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
104: {
105: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
106: int ierr;
109: MatGetArray(a->A,array);
110: return(0);
111: }
115: static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
116: {
117: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
118: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
119: int i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
120: PetscScalar *av,*bv,*v = lmat->v;
121: Mat newmat;
124: ISGetIndices(isrow,&irow);
125: ISGetIndices(iscol,&icol);
126: ISGetLocalSize(isrow,&nrows);
127: ISGetLocalSize(iscol,&ncols);
129: /* No parallel redistribution is currently supported! Should really check each index set
130: to confirm that it is OK. ... Currently supports only submatrices with the same partitioning as the
131: original matrix! */
133: MatGetLocalSize(A,&nlrows,&nlcols);
134: MatGetOwnershipRange(A,&rstart,&rend);
135:
136: /* Check submatrix call */
137: if (scall == MAT_REUSE_MATRIX) {
138: /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
139: /* Really need to test rows and column sizes! */
140: newmat = *B;
141: } else {
142: /* Create and fill new matrix */
143: MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);
144: }
146: /* Now extract the data pointers and do the copy, one column at a time */
147: newmatd = (Mat_MPIDense*)newmat->data;
148: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
149:
150: for (i=0; i<ncols; i++) {
151: av = v + nlrows*icol[i];
152: for (j=0; j<nrows; j++) {
153: *bv++ = av[irow[j] - rstart];
154: }
155: }
157: /* Assemble the matrices so that the correct flags are set */
158: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
159: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
161: /* Free work space */
162: ISRestoreIndices(isrow,&irow);
163: ISRestoreIndices(iscol,&icol);
164: *B = newmat;
165: return(0);
166: }
170: int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
171: {
173: return(0);
174: }
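/*
   Assembly: MatAssemblyBegin() checks that all processes use a consistent insert
   mode and starts scattering the stashed off-process values; MatAssemblyEnd()
   receives those values, inserts them row by row, assembles the local matrix, and
   on the first final assembly builds the scatter context used by the matrix-vector
   products.
*/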
178: int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
179: {
180: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
181: MPI_Comm comm = mat->comm;
182: int ierr,nstash,reallocs;
183: InsertMode addv;
186: /* make sure all processors are either in INSERTMODE or ADDMODE */
187: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
188: if (addv == (ADD_VALUES|INSERT_VALUES)) {
189: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
190: }
191: mat->insertmode = addv; /* in case this processor had no cache */
193: MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
194: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
195: PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
196: return(0);
197: }
201: int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
202: {
203: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
204: int i,n,ierr,*row,*col,flg,j,rstart,ncols;
205: PetscScalar *val;
206: InsertMode addv=mat->insertmode;
209: /* wait on receives */
210: while (1) {
211: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
212: if (!flg) break;
213:
214: for (i=0; i<n;) {
215: /* Now identify the consecutive vals belonging to the same row */
216: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
217: if (j < n) ncols = j-i;
218: else ncols = n-i;
219: /* Now assemble all these values with a single function call */
220: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
221: i = j;
222: }
223: }
224: MatStashScatterEnd_Private(&mat->stash);
225:
226: MatAssemblyBegin(mdn->A,mode);
227: MatAssemblyEnd(mdn->A,mode);
229: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
230: MatSetUpMultiply_MPIDense(mat);
231: }
232: return(0);
233: }
237: int MatZeroEntries_MPIDense(Mat A)
238: {
239: int ierr;
240: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
243: MatZeroEntries(l->A);
244: return(0);
245: }
249: int MatGetBlockSize_MPIDense(Mat A,int *bs)
250: {
252: *bs = 1;
253: return(0);
254: }
256: /* the code does not do the diagonal entries correctly unless the
257: matrix is square and the column and row ownerships are identical.
258: This is a BUG. The only way to fix it seems to be to access
259: mdn->A and mdn->B directly and not through the MatZeroRows()
260: routine.
261: */
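/*
   The communication pattern below: each process determines the owner of every
   global row index in "is", sends each index to its owner, receives the indices
   for the rows it owns, shifts them to local numbering, and then calls
   MatZeroRows() on its local sequential matrix.
*/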
264: int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
265: {
266: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
267: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
268: int *nprocs,j,idx,nsends;
269: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
270: int *rvalues,tag = A->tag,count,base,slen,n,*source;
271: int *lens,imdex,*lrows,*values;
272: MPI_Comm comm = A->comm;
273: MPI_Request *send_waits,*recv_waits;
274: MPI_Status recv_status,*send_status;
275: IS istmp;
276: PetscTruth found;
279: ISGetLocalSize(is,&N);
280: ISGetIndices(is,&rows);
282: /* first count number of contributors to each processor */
283: PetscMalloc(2*size*sizeof(int),&nprocs);
284: PetscMemzero(nprocs,2*size*sizeof(int));
285: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
286: for (i=0; i<N; i++) {
287: idx = rows[i];
288: found = PETSC_FALSE;
289: for (j=0; j<size; j++) {
290: if (idx >= owners[j] && idx < owners[j+1]) {
291: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
292: }
293: }
294: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
295: }
296: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
298: /* inform other processors of number of messages and max length*/
299: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
301: /* post receives: */
302: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
303: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
304: for (i=0; i<nrecvs; i++) {
305: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
306: }
308: /* do sends:
309: 1) starts[i] gives the starting index in svalues for stuff going to
310: the ith processor
311: */
312: PetscMalloc((N+1)*sizeof(int),&svalues);
313: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
314: PetscMalloc((size+1)*sizeof(int),&starts);
315: starts[0] = 0;
316: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
317: for (i=0; i<N; i++) {
318: svalues[starts[owner[i]]++] = rows[i];
319: }
320: ISRestoreIndices(is,&rows);
322: starts[0] = 0;
323: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
324: count = 0;
325: for (i=0; i<size; i++) {
326: if (nprocs[2*i+1]) {
327: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
328: }
329: }
330: PetscFree(starts);
332: base = owners[rank];
334: /* wait on receives */
335: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
336: source = lens + nrecvs;
337: count = nrecvs; slen = 0;
338: while (count) {
339: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
340: /* unpack receives into our local space */
341: MPI_Get_count(&recv_status,MPI_INT,&n);
342: source[imdex] = recv_status.MPI_SOURCE;
343: lens[imdex] = n;
344: slen += n;
345: count--;
346: }
347: PetscFree(recv_waits);
348:
349: /* move the data into the send scatter */
350: PetscMalloc((slen+1)*sizeof(int),&lrows);
351: count = 0;
352: for (i=0; i<nrecvs; i++) {
353: values = rvalues + i*nmax;
354: for (j=0; j<lens[i]; j++) {
355: lrows[count++] = values[j] - base;
356: }
357: }
358: PetscFree(rvalues);
359: PetscFree(lens);
360: PetscFree(owner);
361: PetscFree(nprocs);
362:
363: /* actually zap the local rows */
364: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
365: PetscLogObjectParent(A,istmp);
366: PetscFree(lrows);
367: MatZeroRows(l->A,istmp,diag);
368: ISDestroy(istmp);
370: /* wait on sends */
371: if (nsends) {
372: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
373: MPI_Waitall(nsends,send_waits,send_status);
374: PetscFree(send_status);
375: }
376: PetscFree(send_waits);
377: PetscFree(svalues);
379: return(0);
380: }
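/*
   MatMult: the entire input vector is gathered into the local work vector lvec
   with the forward scatter, then the local m x N dense block is applied to
   produce this process's piece of the result.
*/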
384: int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
385: {
386: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
387: int ierr;
390: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
391: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
392: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
393: return(0);
394: }
398: int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
399: {
400: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
401: int ierr;
404: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
405: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
406: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
407: return(0);
408: }
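/*
   MatMultTranspose: each process applies the transpose of its local m x N block
   to its piece of x, producing a length-N partial result in lvec; the partial
   results are then summed into the distributed output vector with a reverse
   scatter using ADD_VALUES.
*/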
412: int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
413: {
414: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
415: int ierr;
416: PetscScalar zero = 0.0;
419: VecSet(&zero,yy);
420: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
421: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
422: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
423: return(0);
424: }
428: int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
429: {
430: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
431: int ierr;
434: VecCopy(yy,zz);
435: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
436: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
437: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
438: return(0);
439: }
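/*
   MatGetDiagonal: the local block is stored by columns with leading dimension m,
   so local diagonal entry i sits at offset rstart*m + i*m + i. Each process fills
   only its own portion of v; the remaining entries stay zero.
*/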
443: int MatGetDiagonal_MPIDense(Mat A,Vec v)
444: {
445: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
446: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
447: int ierr,len,i,n,m = A->m,radd;
448: PetscScalar *x,zero = 0.0;
449:
451: VecSet(&zero,v);
452: VecGetArray(v,&x);
453: VecGetSize(v,&n);
454: if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
455: len = PetscMin(a->A->m,a->A->n);
456: radd = a->rstart*m;
457: for (i=0; i<len; i++) {
458: x[i] = aloc->v[radd + i*m + i];
459: }
460: VecRestoreArray(v,&x);
461: return(0);
462: }
466: int MatDestroy_MPIDense(Mat mat)
467: {
468: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
469: int ierr;
473: #if defined(PETSC_USE_LOG)
474: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
475: #endif
476: MatStashDestroy_Private(&mat->stash);
477: PetscFree(mdn->rowners);
478: MatDestroy(mdn->A);
479: if (mdn->lvec) VecDestroy(mdn->lvec);
480: if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
481: if (mdn->factor) {
482: if (mdn->factor->temp) {PetscFree(mdn->factor->temp);}
483: if (mdn->factor->tag) {PetscFree(mdn->factor->tag);}
484: if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
485: PetscFree(mdn->factor);
486: }
487: PetscFree(mdn);
488: return(0);
489: }
493: static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
494: {
495: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
496: int ierr;
499: if (mdn->size == 1) {
500: MatView(mdn->A,viewer);
501: }
502: else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
503: return(0);
504: }
508: static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
509: {
510: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
511: int ierr,size = mdn->size,rank = mdn->rank;
512: PetscViewerType vtype;
513: PetscTruth isascii,isdraw;
514: PetscViewer sviewer;
515: PetscViewerFormat format;
518: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
519: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
520: if (isascii) {
521: PetscViewerGetType(viewer,&vtype);
522: PetscViewerGetFormat(viewer,&format);
523: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
524: MatInfo info;
525: MatGetInfo(mat,MAT_LOCAL,&info);
526: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
527: (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
528: PetscViewerFlush(viewer);
529: VecScatterView(mdn->Mvctx,viewer);
530: return(0);
531: } else if (format == PETSC_VIEWER_ASCII_INFO) {
532: return(0);
533: }
534: } else if (isdraw) {
535: PetscDraw draw;
536: PetscTruth isnull;
538: PetscViewerDrawGetDraw(viewer,0,&draw);
539: PetscDrawIsNull(draw,&isnull);
540: if (isnull) return(0);
541: }
543: if (size == 1) {
544: MatView(mdn->A,viewer);
545: } else {
546: /* assemble the entire matrix onto the first processor. */
547: Mat A;
548: int M = mat->M,N = mat->N,m,row,i,nz,*cols;
549: PetscScalar *vals;
551: if (!rank) {
552: MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);
553: } else {
554: MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);
555: }
556: PetscLogObjectParent(mat,A);
558: /* Copy the matrix ... This isn't the most efficient means,
559: but it's quick for now */
560: row = mdn->rstart; m = mdn->A->m;
561: for (i=0; i<m; i++) {
562: MatGetRow(mat,row,&nz,&cols,&vals);
563: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
564: MatRestoreRow(mat,row,&nz,&cols,&vals);
565: row++;
566: }
568: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
569: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
570: PetscViewerGetSingleton(viewer,&sviewer);
571: if (!rank) {
572: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
573: }
574: PetscViewerRestoreSingleton(viewer,&sviewer);
575: PetscViewerFlush(viewer);
576: MatDestroy(A);
577: }
578: return(0);
579: }
583: int MatView_MPIDense(Mat mat,PetscViewer viewer)
584: {
585: int ierr;
586: PetscTruth isascii,isbinary,isdraw,issocket;
587:
589:
590: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
591: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
592: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
593: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
595: if (isascii || issocket || isdraw) {
596: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
597: } else if (isbinary) {
598: MatView_MPIDense_Binary(mat,viewer);
599: } else {
600: SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
601: }
602: return(0);
603: }
607: int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
608: {
609: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
610: Mat mdn = mat->A;
611: int ierr;
612: PetscReal isend[5],irecv[5];
615: info->rows_global = (double)A->M;
616: info->columns_global = (double)A->N;
617: info->rows_local = (double)A->m;
618: info->columns_local = (double)A->N;
619: info->block_size = 1.0;
620: MatGetInfo(mdn,MAT_LOCAL,info);
621: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
622: isend[3] = info->memory; isend[4] = info->mallocs;
623: if (flag == MAT_LOCAL) {
624: info->nz_used = isend[0];
625: info->nz_allocated = isend[1];
626: info->nz_unneeded = isend[2];
627: info->memory = isend[3];
628: info->mallocs = isend[4];
629: } else if (flag == MAT_GLOBAL_MAX) {
630: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
631: info->nz_used = irecv[0];
632: info->nz_allocated = irecv[1];
633: info->nz_unneeded = irecv[2];
634: info->memory = irecv[3];
635: info->mallocs = irecv[4];
636: } else if (flag == MAT_GLOBAL_SUM) {
637: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
638: info->nz_used = irecv[0];
639: info->nz_allocated = irecv[1];
640: info->nz_unneeded = irecv[2];
641: info->memory = irecv[3];
642: info->mallocs = irecv[4];
643: }
644: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
645: info->fill_ratio_needed = 0;
646: info->factor_mallocs = 0;
647: return(0);
648: }
652: int MatSetOption_MPIDense(Mat A,MatOption op)
653: {
654: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
655: int ierr;
658: switch (op) {
659: case MAT_NO_NEW_NONZERO_LOCATIONS:
660: case MAT_YES_NEW_NONZERO_LOCATIONS:
661: case MAT_NEW_NONZERO_LOCATION_ERR:
662: case MAT_NEW_NONZERO_ALLOCATION_ERR:
663: case MAT_COLUMNS_SORTED:
664: case MAT_COLUMNS_UNSORTED:
665: MatSetOption(a->A,op);
666: break;
667: case MAT_ROW_ORIENTED:
668: a->roworiented = PETSC_TRUE;
669: MatSetOption(a->A,op);
670: break;
671: case MAT_ROWS_SORTED:
672: case MAT_ROWS_UNSORTED:
673: case MAT_YES_NEW_DIAGONALS:
674: case MAT_USE_HASH_TABLE:
675: PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
676: break;
677: case MAT_COLUMN_ORIENTED:
678: a->roworiented = PETSC_FALSE;
679: MatSetOption(a->A,op);
680: break;
681: case MAT_IGNORE_OFF_PROC_ENTRIES:
682: a->donotstash = PETSC_TRUE;
683: break;
684: case MAT_NO_NEW_DIAGONALS:
685: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
686: default:
687: SETERRQ(PETSC_ERR_SUP,"unknown option");
688: }
689: return(0);
690: }
694: int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
695: {
696: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
697: int lrow,rstart = mat->rstart,rend = mat->rend,ierr;
700: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
701: lrow = row - rstart;
702: MatGetRow(mat->A,lrow,nz,idx,v);
703: return(0);
704: }
708: int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
709: {
713: if (idx) {PetscFree(*idx);}
714: if (v) {PetscFree(*v);}
715: return(0);
716: }
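/*
   MatDiagonalScale: the left scaling vector ll matches the row distribution, so
   rows are scaled locally; the right scaling vector rr is first gathered into
   lvec with the forward scatter and then each local column is scaled.
*/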
720: int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
721: {
722: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
723: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
724: PetscScalar *l,*r,x,*v;
725: int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
728: MatGetLocalSize(A,&s2,&s3);
729: if (ll) {
730: VecGetLocalSize(ll,&s2a);
731: if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
732: VecGetArray(ll,&l);
733: for (i=0; i<m; i++) {
734: x = l[i];
735: v = mat->v + i;
736: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
737: }
738: VecRestoreArray(ll,&l);
739: PetscLogFlops(n*m);
740: }
741: if (rr) {
742: VecGetSize(rr,&s3a);
743: if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
744: VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
745: VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
746: VecGetArray(mdn->lvec,&r);
747: for (i=0; i<n; i++) {
748: x = r[i];
749: v = mat->v + i*m;
750: for (j=0; j<m; j++) { (*v++) *= x;}
751: }
752: VecRestoreArray(mdn->lvec,&r);
753: PetscLogFlops(n*m);
754: }
755: return(0);
756: }
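/*
   MatNorm: the Frobenius norm sums the squares of the local entries and reduces
   with MPI_SUM; the 1-norm forms local column sums, reduces them with MPI_SUM,
   and takes the maximum; the infinity norm reduces the local row-norm maxima
   with MPI_MAX. The 2-norm is not supported in parallel.
*/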
760: int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
761: {
762: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
763: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
764: int ierr,i,j;
765: PetscReal sum = 0.0;
766: PetscScalar *v = mat->v;
769: if (mdn->size == 1) {
770: MatNorm(mdn->A,type,nrm);
771: } else {
772: if (type == NORM_FROBENIUS) {
773: for (i=0; i<mdn->A->n*mdn->A->m; i++) {
774: #if defined(PETSC_USE_COMPLEX)
775: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
776: #else
777: sum += (*v)*(*v); v++;
778: #endif
779: }
780: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
781: *nrm = sqrt(*nrm);
782: PetscLogFlops(2*mdn->A->n*mdn->A->m);
783: } else if (type == NORM_1) {
784: PetscReal *tmp,*tmp2;
785: PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
786: tmp2 = tmp + A->N;
787: PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
788: *nrm = 0.0;
789: v = mat->v;
790: for (j=0; j<mdn->A->n; j++) {
791: for (i=0; i<mdn->A->m; i++) {
792: tmp[j] += PetscAbsScalar(*v); v++;
793: }
794: }
795: MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
796: for (j=0; j<A->N; j++) {
797: if (tmp2[j] > *nrm) *nrm = tmp2[j];
798: }
799: PetscFree(tmp);
800: PetscLogFlops(A->n*A->m);
801: } else if (type == NORM_INFINITY) { /* max row norm */
802: PetscReal ntemp;
803: MatNorm(mdn->A,type,&ntemp);
804: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
805: } else {
806: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
807: }
808: }
809: return(0);
810: }
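/*
   MatTranspose: the transpose is formed explicitly by inserting each local column
   of A as a row of the new matrix B, using the global row numbers
   rstart .. rstart+m-1 of A as the column indices in B.
*/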
814: int MatTranspose_MPIDense(Mat A,Mat *matout)
815: {
816: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
817: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
818: Mat B;
819: int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
820: int j,i,ierr;
821: PetscScalar *v;
824: if (!matout && M != N) {
825: SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
826: }
827: MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
829: m = a->A->m; n = a->A->n; v = Aloc->v;
830: PetscMalloc(m*sizeof(int),&rwork);
831: for (j=0; j<n; j++) {
832: for (i=0; i<m; i++) rwork[i] = rstart + i;
833: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
834: v += m;
835: }
836: PetscFree(rwork);
837: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
838: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
839: if (matout) {
840: *matout = B;
841: } else {
842: MatHeaderCopy(A,B);
843: }
844: return(0);
845: }
847: #include "petscblaslapack.h"
850: int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
851: {
852: Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
853: Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
854: int one = 1,nz;
857: nz = inA->m*inA->N;
858: BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
859: PetscLogFlops(nz);
860: return(0);
861: }
863: static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
867: int MatSetUpPreallocation_MPIDense(Mat A)
868: {
869: int ierr;
872: MatMPIDenseSetPreallocation(A,0);
873: return(0);
874: }
876: /* -------------------------------------------------------------------*/
877: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
878: MatGetRow_MPIDense,
879: MatRestoreRow_MPIDense,
880: MatMult_MPIDense,
881: /* 4*/ MatMultAdd_MPIDense,
882: MatMultTranspose_MPIDense,
883: MatMultTransposeAdd_MPIDense,
884: 0,
885: 0,
886: 0,
887: /*10*/ 0,
888: 0,
889: 0,
890: 0,
891: MatTranspose_MPIDense,
892: /*15*/ MatGetInfo_MPIDense,
893: 0,
894: MatGetDiagonal_MPIDense,
895: MatDiagonalScale_MPIDense,
896: MatNorm_MPIDense,
897: /*20*/ MatAssemblyBegin_MPIDense,
898: MatAssemblyEnd_MPIDense,
899: 0,
900: MatSetOption_MPIDense,
901: MatZeroEntries_MPIDense,
902: /*25*/ MatZeroRows_MPIDense,
903: 0,
904: 0,
905: 0,
906: 0,
907: /*30*/ MatSetUpPreallocation_MPIDense,
908: 0,
909: 0,
910: MatGetArray_MPIDense,
911: MatRestoreArray_MPIDense,
912: /*35*/ MatDuplicate_MPIDense,
913: 0,
914: 0,
915: 0,
916: 0,
917: /*40*/ 0,
918: MatGetSubMatrices_MPIDense,
919: 0,
920: MatGetValues_MPIDense,
921: 0,
922: /*45*/ 0,
923: MatScale_MPIDense,
924: 0,
925: 0,
926: 0,
927: /*50*/ MatGetBlockSize_MPIDense,
928: 0,
929: 0,
930: 0,
931: 0,
932: /*55*/ 0,
933: 0,
934: 0,
935: 0,
936: 0,
937: /*60*/ MatGetSubMatrix_MPIDense,
938: MatDestroy_MPIDense,
939: MatView_MPIDense,
940: MatGetPetscMaps_Petsc,
941: 0,
942: /*65*/ 0,
943: 0,
944: 0,
945: 0,
946: 0,
947: /*70*/ 0,
948: 0,
949: 0,
950: 0,
951: 0,
952: /*75*/ 0,
953: 0,
954: 0,
955: 0,
956: 0,
957: /*80*/ 0,
958: 0,
959: 0,
960: 0,
961: 0,
962: /*85*/ MatLoad_MPIDense};
964: EXTERN_C_BEGIN
967: int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
968: {
969: Mat_MPIDense *a;
970: int ierr;
973: mat->preallocated = PETSC_TRUE;
974: /* Note: For now, when data is specified above, this assumes the user correctly
975: allocates the local dense storage space. We should add error checking. */
977: a = (Mat_MPIDense*)mat->data;
978: MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);
979: PetscLogObjectParent(mat,a->A);
980: return(0);
981: }
982: EXTERN_C_END
984: /*MC
985: MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
987: Options Database Keys:
988: . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
990: Level: beginner
992: .seealso: MatCreateMPIDense
993: M*/
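/*
   A minimal usage sketch (not part of this file; "comm", M, and N are
   placeholders): instead of calling MatCreateMPIDense(), the type can be set
   explicitly after MatCreate().

      Mat A;
      ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/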
995: EXTERN_C_BEGIN
998: int MatCreate_MPIDense(Mat mat)
999: {
1000: Mat_MPIDense *a;
1001: int ierr,i;
1004: PetscNew(Mat_MPIDense,&a);
1005: mat->data = (void*)a;
1006: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1007: mat->factor = 0;
1008: mat->mapping = 0;
1010: a->factor = 0;
1011: mat->insertmode = NOT_SET_VALUES;
1012: MPI_Comm_rank(mat->comm,&a->rank);
1013: MPI_Comm_size(mat->comm,&a->size);
1015: PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
1016: PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
1017: a->nvec = mat->n;
1019: /* the information in the maps duplicates the information computed below, eventually
1020: we should remove the duplicate information that is not contained in the maps */
1021: PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
1022: PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);
1024: /* build local table of row and column ownerships */
1025: PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);
1026: a->cowners = a->rowners + a->size + 1;
1027: PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1028: MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);
1029: a->rowners[0] = 0;
1030: for (i=2; i<=a->size; i++) {
1031: a->rowners[i] += a->rowners[i-1];
1032: }
1033: a->rstart = a->rowners[a->rank];
1034: a->rend = a->rowners[a->rank+1];
1035: MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);
1036: a->cowners[0] = 0;
1037: for (i=2; i<=a->size; i++) {
1038: a->cowners[i] += a->cowners[i-1];
1039: }
1041: /* build cache for off-process entries formed during MatSetValues() */
1042: a->donotstash = PETSC_FALSE;
1043: MatStashCreate_Private(mat->comm,1,&mat->stash);
1045: /* stuff used for matrix vector multiply */
1046: a->lvec = 0;
1047: a->Mvctx = 0;
1048: a->roworiented = PETSC_TRUE;
1050: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1051: "MatGetDiagonalBlock_MPIDense",
1052: MatGetDiagonalBlock_MPIDense);
1053: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1054: "MatMPIDenseSetPreallocation_MPIDense",
1055: MatMPIDenseSetPreallocation_MPIDense);
1056: return(0);
1057: }
1058: EXTERN_C_END
1060: /*MC
1061: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1063: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1064: and MATMPIDENSE otherwise.
1066: Options Database Keys:
1067: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1069: Level: beginner
1071: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1072: M*/
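/*
   A minimal usage sketch (not part of this file; "comm", m, n, M, and N are
   placeholders): with MatSetFromOptions() the type can be selected at run time
   via -mat_type dense, which then picks MATSEQDENSE or MATMPIDENSE depending on
   the communicator size, as MatCreate_Dense() below shows.

      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/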
1074: EXTERN_C_BEGIN
1077: int MatCreate_Dense(Mat A) {
1078: int ierr,size;
1081: PetscObjectChangeTypeName((PetscObject)A,MATDENSE);
1082: MPI_Comm_size(A->comm,&size);
1083: if (size == 1) {
1084: MatSetType(A,MATSEQDENSE);
1085: } else {
1086: MatSetType(A,MATMPIDENSE);
1087: }
1088: return(0);
1089: }
1090: EXTERN_C_END
1094: /*@C
1095: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1097: Not collective
1099: Input Parameters:
1100: + A - the matrix
1101: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1102: to control all matrix memory allocation.
1104: Notes:
1105: The dense format is fully compatible with standard Fortran 77
1106: storage by columns.
1108: The data input variable is intended primarily for Fortran programmers
1109: who wish to allocate their own matrix memory space. Most users should
1110: set data=PETSC_NULL.
1112: Level: intermediate
1114: .keywords: matrix,dense, parallel
1116: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1117: @*/
1118: int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1119: {
1120: int ierr,(*f)(Mat,PetscScalar *);
1123: PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1124: if (f) {
1125: (*f)(mat,data);
1126: }
1127: return(0);
1128: }
1132: /*@C
1133: MatCreateMPIDense - Creates a parallel matrix in dense format.
1135: Collective on MPI_Comm
1137: Input Parameters:
1138: + comm - MPI communicator
1139: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1140: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1141: . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1142: . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1143: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1144: to control all matrix memory allocation.
1146: Output Parameter:
1147: . A - the matrix
1149: Notes:
1150: The dense format is fully compatible with standard Fortran 77
1151: storage by columns.
1153: The data input variable is intended primarily for Fortran programmers
1154: who wish to allocate their own matrix memory space. Most users should
1155: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1157: The user MUST specify either the local or global matrix dimensions
1158: (possibly both).
1160: Level: intermediate
1162: .keywords: matrix,dense, parallel
1164: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1165: @*/
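/*
   A usage sketch for the routine below (not part of this file; the sizes M, N and
   the variables row, col, value are placeholders; PETSc allocates the storage
   because data is PETSC_NULL):

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/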
1166: int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1167: {
1168: int ierr,size;
1171: MatCreate(comm,m,n,M,N,A);
1172: MPI_Comm_size(comm,&size);
1173: if (size > 1) {
1174: MatSetType(*A,MATMPIDENSE);
1175: MatMPIDenseSetPreallocation(*A,data);
1176: } else {
1177: MatSetType(*A,MATSEQDENSE);
1178: MatSeqDenseSetPreallocation(*A,data);
1179: }
1180: return(0);
1181: }
1185: static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1186: {
1187: Mat mat;
1188: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1189: int ierr;
1192: *newmat = 0;
1193: MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1194: MatSetType(mat,MATMPIDENSE);
1195: PetscNew(Mat_MPIDense,&a);
1196: mat->data = (void*)a;
1197: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1198: mat->factor = A->factor;
1199: mat->assembled = PETSC_TRUE;
1200: mat->preallocated = PETSC_TRUE;
1202: a->rstart = oldmat->rstart;
1203: a->rend = oldmat->rend;
1204: a->size = oldmat->size;
1205: a->rank = oldmat->rank;
1206: mat->insertmode = NOT_SET_VALUES;
1207: a->nvec = oldmat->nvec;
1208: a->donotstash = oldmat->donotstash;
1209: PetscMalloc((a->size+1)*sizeof(int),&a->rowners);
1210: PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1211: PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1212: MatStashCreate_Private(A->comm,1,&mat->stash);
1214: MatSetUpMultiply_MPIDense(mat);
1215: MatDuplicate(oldmat->A,cpvalues,&a->A);
1216: PetscLogObjectParent(mat,a->A);
1217: *newmat = mat;
1218: return(0);
1219: }
1221: #include "petscsys.h"
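/*
   MatLoad_MPIDense_DenseInFile - the binary file stores the matrix by rows;
   process 0 reads each process's block of rows and ships it out with MPI_Send,
   and every process copies its block into the column-major local array (hence
   the explicit transpose in the loops below).
*/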
1225: int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1226: {
1227: int *rowners,i,size,rank,m,ierr,nz,j;
1228: PetscScalar *array,*vals,*vals_ptr;
1229: MPI_Status status;
1232: MPI_Comm_rank(comm,&rank);
1233: MPI_Comm_size(comm,&size);
1235: /* determine ownership of all rows */
1236: m = M/size + ((M % size) > rank);
1237: PetscMalloc((size+2)*sizeof(int),&rowners);
1238: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1239: rowners[0] = 0;
1240: for (i=2; i<=size; i++) {
1241: rowners[i] += rowners[i-1];
1242: }
1244: MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1245: MatGetArray(*newmat,&array);
1247: if (!rank) {
1248: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1250: /* read in my part of the matrix numerical values */
1251: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1252:
1253: /* insert into the matrix by rows (this is why we cannot read directly into the array) */
1254: vals_ptr = vals;
1255: for (i=0; i<m; i++) {
1256: for (j=0; j<N; j++) {
1257: array[i + j*m] = *vals_ptr++;
1258: }
1259: }
1261: /* read in the other processes' parts and ship them out */
1262: for (i=1; i<size; i++) {
1263: nz = (rowners[i+1] - rowners[i])*N;
1264: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1265: MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1266: }
1267: } else {
1268: /* receive numeric values */
1269: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1271: /* receive message of values*/
1272: MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1274: /* insert into the matrix by rows (this is why we cannot read directly into the array) */
1275: vals_ptr = vals;
1276: for (i=0; i<m; i++) {
1277: for (j=0; j<N; j++) {
1278: array[i + j*m] = *vals_ptr++;
1279: }
1280: }
1281: }
1282: PetscFree(rowners);
1283: PetscFree(vals);
1284: MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1285: MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1286: return(0);
1287: }
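/*
   MatLoad_MPIDense - process 0 reads the binary header and broadcasts the sizes;
   if the file holds a dense matrix the work is handed to
   MatLoad_MPIDense_DenseInFile(), otherwise the row lengths, column indices, and
   values stored in sparse format are read on process 0, distributed to their
   owning processes, and inserted with MatSetValues().
*/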
1291: int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1292: {
1293: Mat A;
1294: PetscScalar *vals,*svals;
1295: MPI_Comm comm = ((PetscObject)viewer)->comm;
1296: MPI_Status status;
1297: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1298: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1299: int tag = ((PetscObject)viewer)->tag;
1300: int i,nz,ierr,j,rstart,rend,fd;
1303: MPI_Comm_size(comm,&size);
1304: MPI_Comm_rank(comm,&rank);
1305: if (!rank) {
1306: PetscViewerBinaryGetDescriptor(viewer,&fd);
1307: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1308: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1309: }
1311: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1312: M = header[1]; N = header[2]; nz = header[3];
1314: /*
1315: Handle the case where the matrix is stored on disk as a dense matrix
1316: */
1317: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1318: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1319: return(0);
1320: }
1322: /* determine ownership of all rows */
1323: m = M/size + ((M % size) > rank);
1324: PetscMalloc((size+2)*sizeof(int),&rowners);
1325: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1326: rowners[0] = 0;
1327: for (i=2; i<=size; i++) {
1328: rowners[i] += rowners[i-1];
1329: }
1330: rstart = rowners[rank];
1331: rend = rowners[rank+1];
1333: /* distribute row lengths to all processors */
1334: PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);
1335: offlens = ourlens + (rend-rstart);
1336: if (!rank) {
1337: PetscMalloc(M*sizeof(int),&rowlengths);
1338: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1339: PetscMalloc(size*sizeof(int),&sndcounts);
1340: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1341: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1342: PetscFree(sndcounts);
1343: } else {
1344: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1345: }
1347: if (!rank) {
1348: /* calculate the number of nonzeros on each processor */
1349: PetscMalloc(size*sizeof(int),&procsnz);
1350: PetscMemzero(procsnz,size*sizeof(int));
1351: for (i=0; i<size; i++) {
1352: for (j=rowners[i]; j< rowners[i+1]; j++) {
1353: procsnz[i] += rowlengths[j];
1354: }
1355: }
1356: PetscFree(rowlengths);
1358: /* determine max buffer needed and allocate it */
1359: maxnz = 0;
1360: for (i=0; i<size; i++) {
1361: maxnz = PetscMax(maxnz,procsnz[i]);
1362: }
1363: PetscMalloc(maxnz*sizeof(int),&cols);
1365: /* read in my part of the matrix column indices */
1366: nz = procsnz[0];
1367: PetscMalloc(nz*sizeof(int),&mycols);
1368: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1370: /* read in everyone else's part and ship it off */
1371: for (i=1; i<size; i++) {
1372: nz = procsnz[i];
1373: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1374: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1375: }
1376: PetscFree(cols);
1377: } else {
1378: /* determine buffer space needed for message */
1379: nz = 0;
1380: for (i=0; i<m; i++) {
1381: nz += ourlens[i];
1382: }
1383: PetscMalloc(nz*sizeof(int),&mycols);
1385: /* receive message of column indices*/
1386: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1387: MPI_Get_count(&status,MPI_INT,&maxnz);
1388: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1389: }
1391: /* loop over local rows, determining the number of off-diagonal entries */
1392: PetscMemzero(offlens,m*sizeof(int));
1393: jj = 0;
1394: for (i=0; i<m; i++) {
1395: for (j=0; j<ourlens[i]; j++) {
1396: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1397: jj++;
1398: }
1399: }
1401: /* create our matrix */
1402: for (i=0; i<m; i++) {
1403: ourlens[i] -= offlens[i];
1404: }
1405: MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1406: A = *newmat;
1407: for (i=0; i<m; i++) {
1408: ourlens[i] += offlens[i];
1409: }
1411: if (!rank) {
1412: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1414: /* read in my part of the matrix numerical values */
1415: nz = procsnz[0];
1416: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1417:
1418: /* insert into matrix */
1419: jj = rstart;
1420: smycols = mycols;
1421: svals = vals;
1422: for (i=0; i<m; i++) {
1423: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1424: smycols += ourlens[i];
1425: svals += ourlens[i];
1426: jj++;
1427: }
1429: /* read in the other processes' parts and ship them out */
1430: for (i=1; i<size; i++) {
1431: nz = procsnz[i];
1432: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1433: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1434: }
1435: PetscFree(procsnz);
1436: } else {
1437: /* receive numeric values */
1438: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1440: /* receive message of values*/
1441: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1442: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1443: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1445: /* insert into matrix */
1446: jj = rstart;
1447: smycols = mycols;
1448: svals = vals;
1449: for (i=0; i<m; i++) {
1450: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1451: smycols += ourlens[i];
1452: svals += ourlens[i];
1453: jj++;
1454: }
1455: }
1456: PetscFree(ourlens);
1457: PetscFree(vals);
1458: PetscFree(mycols);
1459: PetscFree(rowners);
1461: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1462: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1463: return(0);
1464: }