Actual source code: mpidense.c
1: /*$Id: mpidense.c,v 1.153 2001/03/23 23:21:49 balay Exp $*/
3: /*
4: Basic functions for parallel dense matrices.
5: */
6:
7: #include "src/mat/impls/dense/mpi/mpidense.h"
8: #include "src/vec/vecimpl.h"
10: EXTERN_C_BEGIN
11: int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
12: {
13: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
14: int m = A->m,rstart = mdn->rstart,ierr;
15: Scalar *array;
16: MPI_Comm comm;
19: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
21: /* The reuse aspect is not implemented efficiently */
22: if (reuse) { MatDestroy(*B);}
24: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
25: MatGetArray(mdn->A,&array);
26: MatCreateSeqDense(comm,m,m,array+m*rstart,B);
27: MatRestoreArray(mdn->A,&array);
28: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
29: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
30:
31: *iscopy = PETSC_TRUE;
32: return(0);
33: }
34: EXTERN_C_END
36: EXTERN int MatSetUpMultiply_MPIDense(Mat);
38: int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
39: {
40: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
41: int ierr,i,j,rstart = A->rstart,rend = A->rend,row;
42: PetscTruth roworiented = A->roworiented;
45: for (i=0; i<m; i++) {
46: if (idxm[i] < 0) continue;
47: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
48: if (idxm[i] >= rstart && idxm[i] < rend) {
49: row = idxm[i] - rstart;
50: if (roworiented) {
51: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
52: } else {
53: for (j=0; j<n; j++) {
54: if (idxn[j] < 0) continue;
55: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
56: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
57: }
58: }
59: } else {
60: if (!A->donotstash) {
61: if (roworiented) {
62: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
63: } else {
64: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
65: }
66: }
67: }
68: }
69: return(0);
70: }
72: int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
73: {
74: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
75: int ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
78: for (i=0; i<m; i++) {
79: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
80: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
81: if (idxm[i] >= rstart && idxm[i] < rend) {
82: row = idxm[i] - rstart;
83: for (j=0; j<n; j++) {
84: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
85: if (idxn[j] >= mat->N) {
86: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
87: }
88: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
89: }
90: } else {
91: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
92: }
93: }
94: return(0);
95: }
97: int MatGetArray_MPIDense(Mat A,Scalar **array)
98: {
99: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
100: int ierr;
103: MatGetArray(a->A,array);
104: return(0);
105: }
107: static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
108: {
109: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
110: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
111: int i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
112: Scalar *av,*bv,*v = lmat->v;
113: Mat newmat;
116: ISGetIndices(isrow,&irow);
117: ISGetIndices(iscol,&icol);
118: ISGetLocalSize(isrow,&nrows);
119: ISGetLocalSize(iscol,&ncols);
121: /* No parallel redistribution currently supported! Should really check each index set
122: to confirm that it is OK. ... Currently supports only a submatrix with the same partitioning as the
123: original matrix! */
125: MatGetLocalSize(A,&nlrows,&nlcols);
126: MatGetOwnershipRange(A,&rstart,&rend);
127:
128: /* Check submatrix call */
129: if (scall == MAT_REUSE_MATRIX) {
130: /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
131: /* Really need to test rows and column sizes! */
132: newmat = *B;
133: } else {
134: /* Create and fill new matrix */
135: MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);
136: }
138: /* Now extract the data pointers and do the copy, column at a time */
139: newmatd = (Mat_MPIDense*)newmat->data;
140: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
141:
142: for (i=0; i<ncols; i++) {
143: av = v + nlrows*icol[i];
144: for (j=0; j<nrows; j++) {
145: *bv++ = av[irow[j] - rstart];
146: }
147: }
149: /* Assemble the matrices so that the correct flags are set */
150: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
151: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
153: /* Free work space */
154: ISRestoreIndices(isrow,&irow);
155: ISRestoreIndices(iscol,&icol);
156: *B = newmat;
157: return(0);
158: }
160: int MatRestoreArray_MPIDense(Mat A,Scalar **array)
161: {
163: return(0);
164: }
166: int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
167: {
168: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
169: MPI_Comm comm = mat->comm;
170: int ierr,nstash,reallocs;
171: InsertMode addv;
174: /* make sure all processors are either in INSERTMODE or ADDMODE */
175: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
176: if (addv == (ADD_VALUES|INSERT_VALUES)) {
177: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
178: }
179: mat->insertmode = addv; /* in case this processor had no cache */
181: MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
182: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
183: PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
184: return(0);
185: }
187: int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
188: {
189: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
190: int i,n,ierr,*row,*col,flg,j,rstart,ncols;
191: Scalar *val;
192: InsertMode addv=mat->insertmode;
195: /* wait on receives */
196: while (1) {
197: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
198: if (!flg) break;
199:
200: for (i=0; i<n;) {
201: /* Now identify the consecutive vals belonging to the same row */
202: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
203: if (j < n) ncols = j-i;
204: else ncols = n-i;
205: /* Now assemble all these values with a single function call */
206: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
207: i = j;
208: }
209: }
210: MatStashScatterEnd_Private(&mat->stash);
211:
212: MatAssemblyBegin(mdn->A,mode);
213: MatAssemblyEnd(mdn->A,mode);
215: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
216: MatSetUpMultiply_MPIDense(mat);
217: }
218: return(0);
219: }
221: int MatZeroEntries_MPIDense(Mat A)
222: {
223: int ierr;
224: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
227: MatZeroEntries(l->A);
228: return(0);
229: }
231: int MatGetBlockSize_MPIDense(Mat A,int *bs)
232: {
234: *bs = 1;
235: return(0);
236: }
238: /* the code does not do the diagonal entries correctly unless the
239: matrix is square and the column and row ownerships are identical.
240: This is a BUG. The only way to fix it seems to be to access
241: mdn->A and mdn->B directly and not through the MatZeroRows()
242: routine.
243: */
244: int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
245: {
246: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
247: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
248: int *procs,*nprocs,j,idx,nsends,*work;
249: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
250: int *rvalues,tag = A->tag,count,base,slen,n,*source;
251: int *lens,imdex,*lrows,*values;
252: MPI_Comm comm = A->comm;
253: MPI_Request *send_waits,*recv_waits;
254: MPI_Status recv_status,*send_status;
255: IS istmp;
256: PetscTruth found;
259: ISGetLocalSize(is,&N);
260: ISGetIndices(is,&rows);
262: /* first count number of contributors to each processor */
263: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
264: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
265: procs = nprocs + size;
266: ierr = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
267: for (i=0; i<N; i++) {
268: idx = rows[i];
269: found = PETSC_FALSE;
270: for (j=0; j<size; j++) {
271: if (idx >= owners[j] && idx < owners[j+1]) {
272: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
273: }
274: }
275: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
276: }
277: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
279: /* inform other processors of number of messages and max length*/
280: ierr = PetscMalloc(2*size*sizeof(int),&work);
281: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
282: nmax = work[rank];
283: nrecvs = work[size+rank];
284: ierr = PetscFree(work);
286: /* post receives: */
287: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
288: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
289: for (i=0; i<nrecvs; i++) {
290: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
291: }
293: /* do sends:
294: 1) starts[i] gives the starting index in svalues for stuff going to
295: the ith processor
296: */
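/* Illustrative example (the numbers are hypothetical, not taken from this routine): with
   size = 3 and nprocs = {2,0,3}, the prefix sums computed below give starts = {0,2,2},
   so the two rows owned by process 0 are packed into svalues[0..1] and the three rows
   owned by process 2 into svalues[2..4]. */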
297: PetscMalloc((N+1)*sizeof(int),&svalues);
298: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
299: PetscMalloc((size+1)*sizeof(int),&starts);
300: starts[0] = 0;
301: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
302: for (i=0; i<N; i++) {
303: svalues[starts[owner[i]]++] = rows[i];
304: }
305: ISRestoreIndices(is,&rows);
307: starts[0] = 0;
308: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
309: count = 0;
310: for (i=0; i<size; i++) {
311: if (procs[i]) {
312: MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
313: }
314: }
315: PetscFree(starts);
317: base = owners[rank];
319: /* wait on receives */
320: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
321: source = lens + nrecvs;
322: count = nrecvs; slen = 0;
323: while (count) {
324: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
325: /* unpack receives into our local space */
326: MPI_Get_count(&recv_status,MPI_INT,&n);
327: source[imdex] = recv_status.MPI_SOURCE;
328: lens[imdex] = n;
329: slen += n;
330: count--;
331: }
332: PetscFree(recv_waits);
333:
334: /* move the data into the send scatter */
335: PetscMalloc((slen+1)*sizeof(int),&lrows);
336: count = 0;
337: for (i=0; i<nrecvs; i++) {
338: values = rvalues + i*nmax;
339: for (j=0; j<lens[i]; j++) {
340: lrows[count++] = values[j] - base;
341: }
342: }
343: PetscFree(rvalues);
344: PetscFree(lens);
345: PetscFree(owner);
346: PetscFree(nprocs);
347:
348: /* actually zap the local rows */
349: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
350: PetscLogObjectParent(A,istmp);
351: PetscFree(lrows);
352: MatZeroRows(l->A,istmp,diag);
353: ISDestroy(istmp);
355: /* wait on sends */
356: if (nsends) {
357: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
358: MPI_Waitall(nsends,send_waits,send_status);
359: PetscFree(send_status);
360: }
361: PetscFree(send_waits);
362: PetscFree(svalues);
364: return(0);
365: }
367: int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
368: {
369: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
370: int ierr;
373: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
374: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
375: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
376: return(0);
377: }
379: int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
380: {
381: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
382: int ierr;
385: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
386: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
387: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
388: return(0);
389: }
391: int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
392: {
393: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
394: int ierr;
395: Scalar zero = 0.0;
398: VecSet(&zero,yy);
399: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
400: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
401: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
402: return(0);
403: }
405: int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
406: {
407: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
408: int ierr;
411: VecCopy(yy,zz);
412: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
413: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
414: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
415: return(0);
416: }
418: int MatGetDiagonal_MPIDense(Mat A,Vec v)
419: {
420: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
421: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
422: int ierr,len,i,n,m = A->m,radd;
423: Scalar *x,zero = 0.0;
424:
426: VecSet(&zero,v);
427: VecGetArray(v,&x);
428: VecGetSize(v,&n);
429: if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
430: len = PetscMin(a->A->m,a->A->n);
431: radd = a->rstart*m;
432: for (i=0; i<len; i++) {
433: x[i] = aloc->v[radd + i*m + i];
434: }
435: VecRestoreArray(v,&x);
436: return(0);
437: }
439: int MatDestroy_MPIDense(Mat mat)
440: {
441: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
442: int ierr;
446: #if defined(PETSC_USE_LOG)
447: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
448: #endif
449: MatStashDestroy_Private(&mat->stash);
450: PetscFree(mdn->rowners);
451: MatDestroy(mdn->A);
452: if (mdn->lvec) VecDestroy(mdn->lvec);
453: if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
454: if (mdn->factor) {
455: if (mdn->factor->temp) {PetscFree(mdn->factor->temp);}
456: if (mdn->factor->tag) {PetscFree(mdn->factor->tag);}
457: if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
458: PetscFree(mdn->factor);
459: }
460: PetscFree(mdn);
461: return(0);
462: }
464: static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
465: {
466: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
467: int ierr;
470: if (mdn->size == 1) {
471: MatView(mdn->A,viewer);
472: }
473: else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
474: return(0);
475: }
477: static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
478: {
479: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
480: int ierr,size = mdn->size,rank = mdn->rank;
481: PetscViewerType vtype;
482: PetscTruth isascii,isdraw;
483: PetscViewer sviewer;
484: PetscViewerFormat format;
487: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
488: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
489: if (isascii) {
490: PetscViewerGetType(viewer,&vtype);
491: PetscViewerGetFormat(viewer,&format);
492: if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
493: MatInfo info;
494: MatGetInfo(mat,MAT_LOCAL,&info);
495: PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
496: (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
497: PetscViewerFlush(viewer);
498: VecScatterView(mdn->Mvctx,viewer);
499: return(0);
500: } else if (format == PETSC_VIEWER_ASCII_INFO) {
501: return(0);
502: }
503: } else if (isdraw) {
504: PetscDraw draw;
505: PetscTruth isnull;
507: PetscViewerDrawGetDraw(viewer,0,&draw);
508: PetscDrawIsNull(draw,&isnull);
509: if (isnull) return(0);
510: }
512: if (size == 1) {
513: MatView(mdn->A,viewer);
514: } else {
515: /* assemble the entire matrix onto first processor. */
516: Mat A;
517: int M = mat->M,N = mat->N,m,row,i,nz,*cols;
518: Scalar *vals;
520: if (!rank) {
521: MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);
522: } else {
523: MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);
524: }
525: PetscLogObjectParent(mat,A);
527: /* Copy the matrix ... This isn't the most efficient means,
528: but it's quick for now */
529: row = mdn->rstart; m = mdn->A->m;
530: for (i=0; i<m; i++) {
531: MatGetRow(mat,row,&nz,&cols,&vals);
532: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
533: MatRestoreRow(mat,row,&nz,&cols,&vals);
534: row++;
535: }
537: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
538: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
539: PetscViewerGetSingleton(viewer,&sviewer);
540: if (!rank) {
541: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
542: }
543: PetscViewerRestoreSingleton(viewer,&sviewer);
544: PetscViewerFlush(viewer);
545: MatDestroy(A);
546: }
547: return(0);
548: }
550: int MatView_MPIDense(Mat mat,PetscViewer viewer)
551: {
552: int ierr;
553: PetscTruth isascii,isbinary,isdraw,issocket;
554:
556:
557: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
558: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
559: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
560: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
562: if (isascii || issocket || isdraw) {
563: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
564: } else if (isbinary) {
565: MatView_MPIDense_Binary(mat,viewer);
566: } else {
567: SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
568: }
569: return(0);
570: }
572: int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
573: {
574: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
575: Mat mdn = mat->A;
576: int ierr;
577: PetscReal isend[5],irecv[5];
580: info->rows_global = (double)A->M;
581: info->columns_global = (double)A->N;
582: info->rows_local = (double)A->m;
583: info->columns_local = (double)A->N;
584: info->block_size = 1.0;
585: MatGetInfo(mdn,MAT_LOCAL,info);
586: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
587: isend[3] = info->memory; isend[4] = info->mallocs;
588: if (flag == MAT_LOCAL) {
589: info->nz_used = isend[0];
590: info->nz_allocated = isend[1];
591: info->nz_unneeded = isend[2];
592: info->memory = isend[3];
593: info->mallocs = isend[4];
594: } else if (flag == MAT_GLOBAL_MAX) {
595: MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);
596: info->nz_used = irecv[0];
597: info->nz_allocated = irecv[1];
598: info->nz_unneeded = irecv[2];
599: info->memory = irecv[3];
600: info->mallocs = irecv[4];
601: } else if (flag == MAT_GLOBAL_SUM) {
602: MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);
603: info->nz_used = irecv[0];
604: info->nz_allocated = irecv[1];
605: info->nz_unneeded = irecv[2];
606: info->memory = irecv[3];
607: info->mallocs = irecv[4];
608: }
609: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
610: info->fill_ratio_needed = 0;
611: info->factor_mallocs = 0;
612: return(0);
613: }
615: int MatSetOption_MPIDense(Mat A,MatOption op)
616: {
617: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
618: int ierr;
621: if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
622: op == MAT_YES_NEW_NONZERO_LOCATIONS ||
623: op == MAT_NEW_NONZERO_LOCATION_ERR ||
624: op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
625: op == MAT_COLUMNS_SORTED ||
626: op == MAT_COLUMNS_UNSORTED) {
627: MatSetOption(a->A,op);
628: } else if (op == MAT_ROW_ORIENTED) {
629: a->roworiented = PETSC_TRUE;
630: MatSetOption(a->A,op);
631: } else if (op == MAT_ROWS_SORTED ||
632: op == MAT_ROWS_UNSORTED ||
633: op == MAT_YES_NEW_DIAGONALS ||
634: op == MAT_USE_HASH_TABLE) {
635: PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
636: } else if (op == MAT_COLUMN_ORIENTED) {
637: a->roworiented = PETSC_FALSE;
638: MatSetOption(a->A,op);
639: } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
640: a->donotstash = PETSC_TRUE;
641: } else if (op == MAT_NO_NEW_DIAGONALS) {
642: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
643: } else {
644: SETERRQ(PETSC_ERR_SUP,"unknown option");
645: }
646: return(0);
647: }
649: int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
650: {
651: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
654: if (m) *m = mat->rstart;
655: if (n) *n = mat->rend;
656: return(0);
657: }
659: int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
660: {
661: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
662: int lrow,rstart = mat->rstart,rend = mat->rend,ierr;
665: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
666: lrow = row - rstart;
667: MatGetRow(mat->A,lrow,nz,idx,v);
668: return(0);
669: }
671: int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
672: {
676: if (idx) {PetscFree(*idx);}
677: if (v) {PetscFree(*v);}
678: return(0);
679: }
681: int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
682: {
683: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
684: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
685: Scalar *l,*r,x,*v;
686: int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
689: MatGetLocalSize(A,&s2,&s3);
690: if (ll) {
691: VecGetLocalSize(ll,&s2a);
692: if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
693: VecGetArray(ll,&l);
694: for (i=0; i<m; i++) {
695: x = l[i];
696: v = mat->v + i;
697: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
698: }
699: VecRestoreArray(ll,&l);
700: PetscLogFlops(n*m);
701: }
702: if (rr) {
703: VecGetSize(rr,&s3a);
704: if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
705: VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
706: VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
707: VecGetArray(mdn->lvec,&r);
708: for (i=0; i<n; i++) {
709: x = r[i];
710: v = mat->v + i*m;
711: for (j=0; j<m; j++) { (*v++) *= x;}
712: }
713: VecRestoreArray(mdn->lvec,&r);
714: PetscLogFlops(n*m);
715: }
716: return(0);
717: }
719: int MatNorm_MPIDense(Mat A,NormType type,PetscReal *norm)
720: {
721: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
722: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
723: int ierr,i,j;
724: PetscReal sum = 0.0;
725: Scalar *v = mat->v;
728: if (mdn->size == 1) {
729: MatNorm(mdn->A,type,norm);
730: } else {
731: if (type == NORM_FROBENIUS) {
732: for (i=0; i<mdn->A->n*mdn->A->m; i++) {
733: #if defined(PETSC_USE_COMPLEX)
734: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
735: #else
736: sum += (*v)*(*v); v++;
737: #endif
738: }
739: MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);
740: *norm = sqrt(*norm);
741: PetscLogFlops(2*mdn->A->n*mdn->A->m);
742: } else if (type == NORM_1) {
743: PetscReal *tmp,*tmp2;
744: PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
745: tmp2 = tmp + A->N;
746: PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
747: *norm = 0.0;
748: v = mat->v;
749: for (j=0; j<mdn->A->n; j++) {
750: for (i=0; i<mdn->A->m; i++) {
751: tmp[j] += PetscAbsScalar(*v); v++;
752: }
753: }
754: MPI_Allreduce(tmp,tmp2,A->N,MPI_DOUBLE,MPI_SUM,A->comm);
755: for (j=0; j<A->N; j++) {
756: if (tmp2[j] > *norm) *norm = tmp2[j];
757: }
758: PetscFree(tmp);
759: PetscLogFlops(A->n*A->m);
760: } else if (type == NORM_INFINITY) { /* max row norm */
761: PetscReal ntemp;
762: MatNorm(mdn->A,type,&ntemp);
763: MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);
764: } else {
765: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
766: }
767: }
768: return(0);
769: }
771: int MatTranspose_MPIDense(Mat A,Mat *matout)
772: {
773: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
774: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
775: Mat B;
776: int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
777: int j,i,ierr;
778: Scalar *v;
781: if (!matout && M != N) {
782: SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
783: }
784: MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
786: m = a->A->m; n = a->A->n; v = Aloc->v;
787: PetscMalloc(n*sizeof(int),&rwork);
788: for (j=0; j<n; j++) {
789: for (i=0; i<m; i++) rwork[i] = rstart + i;
790: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
791: v += m;
792: }
793: PetscFree(rwork);
794: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
795: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
796: if (matout) {
797: *matout = B;
798: } else {
799: MatHeaderCopy(A,B);
800: }
801: return(0);
802: }
804: #include "petscblaslapack.h"
805: int MatScale_MPIDense(Scalar *alpha,Mat inA)
806: {
807: Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
808: Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
809: int one = 1,nz;
812: nz = inA->m*inA->N;
813: BLscal_(&nz,alpha,a->v,&one);
814: PetscLogFlops(nz);
815: return(0);
816: }
818: static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
819: EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
821: int MatSetUpPreallocation_MPIDense(Mat A)
822: {
823: int ierr;
826: MatMPIDenseSetPreallocation(A,0);
827: return(0);
828: }
830: /* -------------------------------------------------------------------*/
831: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
832: MatGetRow_MPIDense,
833: MatRestoreRow_MPIDense,
834: MatMult_MPIDense,
835: MatMultAdd_MPIDense,
836: MatMultTranspose_MPIDense,
837: MatMultTransposeAdd_MPIDense,
838: 0,
839: 0,
840: 0,
841: 0,
842: 0,
843: 0,
844: 0,
845: MatTranspose_MPIDense,
846: MatGetInfo_MPIDense,0,
847: MatGetDiagonal_MPIDense,
848: MatDiagonalScale_MPIDense,
849: MatNorm_MPIDense,
850: MatAssemblyBegin_MPIDense,
851: MatAssemblyEnd_MPIDense,
852: 0,
853: MatSetOption_MPIDense,
854: MatZeroEntries_MPIDense,
855: MatZeroRows_MPIDense,
856: 0,
857: 0,
858: 0,
859: 0,
860: MatSetUpPreallocation_MPIDense,
861: 0,
862: MatGetOwnershipRange_MPIDense,
863: 0,
864: 0,
865: MatGetArray_MPIDense,
866: MatRestoreArray_MPIDense,
867: MatDuplicate_MPIDense,
868: 0,
869: 0,
870: 0,
871: 0,
872: 0,
873: MatGetSubMatrices_MPIDense,
874: 0,
875: MatGetValues_MPIDense,
876: 0,
877: 0,
878: MatScale_MPIDense,
879: 0,
880: 0,
881: 0,
882: MatGetBlockSize_MPIDense,
883: 0,
884: 0,
885: 0,
886: 0,
887: 0,
888: 0,
889: 0,
890: 0,
891: 0,
892: MatGetSubMatrix_MPIDense,
893: MatDestroy_MPIDense,
894: MatView_MPIDense,
895: MatGetMaps_Petsc};
897: EXTERN_C_BEGIN
898: int MatCreate_MPIDense(Mat mat)
899: {
900: Mat_MPIDense *a;
901: int ierr,i;
904: ierr = PetscNew(Mat_MPIDense,&a);
905: mat->data = (void*)a;
906: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
907: mat->factor = 0;
908: mat->mapping = 0;
910: a->factor = 0;
911: mat->insertmode = NOT_SET_VALUES;
912: MPI_Comm_rank(mat->comm,&a->rank);
913: MPI_Comm_size(mat->comm,&a->size);
915: PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
916: PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
917: a->nvec = mat->n;
919: /* the information in the maps duplicates the information computed below, eventually
920: we should remove the duplicate information that is not contained in the maps */
921: MapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
922: MapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);
924: /* build local table of row and column ownerships */
925: ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);
926: a->cowners = a->rowners + a->size + 1;
927: PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
928: MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);
929: a->rowners[0] = 0;
930: for (i=2; i<=a->size; i++) {
931: a->rowners[i] += a->rowners[i-1];
932: }
933: a->rstart = a->rowners[a->rank];
934: a->rend = a->rowners[a->rank+1];
935: ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);
936: a->cowners[0] = 0;
937: for (i=2; i<=a->size; i++) {
938: a->cowners[i] += a->cowners[i-1];
939: }
941: /* build cache for off array entries formed */
942: a->donotstash = PETSC_FALSE;
943: MatStashCreate_Private(mat->comm,1,&mat->stash);
945: /* stuff used for matrix vector multiply */
946: a->lvec = 0;
947: a->Mvctx = 0;
948: a->roworiented = PETSC_TRUE;
950: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
951: "MatGetDiagonalBlock_MPIDense",
952: MatGetDiagonalBlock_MPIDense);
953: return(0);
954: }
955: EXTERN_C_END
957: /*@C
958: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
960: Not collective
962: Input Parameters:
963: + A - the matrix
964: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
965: to control all matrix memory allocation.
967: Notes:
968: The dense format is fully compatible with standard Fortran 77
969: storage by columns.
971: The data input variable is intended primarily for Fortran programmers
972: who wish to allocate their own matrix memory space. Most users should
973: set data=PETSC_NULL.
975: Level: intermediate
977: .keywords: matrix,dense, parallel
979: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
980: @*/
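/*
   A minimal usage sketch (illustrative only; comm, M, N and error checking of
   ierr are assumed to be supplied by the caller): create the matrix, set its
   type, and let PETSc allocate the local dense storage by passing PETSC_NULL.

      Mat A;
      ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
      ierr = MatSetType(A,MATMPIDENSE);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);

   Passing a non-null data pointer instead hands PETSc a user-allocated
   m-by-N block in Fortran (column-major) order, as noted above.
*/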
981: int MatMPIDenseSetPreallocation(Mat mat,Scalar *data)
982: {
983: Mat_MPIDense *a;
984: int ierr;
985: PetscTruth flg2;
988: PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);
989: if (!flg2) return(0);
990: mat->preallocated = PETSC_TRUE;
991: /* Note: For now, when data is specified above, this assumes the user correctly
992: allocates the local dense storage space. We should add error checking. */
994: a = (Mat_MPIDense*)mat->data;
995: MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);
996: PetscLogObjectParent(mat,a->A);
997: return(0);
998: }
1000: /*@C
1001: MatCreateMPIDense - Creates a parallel matrix in dense format.
1003: Collective on MPI_Comm
1005: Input Parameters:
1006: + comm - MPI communicator
1007: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1008: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1009: . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1010: . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1011: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1012: to control all matrix memory allocation.
1014: Output Parameter:
1015: . A - the matrix
1017: Notes:
1018: The dense format is fully compatible with standard Fortran 77
1019: storage by columns.
1021: The data input variable is intended primarily for Fortran programmers
1022: who wish to allocate their own matrix memory space. Most users should
1023: set data=PETSC_NULL.
1025: The user MUST specify either the local or global matrix dimensions
1026: (possibly both).
1028: Level: intermediate
1030: .keywords: matrix,dense, parallel
1032: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1033: @*/
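/*
   Usage sketch (hypothetical names; comm, m, n, i, j, value and checking of
   ierr are assumed to come from the caller): give the local sizes, let PETSc
   derive the global ones, and assemble after insertion.

      Mat B;
      ierr = MatCreateMPIDense(comm,m,n,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&B);
      ierr = MatSetValues(B,1,&i,1,&j,&value,INSERT_VALUES);
      ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

   Because the local block is stored in the column-major layout described in
   the Notes, entry (i,j) with rstart <= i < rend sits at
   array[(i - rstart) + j*m] when the array is obtained via MatGetArray().
*/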
1034: int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
1035: {
1036: int ierr,size;
1039: MatCreate(comm,m,n,M,N,A);
1040: MPI_Comm_size(comm,&size);
1041: if (size > 1) {
1042: MatSetType(*A,MATMPIDENSE);
1043: MatMPIDenseSetPreallocation(*A,data);
1044: } else {
1045: MatSetType(*A,MATSEQDENSE);
1046: MatSeqDenseSetPreallocation(*A,data);
1047: }
1048: return(0);
1049: }
1051: static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1052: {
1053: Mat mat;
1054: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1055: int ierr;
1058: *newmat = 0;
1059: MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1060: MatSetType(mat,MATMPIDENSE);
1061: ierr = PetscNew(Mat_MPIDense,&a);
1062: mat->data = (void*)a;
1063: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1064: mat->factor = A->factor;
1065: mat->assembled = PETSC_TRUE;
1066: mat->preallocated = PETSC_TRUE;
1068: a->rstart = oldmat->rstart;
1069: a->rend = oldmat->rend;
1070: a->size = oldmat->size;
1071: a->rank = oldmat->rank;
1072: mat->insertmode = NOT_SET_VALUES;
1073: a->nvec = oldmat->nvec;
1074: a->donotstash = oldmat->donotstash;
1075: ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);
1076: PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1077: PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1078: MatStashCreate_Private(A->comm,1,&mat->stash);
1080: MatSetUpMultiply_MPIDense(mat);
1081: MatDuplicate(oldmat->A,cpvalues,&a->A);
1082: PetscLogObjectParent(mat,a->A);
1083: *newmat = mat;
1084: return(0);
1085: }
1087: #include "petscsys.h"
1089: int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1090: {
1091: int *rowners,i,size,rank,m,ierr,nz,j;
1092: Scalar *array,*vals,*vals_ptr;
1093: MPI_Status status;
1096: MPI_Comm_rank(comm,&rank);
1097: MPI_Comm_size(comm,&size);
1099: /* determine ownership of all rows */
1100: m = M/size + ((M % size) > rank);
1101: ierr = PetscMalloc((size+2)*sizeof(int),&rowners);
1102: ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1103: rowners[0] = 0;
1104: for (i=2; i<=size; i++) {
1105: rowners[i] += rowners[i-1];
1106: }
1108: MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1109: MatGetArray(*newmat,&array);
1111: if (!rank) {
1112: PetscMalloc(m*N*sizeof(Scalar),&vals);
1114: /* read in my part of the matrix numerical values */
1115: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1116:
1117: /* insert into matrix by row (this is why we cannot read directly into the array) */
1118: vals_ptr = vals;
1119: for (i=0; i<m; i++) {
1120: for (j=0; j<N; j++) {
1121: array[i + j*m] = *vals_ptr++;
1122: }
1123: }
1125: /* read in other processors and ship out */
1126: for (i=1; i<size; i++) {
1127: nz = (rowners[i+1] - rowners[i])*N;
1128: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1129: MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1130: }
1131: } else {
1132: /* receive numeric values */
1133: PetscMalloc(m*N*sizeof(Scalar),&vals);
1135: /* receive message of values*/
1136: MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1138: /* insert into matrix by row (this is why we cannot read directly into the array) */
1139: vals_ptr = vals;
1140: for (i=0; i<m; i++) {
1141: for (j=0; j<N; j++) {
1142: array[i + j*m] = *vals_ptr++;
1143: }
1144: }
1145: }
1146: PetscFree(rowners);
1147: PetscFree(vals);
1148: MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1149: MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1150: return(0);
1151: }
1153: EXTERN_C_BEGIN
1154: int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1155: {
1156: Mat A;
1157: Scalar *vals,*svals;
1158: MPI_Comm comm = ((PetscObject)viewer)->comm;
1159: MPI_Status status;
1160: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1161: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1162: int tag = ((PetscObject)viewer)->tag;
1163: int i,nz,ierr,j,rstart,rend,fd;
1166: MPI_Comm_size(comm,&size);
1167: MPI_Comm_rank(comm,&rank);
1168: if (!rank) {
1169: PetscViewerBinaryGetDescriptor(viewer,&fd);
1170: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1171: if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1172: }
1174: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1175: M = header[1]; N = header[2]; nz = header[3];
1177: /*
1178: Handle case where matrix is stored on disk as a dense matrix
1179: */
1180: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1181: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1182: return(0);
1183: }
1185: /* determine ownership of all rows */
1186: m = M/size + ((M % size) > rank);
1187: ierr = PetscMalloc((size+2)*sizeof(int),&rowners);
1188: ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1189: rowners[0] = 0;
1190: for (i=2; i<=size; i++) {
1191: rowners[i] += rowners[i-1];
1192: }
1193: rstart = rowners[rank];
1194: rend = rowners[rank+1];
1196: /* distribute row lengths to all processors */
1197: ierr = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);
1198: offlens = ourlens + (rend-rstart);
1199: if (!rank) {
1200: PetscMalloc(M*sizeof(int),&rowlengths);
1201: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1202: PetscMalloc(size*sizeof(int),&sndcounts);
1203: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1204: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1205: PetscFree(sndcounts);
1206: } else {
1207: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1208: }
1210: if (!rank) {
1211: /* calculate the number of nonzeros on each processor */
1212: PetscMalloc(size*sizeof(int),&procsnz);
1213: PetscMemzero(procsnz,size*sizeof(int));
1214: for (i=0; i<size; i++) {
1215: for (j=rowners[i]; j< rowners[i+1]; j++) {
1216: procsnz[i] += rowlengths[j];
1217: }
1218: }
1219: PetscFree(rowlengths);
1221: /* determine max buffer needed and allocate it */
1222: maxnz = 0;
1223: for (i=0; i<size; i++) {
1224: maxnz = PetscMax(maxnz,procsnz[i]);
1225: }
1226: PetscMalloc(maxnz*sizeof(int),&cols);
1228: /* read in my part of the matrix column indices */
1229: nz = procsnz[0];
1230: PetscMalloc(nz*sizeof(int),&mycols);
1231: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1233: /* read in everyone else's part and ship it off */
1234: for (i=1; i<size; i++) {
1235: nz = procsnz[i];
1236: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1237: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1238: }
1239: PetscFree(cols);
1240: } else {
1241: /* determine buffer space needed for message */
1242: nz = 0;
1243: for (i=0; i<m; i++) {
1244: nz += ourlens[i];
1245: }
1246: PetscMalloc(nz*sizeof(int),&mycols);
1248: /* receive message of column indices*/
1249: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1250: MPI_Get_count(&status,MPI_INT,&maxnz);
1251: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1252: }
1254: /* loop over local rows, determining number of off diagonal entries */
1255: PetscMemzero(offlens,m*sizeof(int));
1256: jj = 0;
1257: for (i=0; i<m; i++) {
1258: for (j=0; j<ourlens[i]; j++) {
1259: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1260: jj++;
1261: }
1262: }
1264: /* create our matrix */
1265: for (i=0; i<m; i++) {
1266: ourlens[i] -= offlens[i];
1267: }
1268: MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1269: A = *newmat;
1270: for (i=0; i<m; i++) {
1271: ourlens[i] += offlens[i];
1272: }
1274: if (!rank) {
1275: PetscMalloc(maxnz*sizeof(Scalar),&vals);
1277: /* read in my part of the matrix numerical values */
1278: nz = procsnz[0];
1279: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1280:
1281: /* insert into matrix */
1282: jj = rstart;
1283: smycols = mycols;
1284: svals = vals;
1285: for (i=0; i<m; i++) {
1286: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1287: smycols += ourlens[i];
1288: svals += ourlens[i];
1289: jj++;
1290: }
1292: /* read in other processors and ship out */
1293: for (i=1; i<size; i++) {
1294: nz = procsnz[i];
1295: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1296: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1297: }
1298: PetscFree(procsnz);
1299: } else {
1300: /* receive numeric values */
1301: PetscMalloc(nz*sizeof(Scalar),&vals);
1303: /* receive message of values*/
1304: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1305: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1306: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1308: /* insert into matrix */
1309: jj = rstart;
1310: smycols = mycols;
1311: svals = vals;
1312: for (i=0; i<m; i++) {
1313: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1314: smycols += ourlens[i];
1315: svals += ourlens[i];
1316: jj++;
1317: }
1318: }
1319: PetscFree(ourlens);
1320: PetscFree(vals);
1321: PetscFree(mycols);
1322: PetscFree(rowners);
1324: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1325: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1326: return(0);
1327: }
1328: EXTERN_C_END