Actual source code: mpibdiag.c
1: /*$Id: mpibdiag.c,v 1.205 2001/08/10 03:31:02 bsmith Exp $*/
2: /*
3: The basic matrix operations for parallel block diagonal
4: matrices.
5: */
6: #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
7: #include "src/vec/vecimpl.h"
11: int MatSetValues_MPIBDiag(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
12: {
13: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
14: int ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
15: PetscTruth roworiented = mbd->roworiented;
18: for (i=0; i<m; i++) {
19: if (idxm[i] < 0) continue;
20: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
21: if (idxm[i] >= rstart && idxm[i] < rend) {
22: row = idxm[i] - rstart;
23: for (j=0; j<n; j++) {
24: if (idxn[j] < 0) continue;
25: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
26: if (roworiented) {
27: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
28: } else {
29: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
30: }
31: }
32: } else {
33: if (!mbd->donotstash) {
34: if (roworiented) {
35: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
36: } else {
37: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
38: }
39: }
40: }
41: }
42: return(0);
43: }
47: int MatGetValues_MPIBDiag(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
48: {
49: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
50: int ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
53: for (i=0; i<m; i++) {
54: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
55: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
56: if (idxm[i] >= rstart && idxm[i] < rend) {
57: row = idxm[i] - rstart;
58: for (j=0; j<n; j++) {
59: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
60: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
61: MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
62: }
63: } else {
64: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
65: }
66: }
67: return(0);
68: }
72: int MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
73: {
74: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
75: MPI_Comm comm = mat->comm;
76: int ierr,nstash,reallocs;
77: InsertMode addv;
80: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
81: if (addv == (ADD_VALUES|INSERT_VALUES)) {
82: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
83: }
84: mat->insertmode = addv; /* in case this processor had no cache */
85: MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
86: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
87: PetscLogInfo(0,"MatAssemblyBegin_MPIBDiag:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
88: return(0);
89: }
93: int MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
94: {
95: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
96: Mat_SeqBDiag *mlocal;
97: int i,n,*row,*col;
98: int *tmp1,*tmp2,ierr,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
99: PetscScalar *val;
100: InsertMode addv = mat->insertmode;
104: while (1) {
105: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
106: if (!flg) break;
107:
108: for (i=0; i<n;) {
109: /* Now identify the consecutive vals belonging to the same row */
110: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
111: if (j < n) ncols = j-i;
112: else ncols = n-i;
113: /* Now assemble all these values with a single function call */
114: MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
115: i = j;
116: }
117: }
118: MatStashScatterEnd_Private(&mat->stash);
120: MatAssemblyBegin(mbd->A,mode);
121: MatAssemblyEnd(mbd->A,mode);
123: /* Fix main diagonal location and determine global diagonals */
124: mlocal = (Mat_SeqBDiag*)mbd->A->data;
125: Mblock = mat->M/mlocal->bs; Nblock = mat->N/mlocal->bs;
126: len = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
127: PetscMalloc(2*len*sizeof(int),&tmp1);
128: tmp2 = tmp1 + len;
129: PetscMemzero(tmp1,2*len*sizeof(int));
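/* Locate the global main diagonal within the local diagonal list and flag each
   block diagonal that is present locally (the global diagonal number is shifted
   by Mblock so it can index the work array); the flags are then summed across
   all processes so that every process recovers the full list of global diagonals. */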
130: mlocal->mainbd = -1;
131: for (i=0; i<mlocal->nd; i++) {
132: if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
133: tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
134: }
135: MPI_Allreduce(tmp1,tmp2,len,MPI_INT,MPI_SUM,mat->comm);
136: ict = 0;
137: for (i=0; i<len; i++) {
138: if (tmp2[i]) {
139: mbd->gdiag[ict] = i - Mblock;
140: ict++;
141: }
142: }
143: mbd->gnd = ict;
144: PetscFree(tmp1);
146: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
147: MatSetUpMultiply_MPIBDiag(mat);
148: }
149: return(0);
150: }
154: int MatGetBlockSize_MPIBDiag(Mat mat,int *bs)
155: {
156: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
157: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;
160: *bs = dmat->bs;
161: return(0);
162: }
166: int MatZeroEntries_MPIBDiag(Mat A)
167: {
168: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
169: int ierr;
172: MatZeroEntries(l->A);
173: return(0);
174: }
176: /* again this uses the same basic strategy as in the assembly and
177: scatter create routines; we should try to do it systematically
178: if we can figure out the proper level of generality. */
180: /* the code does not do the diagonal entries correctly unless the
181: matrix is square and the column and row ownerships are identical.
182: This is a BUG. The only way to fix it seems to be to access
183: aij->A and aij->B directly and not through the MatZeroRows()
184: routine.
185: */
189: int MatZeroRows_MPIBDiag(Mat A,IS is,const PetscScalar *diag)
190: {
191: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
192: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
193: int *nprocs,j,idx,nsends;
194: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
195: int *rvalues,tag = A->tag,count,base,slen,n,*source;
196: int *lens,imdex,*lrows,*values;
197: MPI_Comm comm = A->comm;
198: MPI_Request *send_waits,*recv_waits;
199: MPI_Status recv_status,*send_status;
200: IS istmp;
201: PetscTruth found;
204: ISGetLocalSize(is,&N);
205: ISGetIndices(is,&rows);
207: /* first count number of contributors to each processor */
208: PetscMalloc(2*size*sizeof(int),&nprocs);
209: PetscMemzero(nprocs,2*size*sizeof(int));
210: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
211: for (i=0; i<N; i++) {
212: idx = rows[i];
213: found = PETSC_FALSE;
214: for (j=0; j<size; j++) {
215: if (idx >= owners[j] && idx < owners[j+1]) {
216: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
217: }
218: }
219: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
220: }
221: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
223: /* inform other processors of number of messages and max length*/
224: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
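/* nrecvs is the number of messages this process will receive and nmax is an
   upper bound on their length, so a single (nrecvs+1)*(nmax+1) buffer suffices
   for all of the incoming row indices posted below */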
226: /* post receives: */
227: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
228: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
229: for (i=0; i<nrecvs; i++) {
230: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
231: }
233: /* do sends:
234: 1) starts[i] gives the starting index in svalues for stuff going to
235: the ith processor
236: */
237: PetscMalloc((N+1)*sizeof(int),&svalues);
238: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
239: PetscMalloc((size+1)*sizeof(int),&starts);
240: starts[0] = 0;
241: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
242: for (i=0; i<N; i++) {
243: svalues[starts[owner[i]]++] = rows[i];
244: }
245: ISRestoreIndices(is,&rows);
247: starts[0] = 0;
248: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
249: count = 0;
250: for (i=0; i<size; i++) {
251: if (nprocs[2*i+1]) {
252: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
253: }
254: }
255: PetscFree(starts);
257: base = owners[rank];
259: /* wait on receives */
260: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
261: source = lens + nrecvs;
262: count = nrecvs;
263: slen = 0;
264: while (count) {
265: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
266: /* unpack receives into our local space */
267: MPI_Get_count(&recv_status,MPI_INT,&n);
268: source[imdex] = recv_status.MPI_SOURCE;
269: lens[imdex] = n;
270: slen += n;
271: count--;
272: }
273: PetscFree(recv_waits);
274:
275: /* convert the received global row indices to local numbering */
276: PetscMalloc((slen+1)*sizeof(int),&lrows);
277: count = 0;
278: for (i=0; i<nrecvs; i++) {
279: values = rvalues + i*nmax;
280: for (j=0; j<lens[i]; j++) {
281: lrows[count++] = values[j] - base;
282: }
283: }
284: PetscFree(rvalues);
285: PetscFree(lens);
286: PetscFree(owner);
287: PetscFree(nprocs);
288:
289: /* actually zap the local rows */
290: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
291: PetscLogObjectParent(A,istmp);
292: PetscFree(lrows);
293: MatZeroRows(l->A,istmp,diag);
294: ISDestroy(istmp);
296: /* wait on sends */
297: if (nsends) {
298: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
299: MPI_Waitall(nsends,send_waits,send_status);
300: PetscFree(send_status);
301: }
302: PetscFree(send_waits);
303: PetscFree(svalues);
305: return(0);
306: }
310: int MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
311: {
312: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
313: int ierr;
316: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
317: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
318: (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
319: return(0);
320: }
324: int MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
325: {
326: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
327: int ierr;
330: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
331: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
332: (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
333: return(0);
334: }
338: int MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
339: {
340: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
341: int ierr;
342: PetscScalar zero = 0.0;
345: VecSet(&zero,yy);
346: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
347: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
348: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
349: return(0);
350: }
354: int MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
355: {
356: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
357: int ierr;
360: VecCopy(yy,zz);
361: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
362: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
363: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
364: return(0);
365: }
369: int MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
370: {
371: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
372: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mat->A->data;
373: int ierr;
374: PetscReal isend[5],irecv[5];
377: info->block_size = (PetscReal)dmat->bs;
378: MatGetInfo(mat->A,MAT_LOCAL,info);
379: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
380: isend[3] = info->memory; isend[4] = info->mallocs;
381: if (flag == MAT_LOCAL) {
382: info->nz_used = isend[0];
383: info->nz_allocated = isend[1];
384: info->nz_unneeded = isend[2];
385: info->memory = isend[3];
386: info->mallocs = isend[4];
387: } else if (flag == MAT_GLOBAL_MAX) {
388: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
389: info->nz_used = irecv[0];
390: info->nz_allocated = irecv[1];
391: info->nz_unneeded = irecv[2];
392: info->memory = irecv[3];
393: info->mallocs = irecv[4];
394: } else if (flag == MAT_GLOBAL_SUM) {
395: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
396: info->nz_used = irecv[0];
397: info->nz_allocated = irecv[1];
398: info->nz_unneeded = irecv[2];
399: info->memory = irecv[3];
400: info->mallocs = irecv[4];
401: }
402: info->rows_global = (double)matin->M;
403: info->columns_global = (double)matin->N;
404: info->rows_local = (double)matin->m;
405: info->columns_local = (double)matin->N;
406: return(0);
407: }
411: int MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
412: {
413: int ierr;
414: Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;
417: MatGetDiagonal(A->A,v);
418: return(0);
419: }
423: int MatDestroy_MPIBDiag(Mat mat)
424: {
425: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
426: int ierr;
427: #if defined(PETSC_USE_LOG)
428: Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;
431: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d, BSize=%d, NDiag=%d",mat->M,mat->N,ms->bs,ms->nd);
432: #else
434: #endif
435: MatStashDestroy_Private(&mat->stash);
436: PetscFree(mbd->rowners);
437: PetscFree(mbd->gdiag);
438: MatDestroy(mbd->A);
439: if (mbd->lvec) {VecDestroy(mbd->lvec);}
440: if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
441: PetscFree(mbd);
442: return(0);
443: }
448: static int MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
449: {
450: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
451: int ierr;
454: if (mbd->size == 1) {
455: MatView(mbd->A,viewer);
456: } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
457: return(0);
458: }
462: static int MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
463: {
464: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
465: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;
466: int ierr,i,size = mbd->size,rank = mbd->rank;
467: PetscTruth isascii,isdraw;
468: PetscViewer sviewer;
469: PetscViewerFormat format;
472: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
473: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
474: if (isascii) {
475: PetscViewerGetFormat(viewer,&format);
476: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
477: int nline = PetscMin(10,mbd->gnd),k,nk,np;
478: PetscViewerASCIIPrintf(viewer," block size=%d, total number of diagonals=%d\n",dmat->bs,mbd->gnd);
479: nk = (mbd->gnd-1)/nline + 1;
480: for (k=0; k<nk; k++) {
481: PetscViewerASCIIPrintf(viewer," global diag numbers:");
482: np = PetscMin(nline,mbd->gnd - nline*k);
483: for (i=0; i<np; i++) {
484: PetscViewerASCIIPrintf(viewer," %d",mbd->gdiag[i+nline*k]);
485: }
486: PetscViewerASCIIPrintf(viewer,"\n");
487: }
488: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
489: MatInfo info;
490: MPI_Comm_rank(mat->comm,&rank);
491: MatGetInfo(mat,MAT_LOCAL,&info);
492: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
493: (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
494: PetscViewerFlush(viewer);
495: VecScatterView(mbd->Mvctx,viewer);
496: }
497: return(0);
498: }
499: }
501: if (isdraw) {
502: PetscDraw draw;
503: PetscTruth isnull;
504: PetscViewerDrawGetDraw(viewer,0,&draw);
505: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
506: }
508: if (size == 1) {
509: MatView(mbd->A,viewer);
510: } else {
511: /* assemble the entire matrix onto first processor. */
512: Mat A;
513: int M = mat->M,N = mat->N,m,row,nz,*cols;
514: PetscScalar *vals;
515: Mat_SeqBDiag *Ambd = (Mat_SeqBDiag*)mbd->A->data;
517: if (!rank) {
518: MatCreateMPIBDiag(mat->comm,M,M,N,mbd->gnd,Ambd->bs,mbd->gdiag,PETSC_NULL,&A);
519: } else {
520: MatCreateMPIBDiag(mat->comm,0,M,N,0,Ambd->bs,PETSC_NULL,PETSC_NULL,&A);
521: }
522: PetscLogObjectParent(mat,A);
524: /* Copy the matrix ... This isn't the most efficient means,
525: but it's quick for now */
526: row = mbd->rstart;
527: m = mbd->A->m;
528: for (i=0; i<m; i++) {
529: MatGetRow(mat,row,&nz,&cols,&vals);
530: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
531: MatRestoreRow(mat,row,&nz,&cols,&vals);
532: row++;
533: }
534: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
535: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
536: PetscViewerGetSingleton(viewer,&sviewer);
537: if (!rank) {
538: MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
539: }
540: PetscViewerRestoreSingleton(viewer,&sviewer);
541: PetscViewerFlush(viewer);
542: MatDestroy(A);
543: }
544: return(0);
545: }
549: int MatView_MPIBDiag(Mat mat,PetscViewer viewer)
550: {
551: int ierr;
552: PetscTruth isascii,isdraw,isbinary;
555: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
556: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
557: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
558: if (isascii || isdraw) {
559: MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
560: } else if (isbinary) {
561: MatView_MPIBDiag_Binary(mat,viewer);
562: } else {
563: SETERRQ1(1,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
564: }
565: return(0);
566: }
570: int MatSetOption_MPIBDiag(Mat A,MatOption op)
571: {
572: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
573: int ierr;
575: switch (op) {
576: case MAT_NO_NEW_NONZERO_LOCATIONS:
577: case MAT_YES_NEW_NONZERO_LOCATIONS:
578: case MAT_NEW_NONZERO_LOCATION_ERR:
579: case MAT_NEW_NONZERO_ALLOCATION_ERR:
580: case MAT_NO_NEW_DIAGONALS:
581: case MAT_YES_NEW_DIAGONALS:
582: MatSetOption(mbd->A,op);
583: break;
584: case MAT_ROW_ORIENTED:
585: mbd->roworiented = PETSC_TRUE;
586: MatSetOption(mbd->A,op);
587: break;
588: case MAT_COLUMN_ORIENTED:
589: mbd->roworiented = PETSC_FALSE;
590: MatSetOption(mbd->A,op);
591: break;
592: case MAT_IGNORE_OFF_PROC_ENTRIES:
593: mbd->donotstash = PETSC_TRUE;
594: break;
595: case MAT_ROWS_SORTED:
596: case MAT_ROWS_UNSORTED:
597: case MAT_COLUMNS_SORTED:
598: case MAT_COLUMNS_UNSORTED:
599: PetscLogInfo(A,"MatSetOption_MPIBDiag:Option ignored\n");
600: break;
601: default:
602: SETERRQ(PETSC_ERR_SUP,"unknown option");
603: }
604: return(0);
605: }
609: int MatGetRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
610: {
611: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
612: int lrow,ierr;
615: if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows");
616: lrow = row - mat->rstart;
617: MatGetRow(mat->A,lrow,nz,idx,v);
618: return(0);
619: }
623: int MatRestoreRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,
624: PetscScalar **v)
625: {
626: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
627: int lrow,ierr;
630: lrow = row - mat->rstart;
631: MatRestoreRow(mat->A,lrow,nz,idx,v);
632: return(0);
633: }
638: int MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
639: {
640: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
641: Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
642: PetscReal sum = 0.0;
643: int ierr,d,i,nd = a->nd,bs = a->bs,len;
644: PetscScalar *dv;
647: if (type == NORM_FROBENIUS) {
648: for (d=0; d<nd; d++) {
649: dv = a->diagv[d];
650: len = a->bdlen[d]*bs*bs;
651: for (i=0; i<len; i++) {
652: #if defined(PETSC_USE_COMPLEX)
653: sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
654: #else
655: sum += dv[i]*dv[i];
656: #endif
657: }
658: }
659: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
660: *nrm = sqrt(*nrm);
661: PetscLogFlops(2*A->n*A->m);
662: } else if (type == NORM_1) { /* max column norm */
663: PetscReal *tmp,*tmp2;
664: int j;
665: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
666: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
667: MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
668: *nrm = 0.0;
669: MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
670: for (j=0; j<mbd->A->n; j++) {
671: if (tmp2[j] > *nrm) *nrm = tmp2[j];
672: }
673: PetscFree(tmp);
674: PetscFree(tmp2);
675: } else if (type == NORM_INFINITY) { /* max row norm */
676: PetscReal normtemp;
677: MatNorm(mbd->A,type,&normtemp);
678: MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
679: }
680: return(0);
681: }
685: int MatPrintHelp_MPIBDiag(Mat A)
686: {
687: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
688: int ierr;
691: if (!a->rank) {
692: MatPrintHelp_SeqBDiag(a->A);
693: }
694: return(0);
695: }
699: int MatScale_MPIBDiag(const PetscScalar *alpha,Mat A)
700: {
701: int ierr;
702: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
705: MatScale_SeqBDiag(alpha,a->A);
706: return(0);
707: }
711: int MatSetUpPreallocation_MPIBDiag(Mat A)
712: {
713: int ierr;
716: MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
717: return(0);
718: }
720: /* -------------------------------------------------------------------*/
722: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
723: MatGetRow_MPIBDiag,
724: MatRestoreRow_MPIBDiag,
725: MatMult_MPIBDiag,
726: /* 4*/ MatMultAdd_MPIBDiag,
727: MatMultTranspose_MPIBDiag,
728: MatMultTransposeAdd_MPIBDiag,
729: 0,
730: 0,
731: 0,
732: /*10*/ 0,
733: 0,
734: 0,
735: 0,
736: 0,
737: /*15*/ MatGetInfo_MPIBDiag,
738: 0,
739: MatGetDiagonal_MPIBDiag,
740: 0,
741: MatNorm_MPIBDiag,
742: /*20*/ MatAssemblyBegin_MPIBDiag,
743: MatAssemblyEnd_MPIBDiag,
744: 0,
745: MatSetOption_MPIBDiag,
746: MatZeroEntries_MPIBDiag,
747: /*25*/ MatZeroRows_MPIBDiag,
748: 0,
749: 0,
750: 0,
751: 0,
752: /*30*/ MatSetUpPreallocation_MPIBDiag,
753: 0,
754: 0,
755: 0,
756: 0,
757: /*35*/ 0,
758: 0,
759: 0,
760: 0,
761: 0,
762: /*40*/ 0,
763: 0,
764: 0,
765: MatGetValues_MPIBDiag,
766: 0,
767: /*45*/ MatPrintHelp_MPIBDiag,
768: MatScale_MPIBDiag,
769: 0,
770: 0,
771: 0,
772: /*50*/ MatGetBlockSize_MPIBDiag,
773: 0,
774: 0,
775: 0,
776: 0,
777: /*55*/ 0,
778: 0,
779: 0,
780: 0,
781: 0,
782: /*60*/ 0,
783: MatDestroy_MPIBDiag,
784: MatView_MPIBDiag,
785: MatGetPetscMaps_Petsc,
786: 0,
787: /*65*/ 0,
788: 0,
789: 0,
790: 0,
791: 0,
792: /*70*/ 0,
793: 0,
794: 0,
795: 0,
796: 0,
797: /*75*/ 0,
798: 0,
799: 0,
800: 0,
801: 0,
802: /*80*/ 0,
803: 0,
804: 0,
805: 0,
806: 0,
807: /*85*/ MatLoad_MPIBDiag
808: };
810: EXTERN_C_BEGIN
813: int MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
814: {
815: Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
816: int ierr,lrows,lcols,rstart,rend;
817: IS localc,localr;
820: MatGetLocalSize(A,&lrows,&lcols);
821: MatGetOwnershipRange(A,&rstart,&rend);
822: ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
823: ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
824: MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
825: ISDestroy(localr);
826: ISDestroy(localc);
828: *iscopy = PETSC_TRUE;
829: return(0);
830: }
831: EXTERN_C_END
833: EXTERN_C_BEGIN
836: int MatMPIBDiagSetPreallocation_MPIBDiag(Mat B,int nd,int bs,int *diag,PetscScalar **diagv)
837: {
838: Mat_MPIBDiag *b;
839: int ierr,i,k,*ldiag,len,nd2;
840: PetscScalar **ldiagv = 0;
841: PetscTruth flg2;
844: B->preallocated = PETSC_TRUE;
845: if (bs == PETSC_DEFAULT) bs = 1;
846: if (nd == PETSC_DEFAULT) nd = 0;
847: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
848: PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
849: PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
850: if (nd && !diag) {
851: PetscMalloc(nd*sizeof(int),&diag);
852: nd2 = nd;
853: PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
854: if (nd2 != nd) {
855: SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
856: }
857: } else if (flg2) {
858: SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
859: }
861: if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");
863: PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
864: B->n = B->N = PetscMax(B->n,B->N);
866: if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
867: if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
868: if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");
870: /* the information in the maps duplicates the information computed below, eventually
871: we should remove the duplicate information that is not contained in the maps */
872: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
873: PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);
876: b = (Mat_MPIBDiag*)B->data;
877: b->gnd = nd;
879: MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
880: b->rowners[0] = 0;
881: for (i=2; i<=b->size; i++) {
882: b->rowners[i] += b->rowners[i-1];
883: }
884: b->rstart = b->rowners[b->rank];
885: b->rend = b->rowners[b->rank+1];
886: b->brstart = (b->rstart)/bs;
887: b->brend = (b->rend)/bs;
890: /* Determine local diagonals; for now, assume global rows = global cols */
891: /* These are sorted in MatCreateSeqBDiag */
892: PetscMalloc((nd+1)*sizeof(int),&ldiag);
893: len = B->M/bs + B->N/bs + 1;
894: PetscMalloc(len*sizeof(int),&b->gdiag);
895: k = 0;
896: PetscLogObjectMemory(B,(nd+1)*sizeof(int) + (b->size+2)*sizeof(int)
897: + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
898: if (diagv) {
899: PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
900: }
901: for (i=0; i<nd; i++) {
902: b->gdiag[i] = diag[i];
903: if (diag[i] > 0) { /* lower triangular */
904: if (diag[i] < b->brend) {
905: ldiag[k] = diag[i] - b->brstart;
906: if (diagv) ldiagv[k] = diagv[i];
907: k++;
908: }
909: } else { /* upper triangular */
910: if (B->M/bs - diag[i] > B->N/bs) {
911: if (B->M/bs + diag[i] > b->brstart) {
912: ldiag[k] = diag[i] - b->brstart;
913: if (diagv) ldiagv[k] = diagv[i];
914: k++;
915: }
916: } else {
917: if (B->M/bs > b->brstart) {
918: ldiag[k] = diag[i] - b->brstart;
919: if (diagv) ldiagv[k] = diagv[i];
920: k++;
921: }
922: }
923: }
924: }
926: /* Form local matrix */
927: MatCreateSeqBDiag(PETSC_COMM_SELF,B->m,B->n,k,bs,ldiag,ldiagv,&b->A);
928: PetscLogObjectParent(B,b->A);
929: PetscFree(ldiag);
930: if (ldiagv) {PetscFree(ldiagv);}
932: return(0);
933: }
934: EXTERN_C_END
936: /*MC
937: MATMPIBDIAG - MATMPIBDIAG = "mpibdiag" - A matrix type to be used for distributed block diagonal matrices.
939: Options Database Keys:
940: . -mat_type mpibdiag - sets the matrix type to "mpibdiag" during a call to MatSetFromOptions()
942: Level: beginner
944: .seealso: MatCreateMPIBDiag
945: M*/
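/*
   A minimal usage sketch for this matrix type; the sizes, diagonal numbers, and
   the helper name MatMPIBDiagExample_Type are illustrative assumptions, not part
   of the PETSc API. Only routines whose signatures appear in this file are used:
   MatCreate(), MatSetType(), MatMPIBDiagSetPreallocation(), and MatDestroy().
*/
#if 0
int MatMPIBDiagExample_Type(MPI_Comm comm)
{
  Mat A;
  int ierr,diag[2] = {0,1};  /* main block diagonal and first lower block diagonal */

  /* illustrative 16x16 global matrix; local sizes chosen by PETSc */
  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,16,16,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIBDIAG);CHKERRQ(ierr);
  /* 2 block diagonals with block size 2; PETSC_NULL lets PETSc allocate the storage */
  ierr = MatMPIBDiagSetPreallocation(A,2,2,diag,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  return 0;
}
#endif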
947: EXTERN_C_BEGIN
950: int MatCreate_MPIBDiag(Mat B)
951: {
952: Mat_MPIBDiag *b;
953: int ierr;
956: PetscNew(Mat_MPIBDiag,&b);
957: B->data = (void*)b;
958: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
959: B->factor = 0;
960: B->mapping = 0;
962: B->insertmode = NOT_SET_VALUES;
963: MPI_Comm_rank(B->comm,&b->rank);
964: MPI_Comm_size(B->comm,&b->size);
966: /* build local table of row ownerships */
967: PetscMalloc((b->size+2)*sizeof(int),&b->rowners);
969: /* build cache for off-processor entries formed during MatSetValues() */
970: MatStashCreate_Private(B->comm,1,&B->stash);
971: b->donotstash = PETSC_FALSE;
973: /* stuff used for matrix-vector multiply */
974: b->lvec = 0;
975: b->Mvctx = 0;
977: /* used for MatSetValues() input */
978: b->roworiented = PETSC_TRUE;
980: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
981: "MatGetDiagonalBlock_MPIBDiag",
982: MatGetDiagonalBlock_MPIBDiag);
983: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBDiagSetPreallocation_C",
984: "MatMPIBDiagSetPreallocation_MPIBDiag",
985: MatMPIBDiagSetPreallocation_MPIBDiag);
986: return(0);
987: }
988: EXTERN_C_END
990: /*MC
991: MATBDIAG - MATBDIAG = "bdiag" - A matrix type to be used for block diagonal matrices.
993: This matrix type is identical to MATSEQBDIAG when constructed with a single process communicator,
994: and MATMPIBDIAG otherwise.
996: Options Database Keys:
997: . -mat_type bdiag - sets the matrix type to "bdiag" during a call to MatSetFromOptions()
999: Level: beginner
1001: .seealso: MatCreateMPIBDiag,MATSEQBDIAG,MATMPIBDIAG
1002: M*/
1004: EXTERN_C_BEGIN
1007: int MatCreate_BDiag(Mat A) {
1008: int ierr,size;
1011: PetscObjectChangeTypeName((PetscObject)A,MATBDIAG);
1012: MPI_Comm_size(A->comm,&size);
1013: if (size == 1) {
1014: MatSetType(A,MATSEQBDIAG);
1015: } else {
1016: MatSetType(A,MATMPIBDIAG);
1017: }
1018: return(0);
1019: }
1020: EXTERN_C_END
1024: /*@C
1025: MatMPIBDiagSetPreallocation - Preallocates storage for a sparse parallel matrix in MPIBDiag format.
1027: Collective on Mat
1029: Input Parameters:
1030: + A - the matrix
1031: . nd - number of block diagonals (global) (optional)
1032: . bs - each element of a diagonal is a bs x bs dense matrix
1033: . diag - optional array of block diagonal numbers (length nd).
1034: For a matrix element A[i,j], where i=row and j=column, the
1035: diagonal number is
1036: $ diag = i/bs - j/bs (integer division)
1037: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1038: needed (expensive).
1039: - diagv - pointer to actual diagonals (in same order as diag array),
1040: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1041: to control memory allocation.
1044: Options Database Keys:
1045: . -mat_block_size <bs> - Sets blocksize
1046: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1048: Notes:
1049: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1050: then it must be used on all processors that share the object for that argument.
1052: The parallel matrix is partitioned across the processors by rows, where
1053: each local rectangular matrix is stored in the uniprocessor block
1054: diagonal format. See the users manual for further details.
1056: The user MUST specify either the local or global numbers of rows
1057: (possibly both).
1059: The case bs=1 (conventional diagonal storage) is implemented as
1060: a special case.
1062: Fortran Notes:
1063: Fortran programmers cannot set diagv; this variable is ignored.
1065: Level: intermediate
1067: .keywords: matrix, block, diagonal, parallel, sparse
1069: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1070: @*/
1071: int MatMPIBDiagSetPreallocation(Mat B,int nd,int bs,const int diag[],PetscScalar *diagv[])
1072: {
1073: int ierr,(*f)(Mat,int,int,const int[],PetscScalar*[]);
1076: PetscObjectQueryFunction((PetscObject)B,"MatMPIBDiagSetPreallocation_C",(void (**)(void))&f);
1077: if (f) {
1078: (*f)(B,nd,bs,diag,diagv);
1079: }
1080: return(0);
1081: }
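/*
   Worked example of the diagonal numbering described in the manual page above,
   assuming bs = 2: the entry A[5,2] lies in block row 5/2 = 2 and block column
   2/2 = 1, so it belongs to block diagonal diag = 2 - 1 = 1 (the first lower
   block diagonal), while A[2,5] gives diag = 1 - 2 = -1 (the first upper one).
*/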
1085: /*@C
1086: MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.
1088: Collective on MPI_Comm
1090: Input Parameters:
1091: + comm - MPI communicator
1092: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1093: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1094: . N - number of columns (local and global)
1095: . nd - number of block diagonals (global) (optional)
1096: . bs - each element of a diagonal is a bs x bs dense matrix
1097: . diag - optional array of block diagonal numbers (length nd).
1098: For a matrix element A[i,j], where i=row and j=column, the
1099: diagonal number is
1100: $ diag = i/bs - j/bs (integer division)
1101: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1102: needed (expensive).
1103: - diagv - pointer to actual diagonals (in same order as diag array),
1104: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1105: to control memory allocation.
1107: Output Parameter:
1108: . A - the matrix
1110: Options Database Keys:
1111: . -mat_block_size <bs> - Sets blocksize
1112: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1114: Notes:
1115: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1116: then it must be used on all processors that share the object for that argument.
1118: The parallel matrix is partitioned across the processors by rows, where
1119: each local rectangular matrix is stored in the uniprocessor block
1120: diagonal format. See the users manual for further details.
1122: The user MUST specify either the local or global numbers of rows
1123: (possibly both).
1125: The case bs=1 (conventional diagonal storage) is implemented as
1126: a special case.
1128: Fortran Notes:
1129: Fortran programmers cannot set diagv; this variable is ignored.
1131: Level: intermediate
1133: .keywords: matrix, block, diagonal, parallel, sparse
1135: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1136: @*/
1137: int MatCreateMPIBDiag(MPI_Comm comm,int m,int M,int N,int nd,int bs,const int diag[],PetscScalar *diagv[],Mat *A)
1138: {
1139: int ierr,size;
1142: MatCreate(comm,m,m,M,N,A);
1143: MPI_Comm_size(comm,&size);
1144: if (size > 1) {
1145: MatSetType(*A,MATMPIBDIAG);
1146: MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1147: } else {
1148: MatSetType(*A,MATSEQBDIAG);
1149: MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1150: }
1151: return(0);
1152: }
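/*
   A minimal usage sketch for MatCreateMPIBDiag(); the sizes, diagonal numbers,
   and the helper name MatMPIBDiagExample_Create are illustrative assumptions.
   It creates a 12x12 matrix with block size 3 and two block diagonals, inserts
   one locally owned entry, and assembles, using only routines whose signatures
   appear in this file.
*/
#if 0
int MatMPIBDiagExample_Create(MPI_Comm comm)
{
  Mat         A;
  int         ierr,rstart,rend,row,col,diag[2] = {0,1};
  PetscScalar v = 1.0;

  ierr = MatCreateMPIBDiag(comm,PETSC_DECIDE,12,12,2,3,diag,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  if (rstart < rend) {               /* set one entry on the main block diagonal */
    row  = rstart; col = rstart;
    ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  return 0;
}
#endif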
1156: /*@C
1157: MatBDiagGetData - Gets the data for the block diagonal matrix format.
1158: For the parallel case, this returns information for the local submatrix.
1160: Input Parameters:
1161: . mat - the matrix, stored in block diagonal format.
1163: Not Collective
1165: Output Parameters:
1166: + nd - number of block diagonals
1167: . bs - each element of a diagonal is a bs x bs dense matrix
1168: . diag - array of block diagonal numbers (length nd).
1169:         For a matrix element A[i,j], where i=row and j=column, the
1170:         diagonal number is
1171: $         diag = i/bs - j/bs  (integer division)
1172: . bdlen - array of total block lengths of block diagonals
1173: - diagv - pointer to the actual diagonals (in the same order as the diag array)
1179: Level: advanced
1181: Notes:
1182: See the users manual for further details regarding this storage format.
1184: .keywords: matrix, block, diagonal, get, data
1186: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1187: @*/
1188: int MatBDiagGetData(Mat mat,int *nd,int *bs,int *diag[],int *bdlen[],PetscScalar ***diagv)
1189: {
1190: Mat_MPIBDiag *pdmat;
1191: Mat_SeqBDiag *dmat = 0;
1192: PetscTruth isseq,ismpi;
1193: int ierr;
1197: PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1198: PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1199: if (isseq) {
1200: dmat = (Mat_SeqBDiag*)mat->data;
1201: } else if (ismpi) {
1202: pdmat = (Mat_MPIBDiag*)mat->data;
1203: dmat = (Mat_SeqBDiag*)pdmat->A->data;
1204: } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1205: *nd = dmat->nd;
1206: *bs = dmat->bs;
1207: *diag = dmat->diag;
1208: *bdlen = dmat->bdlen;
1209: *diagv = dmat->diagv;
1210: return(0);
1211: }
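/*
   A minimal usage sketch for MatBDiagGetData(); the helper name
   MatBDiagExample_GetData and the variable names are illustrative assumptions.
   Given a matrix A of type MATSEQBDIAG or MATMPIBDIAG, it retrieves the layout
   of the (local) block diagonal storage.
*/
#if 0
int MatBDiagExample_GetData(Mat A)
{
  int         ierr,nd,bs,*diag,*bdlen;
  PetscScalar **diagv;

  ierr = MatBDiagGetData(A,&nd,&bs,&diag,&bdlen,&diagv);CHKERRQ(ierr);
  /* diag[0..nd-1] are the block diagonal numbers, bdlen[i] is the block length
     of diagonal i, and diagv[i] points to its values */
  return 0;
}
#endif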
1213: #include "petscsys.h"
1217: int MatLoad_MPIBDiag(PetscViewer viewer,MatType type,Mat *newmat)
1218: {
1219: Mat A;
1220: PetscScalar *vals,*svals;
1221: MPI_Comm comm = ((PetscObject)viewer)->comm;
1222: MPI_Status status;
1223: int bs,i,nz,ierr,j,rstart,rend,fd,*rowners,maxnz,*cols;
1224: int header[4],rank,size,*rowlengths = 0,M,N,m,Mbs;
1225: int *ourlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*smycols;
1226: int tag = ((PetscObject)viewer)->tag,extra_rows;
1229: MPI_Comm_size(comm,&size);
1230: MPI_Comm_rank(comm,&rank);
1231: if (!rank) {
1232: PetscViewerBinaryGetDescriptor(viewer,&fd);
1233: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1234: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1235: if (header[3] < 0) {
1236: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBDiag");
1237: }
1238: }
1239: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1240: M = header[1]; N = header[2];
1242: bs = 1; /* uses a block size of 1 by default; */
1243: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1245: /*
1246: This code adds extra rows to make sure the number of rows is
1247: divisible by the blocksize
1248: */
1249: Mbs = M/bs;
1250: extra_rows = bs - M + bs*(Mbs);
1251: if (extra_rows == bs) extra_rows = 0;
1252: else Mbs++;
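/* e.g. with M = 10 and bs = 4: Mbs = 2, extra_rows = 4 - 10 + 8 = 2, and Mbs
   becomes 3, so two padding rows bring the row count up to a multiple of bs */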
1253: if (extra_rows && !rank) {
1254: PetscLogInfo(0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n");
1255: }
1257: /* determine ownership of all rows */
1258: m = bs*(Mbs/size + ((Mbs % size) > rank));
1259: PetscMalloc((size+2)*sizeof(int),&rowners);
1260: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1261: rowners[0] = 0;
1262: for (i=2; i<=size; i++) {
1263: rowners[i] += rowners[i-1];
1264: }
1265: rstart = rowners[rank];
1266: rend = rowners[rank+1];
1268: /* distribute row lengths to all processors */
1269: PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1270: if (!rank) {
1271: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1272: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1273: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1274: PetscMalloc(size*sizeof(int),&sndcounts);
1275: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1276: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1277: PetscFree(sndcounts);
1278: } else {
1279: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1280: }
1282: if (!rank) {
1283: /* calculate the number of nonzeros on each processor */
1284: PetscMalloc(size*sizeof(int),&procsnz);
1285: PetscMemzero(procsnz,size*sizeof(int));
1286: for (i=0; i<size; i++) {
1287: for (j=rowners[i]; j<rowners[i+1]; j++) {
1288: procsnz[i] += rowlengths[j];
1289: }
1290: }
1291: PetscFree(rowlengths);
1293: /* determine max buffer needed and allocate it */
1294: maxnz = 0;
1295: for (i=0; i<size; i++) {
1296: maxnz = PetscMax(maxnz,procsnz[i]);
1297: }
1298: PetscMalloc(maxnz*sizeof(int),&cols);
1300: /* read in my part of the matrix column indices */
1301: nz = procsnz[0];
1302: PetscMalloc(nz*sizeof(int),&mycols);
1303: if (size == 1) nz -= extra_rows;
1304: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1305: if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }
1307: /* read in everyone else's and ship it off */
1308: for (i=1; i<size-1; i++) {
1309: nz = procsnz[i];
1310: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1311: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1312: }
1313: /* read in the stuff for the last proc */
1314: if (size != 1) {
1315: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
1316: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1317: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1318: MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1319: }
1320: PetscFree(cols);
1321: } else {
1322: /* determine buffer space needed for message */
1323: nz = 0;
1324: for (i=0; i<m; i++) {
1325: nz += ourlens[i];
1326: }
1327: PetscMalloc(nz*sizeof(int),&mycols);
1329: /* receive message of column indices*/
1330: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1331: MPI_Get_count(&status,MPI_INT,&maxnz);
1332: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1333: }
1335: MatCreateMPIBDiag(comm,m,M+extra_rows,N+extra_rows,PETSC_NULL,bs,PETSC_NULL,PETSC_NULL,newmat);
1336: A = *newmat;
1338: if (!rank) {
1339: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1341: /* read in my part of the matrix numerical values */
1342: nz = procsnz[0];
1343: if (size == 1) nz -= extra_rows;
1344: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1345: if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }
1347: /* insert into matrix */
1348: jj = rstart;
1349: smycols = mycols;
1350: svals = vals;
1351: for (i=0; i<m; i++) {
1352: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1353: smycols += ourlens[i];
1354: svals += ourlens[i];
1355: jj++;
1356: }
1358: /* read in other processors (except the last one) and ship out */
1359: for (i=1; i<size-1; i++) {
1360: nz = procsnz[i];
1361: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1362: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1363: }
1364: /* the last proc */
1365: if (size != 1){
1366: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
1367: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1368: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1369: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1370: }
1371: PetscFree(procsnz);
1372: } else {
1373: /* receive numeric values */
1374: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1376: /* receive message of values*/
1377: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1378: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1379: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1381: /* insert into matrix */
1382: jj = rstart;
1383: smycols = mycols;
1384: svals = vals;
1385: for (i=0; i<m; i++) {
1386: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1387: smycols += ourlens[i];
1388: svals += ourlens[i];
1389: jj++;
1390: }
1391: }
1392: PetscFree(ourlens);
1393: PetscFree(vals);
1394: PetscFree(mycols);
1395: PetscFree(rowners);
1397: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1398: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1399: return(0);
1400: }