Actual source code: mpiaij.c
1: #define PETSCMAT_DLL
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/inline/spops.h"
6: /*
7: Local utility routine that creates a mapping from the global column
8: number to the local number in the off-diagonal part of the local
9: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
10: a slightly higher hash table cost; without it, it is not scalable (each processor
11: has an order N integer array) but is fast to access.
12: */
15: PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16: {
17: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
19: PetscInt n = aij->B->cmap.n,i;
22: #if defined (PETSC_USE_CTABLE)
23: PetscTableCreate(n,&aij->colmap);
24: for (i=0; i<n; i++){
25: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
26: }
27: #else
28: PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);
29: PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));
30: PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));
31: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32: #endif
33: return(0);
34: }
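The colmap built above is consulted wherever a global column index must be translated into a local column of the off-diagonal block B. The following is an illustrative sketch, not part of mpiaij.c (the helper name lookup_offdiag_col is hypothetical); it mirrors the lookups in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below and assumes the colmap has already been created.

#include "src/mat/impls/aij/mpi/mpiaij.h"

/* Hypothetical helper: translate a global column number into the local
   column number of the off-diagonal block B, or -1 if that column is not
   present on this process; assumes aij->colmap has been built. */
static PetscInt lookup_offdiag_col(Mat mat,PetscInt gcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   lcol;
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&lcol);  /* entries are stored shifted by one */
  lcol--;                                    /* 0 (missing) becomes -1            */
#else
  lcol = aij->colmap[gcol] - 1;              /* order-N array lookup              */
#endif
  return lcol;
}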
37: #define CHUNKSIZE 15
38: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39: { \
40: if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41: lastcol1 = col;\
42: while (high1-low1 > 5) { \
43: t = (low1+high1)/2; \
44: if (rp1[t] > col) high1 = t; \
45: else low1 = t; \
46: } \
47: for (_i=low1; _i<high1; _i++) { \
48: if (rp1[_i] > col) break; \
49: if (rp1[_i] == col) { \
50: if (addv == ADD_VALUES) ap1[_i] += value; \
51: else ap1[_i] = value; \
52: goto a_noinsert; \
53: } \
54: } \
55: if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56: if (nonew == 1) goto a_noinsert; \
57: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59: N = nrow1++ - 1; a->nz++; high1++; \
60: /* shift up all the later entries in this row */ \
61: for (ii=N; ii>=_i; ii--) { \
62: rp1[ii+1] = rp1[ii]; \
63: ap1[ii+1] = ap1[ii]; \
64: } \
65: rp1[_i] = col; \
66: ap1[_i] = value; \
67: a_noinsert: ; \
68: ailen[row] = nrow1; \
69: }
72: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73: { \
74: if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75: lastcol2 = col;\
76: while (high2-low2 > 5) { \
77: t = (low2+high2)/2; \
78: if (rp2[t] > col) high2 = t; \
79: else low2 = t; \
80: } \
81: for (_i=low2; _i<high2; _i++) { \
82: if (rp2[_i] > col) break; \
83: if (rp2[_i] == col) { \
84: if (addv == ADD_VALUES) ap2[_i] += value; \
85: else ap2[_i] = value; \
86: goto b_noinsert; \
87: } \
88: } \
89: if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90: if (nonew == 1) goto b_noinsert; \
91: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93: N = nrow2++ - 1; b->nz++; high2++;\
94: /* shift up all the later entries in this row */ \
95: for (ii=N; ii>=_i; ii--) { \
96: rp2[ii+1] = rp2[ii]; \
97: ap2[ii+1] = ap2[ii]; \
98: } \
99: rp2[_i] = col; \
100: ap2[_i] = value; \
101: b_noinsert: ; \
102: bilen[row] = nrow2; \
103: }
107: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108: {
109: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
110: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
112: PetscInt l,*garray = mat->garray,diag;
115: /* code only works for square matrices A */
117: /* find size of row to the left of the diagonal part */
118: MatGetOwnershipRange(A,&diag,0);
119: row = row - diag;
120: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121: if (garray[b->j[b->i[row]+l]] > diag) break;
122: }
123: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
125: /* diagonal part */
126: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
128: /* right of diagonal part */
129: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
130: return(0);
131: }
135: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136: {
137: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
138: PetscScalar value;
140: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141: PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142: PetscTruth roworiented = aij->roworiented;
144: /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
145: Mat A = aij->A;
146: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
147: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148: PetscScalar *aa = a->a;
149: PetscTruth ignorezeroentries = a->ignorezeroentries;
150: Mat B = aij->B;
151: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
152: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153: PetscScalar *ba = b->a;
155: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156: PetscInt nonew = a->nonew;
157: PetscScalar *ap1,*ap2;
160: for (i=0; i<m; i++) {
161: if (im[i] < 0) continue;
162: #if defined(PETSC_USE_DEBUG)
163: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164: #endif
165: if (im[i] >= rstart && im[i] < rend) {
166: row = im[i] - rstart;
167: lastcol1 = -1;
168: rp1 = aj + ai[row];
169: ap1 = aa + ai[row];
170: rmax1 = aimax[row];
171: nrow1 = ailen[row];
172: low1 = 0;
173: high1 = nrow1;
174: lastcol2 = -1;
175: rp2 = bj + bi[row];
176: ap2 = ba + bi[row];
177: rmax2 = bimax[row];
178: nrow2 = bilen[row];
179: low2 = 0;
180: high2 = nrow2;
182: for (j=0; j<n; j++) {
183: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185: if (in[j] >= cstart && in[j] < cend){
186: col = in[j] - cstart;
187: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188: } else if (in[j] < 0) continue;
189: #if defined(PETSC_USE_DEBUG)
190: else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191: #endif
192: else {
193: if (mat->was_assembled) {
194: if (!aij->colmap) {
195: CreateColmap_MPIAIJ_Private(mat);
196: }
197: #if defined (PETSC_USE_CTABLE)
198: PetscTableFind(aij->colmap,in[j]+1,&col);
199: col--;
200: #else
201: col = aij->colmap[in[j]] - 1;
202: #endif
203: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204: DisAssemble_MPIAIJ(mat);
205: col = in[j];
206: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207: B = aij->B;
208: b = (Mat_SeqAIJ*)B->data;
209: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210: rp2 = bj + bi[row];
211: ap2 = ba + bi[row];
212: rmax2 = bimax[row];
213: nrow2 = bilen[row];
214: low2 = 0;
215: high2 = nrow2;
216: bm = aij->B->rmap.n;
217: ba = b->a;
218: }
219: } else col = in[j];
220: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221: }
222: }
223: } else {
224: if (!aij->donotstash) {
225: if (roworiented) {
226: if (ignorezeroentries && v[i*n] == 0.0) continue;
227: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
228: } else {
229: if (ignorezeroentries && v[i] == 0.0) continue;
230: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
231: }
232: }
233: }
234: }
235: return(0);
236: }
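For context, here is a minimal caller-side sketch, not part of mpiaij.c and with error checking omitted, that drives the routine above: each process inserts the rows of a 1D Laplacian that it owns into a MATMPIAIJ matrix, so entries whose column lies in the local diagonal block go to aij->A and the rest to aij->B.

static char help[] = "Assembles a 1D Laplacian as a MATMPIAIJ matrix.\n";

#include "petscmat.h"

int main(int argc,char **argv)
{
  Mat         A;
  PetscInt    i,rstart,rend,N = 100,col[3];
  PetscScalar v[3] = {-1.0,2.0,-1.0};

  PetscInitialize(&argc,&argv,PETSC_NULL,help);
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
  MatSetType(A,MATMPIAIJ);
  MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);
  MatGetOwnershipRange(A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    col[0] = i-1; col[1] = i; col[2] = i+1;
    if (i == 0)        { MatSetValues(A,1,&i,2,col+1,v+1,INSERT_VALUES); }
    else if (i == N-1) { MatSetValues(A,1,&i,2,col,v,INSERT_VALUES); }
    else               { MatSetValues(A,1,&i,3,col,v,INSERT_VALUES); }
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatDestroy(A);
  PetscFinalize();
  return 0;
}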
240: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241: {
242: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
244: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245: PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
248: for (i=0; i<m; i++) {
249: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
250: if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251: if (idxm[i] >= rstart && idxm[i] < rend) {
252: row = idxm[i] - rstart;
253: for (j=0; j<n; j++) {
254: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
255: if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256: if (idxn[j] >= cstart && idxn[j] < cend){
257: col = idxn[j] - cstart;
258: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
259: } else {
260: if (!aij->colmap) {
261: CreateColmap_MPIAIJ_Private(mat);
262: }
263: #if defined (PETSC_USE_CTABLE)
264: PetscTableFind(aij->colmap,idxn[j]+1,&col);
265: col --;
266: #else
267: col = aij->colmap[idxn[j]] - 1;
268: #endif
269: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270: else {
271: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
272: }
273: }
274: }
275: } else {
276: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277: }
278: }
279: return(0);
280: }
284: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285: {
286: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
288: PetscInt nstash,reallocs;
289: InsertMode addv;
292: if (aij->donotstash) {
293: return(0);
294: }
296: /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
297: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
298: if (addv == (ADD_VALUES|INSERT_VALUES)) {
299: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300: }
301: mat->insertmode = addv; /* in case this processor had no cache */
303: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);
304: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
305: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
306: return(0);
307: }
311: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312: {
313: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
314: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data;
316: PetscMPIInt n;
317: PetscInt i,j,rstart,ncols,flg;
318: PetscInt *row,*col,other_disassembled;
319: PetscScalar *val;
320: InsertMode addv = mat->insertmode;
322: /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324: if (!aij->donotstash) {
325: while (1) {
326: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
327: if (!flg) break;
329: for (i=0; i<n;) {
330: /* Now identify the consecutive vals belonging to the same row */
331: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332: if (j < n) ncols = j-i;
333: else ncols = n-i;
334: /* Now assemble all these values with a single function call */
335: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
336: i = j;
337: }
338: }
339: MatStashScatterEnd_Private(&mat->stash);
340: }
341: a->compressedrow.use = PETSC_FALSE;
342: MatAssemblyBegin(aij->A,mode);
343: MatAssemblyEnd(aij->A,mode);
345: /* determine if any processor has disassembled; if so, we must
346: also disassemble ourselves so that we may reassemble. */
347: /*
348: if the nonzero structure of submatrix B cannot change then we know that
349: no processor disassembled, and thus we can skip this step
350: */
351: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
352: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
353: if (mat->was_assembled && !other_disassembled) {
354: DisAssemble_MPIAIJ(mat);
355: }
356: }
357: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358: MatSetUpMultiply_MPIAIJ(mat);
359: }
360: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
361: ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362: MatAssemblyBegin(aij->B,mode);
363: MatAssemblyEnd(aij->B,mode);
365: PetscFree(aij->rowvalues);
366: aij->rowvalues = 0;
368: /* used by MatAXPY() */
369: a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
370: a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
372: return(0);
373: }
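A short sketch, not part of mpiaij.c, of what the stash machinery above enables: a process may set entries in rows owned by another process; MatAssemblyBegin()/MatAssemblyEnd() ship them to the owner, which applies them through MatSetValues_MPIAIJ() in the receive loop above. It assumes the matrix A from the earlier Laplacian sketch.

  PetscInt    row = 0,col = 0;       /* entry owned by process 0 */
  PetscScalar one = 1.0;
  PetscMPIInt rank;

  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  if (rank == 1) {
    /* stashed locally, then communicated to process 0 during assembly */
    MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);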
377: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378: {
379: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
383: MatZeroEntries(l->A);
384: MatZeroEntries(l->B);
385: return(0);
386: }
390: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391: {
392: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
394: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
395: PetscInt i,*owners = A->rmap.range;
396: PetscInt *nprocs,j,idx,nsends,row;
397: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
398: PetscInt *rvalues,count,base,slen,*source;
399: PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart;
400: MPI_Comm comm = ((PetscObject)A)->comm;
401: MPI_Request *send_waits,*recv_waits;
402: MPI_Status recv_status,*send_status;
403: #if defined(PETSC_DEBUG)
404: PetscTruth found = PETSC_FALSE;
405: #endif
408: /* first count number of contributors to each processor */
409: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
410: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
411: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
412: j = 0;
413: for (i=0; i<N; i++) {
414: if (lastidx > (idx = rows[i])) j = 0;
415: lastidx = idx;
416: for (; j<size; j++) {
417: if (idx >= owners[j] && idx < owners[j+1]) {
418: nprocs[2*j]++;
419: nprocs[2*j+1] = 1;
420: owner[i] = j;
421: #if defined(PETSC_DEBUG)
422: found = PETSC_TRUE;
423: #endif
424: break;
425: }
426: }
427: #if defined(PETSC_DEBUG)
428: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429: found = PETSC_FALSE;
430: #endif
431: }
432: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434: /* inform other processors of number of messages and max length*/
435: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
437: /* post receives: */
438: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
439: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
440: for (i=0; i<nrecvs; i++) {
441: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
442: }
444: /* do sends:
445: 1) starts[i] gives the starting index in svalues for stuff going to
446: the ith processor
447: */
448: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
449: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
450: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
451: starts[0] = 0;
452: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453: for (i=0; i<N; i++) {
454: svalues[starts[owner[i]]++] = rows[i];
455: }
457: starts[0] = 0;
458: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459: count = 0;
460: for (i=0; i<size; i++) {
461: if (nprocs[2*i+1]) {
462: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
463: }
464: }
465: PetscFree(starts);
467: base = owners[rank];
469: /* wait on receives */
470: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
471: source = lens + nrecvs;
472: count = nrecvs; slen = 0;
473: while (count) {
474: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
475: /* unpack receives into our local space */
476: MPI_Get_count(&recv_status,MPIU_INT,&n);
477: source[imdex] = recv_status.MPI_SOURCE;
478: lens[imdex] = n;
479: slen += n;
480: count--;
481: }
482: PetscFree(recv_waits);
483:
484: /* move the data into the send scatter */
485: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
486: count = 0;
487: for (i=0; i<nrecvs; i++) {
488: values = rvalues + i*nmax;
489: for (j=0; j<lens[i]; j++) {
490: lrows[count++] = values[j] - base;
491: }
492: }
493: PetscFree(rvalues);
494: PetscFree(lens);
495: PetscFree(owner);
496: PetscFree(nprocs);
497:
498: /* actually zap the local rows */
499: /*
500: Zero the required rows. If the "diagonal block" of the matrix
501: is square and the user wishes to set the diagonal, we use separate
502: code so that MatSetValues() is not called for each diagonal entry, which
503: would allocate new memory, causing many mallocs and slowing things down.
505: Contributed by: Matthew Knepley
506: */
507: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508: MatZeroRows(l->B,slen,lrows,0.0);
509: if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510: MatZeroRows(l->A,slen,lrows,diag);
511: } else if (diag != 0.0) {
512: MatZeroRows(l->A,slen,lrows,0.0);
513: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516: }
517: for (i = 0; i < slen; i++) {
518: row = lrows[i] + rstart;
519: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
520: }
521: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
522: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
523: } else {
524: MatZeroRows(l->A,slen,lrows,0.0);
525: }
526: PetscFree(lrows);
528: /* wait on sends */
529: if (nsends) {
530: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
531: MPI_Waitall(nsends,send_waits,send_status);
532: PetscFree(send_status);
533: }
534: PetscFree(send_waits);
535: PetscFree(svalues);
537: return(0);
538: }
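Usage sketch, not part of mpiaij.c: any process may list rows it does not own (for example Dirichlet boundary rows); the routine above routes each index to its owning process before zeroing and optionally inserts diag on the diagonal. It continues the earlier Laplacian sketch (A and N).

  PetscInt    nzr = 0,rows[2];
  PetscScalar diag = 1.0;
  PetscMPIInt rank;

  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  if (!rank) { rows[nzr++] = 0; rows[nzr++] = N-1; }  /* N-1 may be off-process */
  MatZeroRows(A,nzr,rows,diag);                       /* collective on A        */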
542: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543: {
544: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
546: PetscInt nt;
549: VecGetLocalSize(xx,&nt);
550: if (nt != A->cmap.n) {
551: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552: }
553: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
554: (*a->A->ops->mult)(a->A,xx,yy);
555: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
556: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
557: return(0);
558: }
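Usage sketch, not part of mpiaij.c: the input vector must share A's column layout and the output vector A's row layout, which is what the VecGetLocalSize() check above enforces.

  Vec      x,y;
  PetscInt m,n;

  MatGetLocalSize(A,&m,&n);
  VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DETERMINE,&x);   /* column layout of A */
  VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DETERMINE,&y);   /* row layout of A    */
  VecSet(x,1.0);
  MatMult(A,x,y);          /* overlaps the scatter of x with the local product */
  VecDestroy(x);
  VecDestroy(y);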
562: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563: {
564: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
568: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
569: (*a->A->ops->multadd)(a->A,xx,yy,zz);
570: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
571: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
572: return(0);
573: }
577: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578: {
579: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
581: PetscTruth merged;
584: VecScatterGetMerged(a->Mvctx,&merged);
585: /* do nondiagonal part */
586: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
587: if (!merged) {
588: /* send it on its way */
589: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
590: /* do local part */
591: (*a->A->ops->multtranspose)(a->A,xx,yy);
592: /* receive remote parts: note this assumes the values are not actually */
593: /* added in yy until the next line */
594: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
595: } else {
596: /* do local part */
597: (*a->A->ops->multtranspose)(a->A,xx,yy);
598: /* send it on its way */
599: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
600: /* values actually were received in the Begin() but we need to call this nop */
601: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
602: }
603: return(0);
604: }
609: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610: {
611: MPI_Comm comm;
612: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614: IS Me,Notme;
616: PetscInt M,N,first,last,*notme,i;
617: PetscMPIInt size;
621: /* Easy test: symmetric diagonal block */
622: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623: MatIsTranspose(Adia,Bdia,tol,f);
624: if (!*f) return(0);
625: PetscObjectGetComm((PetscObject)Amat,&comm);
626: MPI_Comm_size(comm,&size);
627: if (size == 1) return(0);
629: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630: MatGetSize(Amat,&M,&N);
631: MatGetOwnershipRange(Amat,&first,&last);
632: PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
633: for (i=0; i<first; i++) notme[i] = i;
634: for (i=last; i<M; i++) notme[i-last+first] = i;
635: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
636: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
637: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
638: Aoff = Aoffs[0];
639: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
640: Boff = Boffs[0];
641: MatIsTranspose(Aoff,Boff,tol,f);
642: MatDestroyMatrices(1,&Aoffs);
643: MatDestroyMatrices(1,&Boffs);
644: ISDestroy(Me);
645: ISDestroy(Notme);
647: return(0);
648: }
653: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654: {
655: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
659: /* do nondiagonal part */
660: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
661: /* send it on its way */
662: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
663: /* do local part */
664: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
665: /* receive remote parts */
666: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
667: return(0);
668: }
670: /*
671: This only works correctly for square matrices where the subblock A->A is the
672: diagonal block
673: */
676: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677: {
679: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
682: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683: if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685: }
686: MatGetDiagonal(a->A,v);
687: return(0);
688: }
692: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693: {
694: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
698: MatScale(a->A,aa);
699: MatScale(a->B,aa);
700: return(0);
701: }
705: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706: {
707: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
711: #if defined(PETSC_USE_LOG)
712: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713: #endif
714: MatStashDestroy_Private(&mat->stash);
715: MatDestroy(aij->A);
716: MatDestroy(aij->B);
717: #if defined (PETSC_USE_CTABLE)
718: if (aij->colmap) {PetscTableDestroy(aij->colmap);}
719: #else
720: PetscFree(aij->colmap);
721: #endif
722: PetscFree(aij->garray);
723: if (aij->lvec) {VecDestroy(aij->lvec);}
724: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
725: PetscFree(aij->rowvalues);
726: PetscFree(aij->ld);
727: PetscFree(aij);
729: PetscObjectChangeTypeName((PetscObject)mat,0);
730: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
731: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
732: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
733: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
734: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
735: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
736: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
737: return(0);
738: }
742: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743: {
744: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
745: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
746: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
747: PetscErrorCode ierr;
748: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
749: int fd;
750: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
751: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752: PetscScalar *column_values;
755: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
756: MPI_Comm_size(((PetscObject)mat)->comm,&size);
757: nz = A->nz + B->nz;
758: if (!rank) {
759: header[0] = MAT_FILE_COOKIE;
760: header[1] = mat->rmap.N;
761: header[2] = mat->cmap.N;
762: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
763: PetscViewerBinaryGetDescriptor(viewer,&fd);
764: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
765: /* get largest number of rows any processor has */
766: rlen = mat->rmap.n;
767: range = mat->rmap.range;
768: for (i=1; i<size; i++) {
769: rlen = PetscMax(rlen,range[i+1] - range[i]);
770: }
771: } else {
772: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
773: rlen = mat->rmap.n;
774: }
776: /* load up the local row counts */
777: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
778: for (i=0; i<mat->rmap.n; i++) {
779: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780: }
782: /* store the row lengths to the file */
783: if (!rank) {
784: MPI_Status status;
785: PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);
786: for (i=1; i<size; i++) {
787: rlen = range[i+1] - range[i];
788: MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
789: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
790: }
791: } else {
792: MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);
793: }
794: PetscFree(row_lengths);
796: /* load up the local column indices */
797: nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
798: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);
799: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
800: cnt = 0;
801: for (i=0; i<mat->rmap.n; i++) {
802: for (j=B->i[i]; j<B->i[i+1]; j++) {
803: if ( (col = garray[B->j[j]]) > cstart) break;
804: column_indices[cnt++] = col;
805: }
806: for (k=A->i[i]; k<A->i[i+1]; k++) {
807: column_indices[cnt++] = A->j[k] + cstart;
808: }
809: for (; j<B->i[i+1]; j++) {
810: column_indices[cnt++] = garray[B->j[j]];
811: }
812: }
813: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
815: /* store the column indices to the file */
816: if (!rank) {
817: MPI_Status status;
818: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
819: for (i=1; i<size; i++) {
820: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
821: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
822: MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
823: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
824: }
825: } else {
826: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
827: MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);
828: }
829: PetscFree(column_indices);
831: /* load up the local column values */
832: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
833: cnt = 0;
834: for (i=0; i<mat->rmap.n; i++) {
835: for (j=B->i[i]; j<B->i[i+1]; j++) {
836: if ( garray[B->j[j]] > cstart) break;
837: column_values[cnt++] = B->a[j];
838: }
839: for (k=A->i[i]; k<A->i[i+1]; k++) {
840: column_values[cnt++] = A->a[k];
841: }
842: for (; j<B->i[i+1]; j++) {
843: column_values[cnt++] = B->a[j];
844: }
845: }
846: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
848: /* store the column values to the file */
849: if (!rank) {
850: MPI_Status status;
851: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
852: for (i=1; i<size; i++) {
853: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
854: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
855: MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);
856: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
857: }
858: } else {
859: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
860: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
861: }
862: PetscFree(column_values);
863: return(0);
864: }
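Usage sketch, not part of mpiaij.c (the file name aij.dat is arbitrary): on more than one process MatView() dispatches to MatView_MPIAIJ_Binary() above.

  PetscViewer viewer;

  PetscViewerBinaryOpen(PETSC_COMM_WORLD,"aij.dat",FILE_MODE_WRITE,&viewer);
  MatView(A,viewer);
  PetscViewerDestroy(viewer);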
868: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869: {
870: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
871: PetscErrorCode ierr;
872: PetscMPIInt rank = aij->rank,size = aij->size;
873: PetscTruth isdraw,iascii,isbinary;
874: PetscViewer sviewer;
875: PetscViewerFormat format;
878: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
879: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
880: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
881: if (iascii) {
882: PetscViewerGetFormat(viewer,&format);
883: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884: MatInfo info;
885: PetscTruth inodes;
887: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
888: MatGetInfo(mat,MAT_LOCAL,&info);
889: MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
890: if (!inodes) {
891: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892: rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
893: } else {
894: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895: rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
896: }
897: MatGetInfo(aij->A,MAT_LOCAL,&info);
898: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
899: MatGetInfo(aij->B,MAT_LOCAL,&info);
900: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
901: PetscViewerFlush(viewer);
902: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
903: VecScatterView(aij->Mvctx,viewer);
904: return(0);
905: } else if (format == PETSC_VIEWER_ASCII_INFO) {
906: PetscInt inodecount,inodelimit,*inodes;
907: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
908: if (inodes) {
909: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
910: } else {
911: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
912: }
913: return(0);
914: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915: return(0);
916: }
917: } else if (isbinary) {
918: if (size == 1) {
919: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
920: MatView(aij->A,viewer);
921: } else {
922: MatView_MPIAIJ_Binary(mat,viewer);
923: }
924: return(0);
925: } else if (isdraw) {
926: PetscDraw draw;
927: PetscTruth isnull;
928: PetscViewerDrawGetDraw(viewer,0,&draw);
929: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
930: }
932: if (size == 1) {
933: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
934: MatView(aij->A,viewer);
935: } else {
936: /* assemble the entire matrix onto first processor. */
937: Mat A;
938: Mat_SeqAIJ *Aloc;
939: PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940: PetscScalar *a;
942: MatCreate(((PetscObject)mat)->comm,&A);
943: if (!rank) {
944: MatSetSizes(A,M,N,M,N);
945: } else {
946: MatSetSizes(A,0,0,M,N);
947: }
948: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949: MatSetType(A,MATMPIAIJ);
950: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
951: PetscLogObjectParent(mat,A);
953: /* copy over the A part */
954: Aloc = (Mat_SeqAIJ*)aij->A->data;
955: m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956: row = mat->rmap.rstart;
957: for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
958: for (i=0; i<m; i++) {
959: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
960: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961: }
962: aj = Aloc->j;
963: for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
965: /* copy over the B part */
966: Aloc = (Mat_SeqAIJ*)aij->B->data;
967: m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968: row = mat->rmap.rstart;
969: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
970: ct = cols;
971: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972: for (i=0; i<m; i++) {
973: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
974: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975: }
976: PetscFree(ct);
977: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
978: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
979: /*
980: Everyone has to call to draw the matrix since the graphics waits are
981: synchronized across all processors that share the PetscDraw object
982: */
983: PetscViewerGetSingleton(viewer,&sviewer);
984: if (!rank) {
985: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);
986: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
987: }
988: PetscViewerRestoreSingleton(viewer,&sviewer);
989: MatDestroy(A);
990: }
991: return(0);
992: }
996: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997: {
999: PetscTruth iascii,isdraw,issocket,isbinary;
1000:
1002: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1003: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1004: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1005: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1006: if (iascii || isdraw || isbinary || issocket) {
1007: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1008: } else {
1009: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010: }
1011: return(0);
1012: }
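Usage sketch, not part of mpiaij.c, exercising the ASCII info paths above.

  PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);
  MatView(A,PETSC_VIEWER_STDOUT_WORLD);     /* I-node summary for process 0          */
  PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
  MatView(A,PETSC_VIEWER_STDOUT_WORLD);     /* per-process nz counts and VecScatter  */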
1016: PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017: {
1018: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1020: Vec bb1;
1023: VecDuplicate(bb,&bb1);
1025: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1026: if (flag & SOR_ZERO_INITIAL_GUESS) {
1027: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1028: its--;
1029: }
1030:
1031: while (its--) {
1032: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1033: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1035: /* update rhs: bb1 = bb - B*x */
1036: VecScale(mat->lvec,-1.0);
1037: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1039: /* local sweep */
1040: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1041: }
1042: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1043: if (flag & SOR_ZERO_INITIAL_GUESS) {
1044: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1045: its--;
1046: }
1047: while (its--) {
1048: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1049: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1051: /* update rhs: bb1 = bb - B*x */
1052: VecScale(mat->lvec,-1.0);
1053: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1055: /* local sweep */
1056: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1057: }
1058: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1059: if (flag & SOR_ZERO_INITIAL_GUESS) {
1060: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1061: its--;
1062: }
1063: while (its--) {
1064: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1065: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1067: /* update rhs: bb1 = bb - B*x */
1068: VecScale(mat->lvec,-1.0);
1069: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1071: /* local sweep */
1072: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1073: }
1074: } else {
1075: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1076: }
1078: VecDestroy(bb1);
1079: return(0);
1080: }
1084: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1085: {
1086: MPI_Comm comm,pcomm;
1087: PetscInt first,local_size,nrows,*rows;
1088: int ntids;
1089: IS crowp,growp,irowp,lrowp,lcolp,icolp;
1093: PetscObjectGetComm((PetscObject)A,&comm);
1094: /* make a collective version of 'rowp' */
1095: PetscObjectGetComm((PetscObject)rowp,&pcomm);
1096: if (pcomm==comm) {
1097: crowp = rowp;
1098: } else {
1099: ISGetSize(rowp,&nrows);
1100: ISGetIndices(rowp,&rows);
1101: ISCreateGeneral(comm,nrows,rows,&crowp);
1102: ISRestoreIndices(rowp,&rows);
1103: }
1104: /* collect the global row permutation and invert it */
1105: ISAllGather(crowp,&growp);
1106: ISSetPermutation(growp);
1107: if (pcomm!=comm) {
1108: ISDestroy(crowp);
1109: }
1110: ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
1111: /* get the local target indices */
1112: MatGetOwnershipRange(A,&first,PETSC_NULL);
1113: MatGetLocalSize(A,&local_size,PETSC_NULL);
1114: ISGetIndices(irowp,&rows);
1115: ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);
1116: ISRestoreIndices(irowp,&rows);
1117: ISDestroy(irowp);
1118: /* the column permutation is so much easier;
1119: make a local version of 'colp' and invert it */
1120: PetscObjectGetComm((PetscObject)colp,&pcomm);
1121: MPI_Comm_size(pcomm,&ntids);
1122: if (ntids==1) {
1123: lcolp = colp;
1124: } else {
1125: ISGetSize(colp,&nrows);
1126: ISGetIndices(colp,&rows);
1127: ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);
1128: }
1129: ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
1130: ISSetPermutation(lcolp);
1131: if (ntids>1) {
1132: ISRestoreIndices(colp,&rows);
1133: ISDestroy(lcolp);
1134: }
1135: /* now we just get the submatrix */
1136: MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);
1137: /* clean up */
1138: ISDestroy(lrowp);
1139: ISDestroy(icolp);
1140: return(0);
1141: }
1145: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1146: {
1147: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1148: Mat A = mat->A,B = mat->B;
1150: PetscReal isend[5],irecv[5];
1153: info->block_size = 1.0;
1154: MatGetInfo(A,MAT_LOCAL,info);
1155: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1156: isend[3] = info->memory; isend[4] = info->mallocs;
1157: MatGetInfo(B,MAT_LOCAL,info);
1158: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1159: isend[3] += info->memory; isend[4] += info->mallocs;
1160: if (flag == MAT_LOCAL) {
1161: info->nz_used = isend[0];
1162: info->nz_allocated = isend[1];
1163: info->nz_unneeded = isend[2];
1164: info->memory = isend[3];
1165: info->mallocs = isend[4];
1166: } else if (flag == MAT_GLOBAL_MAX) {
1167: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);
1168: info->nz_used = irecv[0];
1169: info->nz_allocated = irecv[1];
1170: info->nz_unneeded = irecv[2];
1171: info->memory = irecv[3];
1172: info->mallocs = irecv[4];
1173: } else if (flag == MAT_GLOBAL_SUM) {
1174: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);
1175: info->nz_used = irecv[0];
1176: info->nz_allocated = irecv[1];
1177: info->nz_unneeded = irecv[2];
1178: info->memory = irecv[3];
1179: info->mallocs = irecv[4];
1180: }
1181: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1182: info->fill_ratio_needed = 0;
1183: info->factor_mallocs = 0;
1184: info->rows_global = (double)matin->rmap.N;
1185: info->columns_global = (double)matin->cmap.N;
1186: info->rows_local = (double)matin->rmap.n;
1187: info->columns_local = (double)matin->cmap.N;
1189: return(0);
1190: }
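Usage sketch, not part of mpiaij.c, querying the reductions implemented above.

  MatInfo info;

  MatGetInfo(A,MAT_GLOBAL_SUM,&info);
  PetscPrintf(PETSC_COMM_WORLD,"nonzeros %D, mallocs during assembly %D\n",
              (PetscInt)info.nz_used,(PetscInt)info.mallocs);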
1194: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1195: {
1196: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1200: switch (op) {
1201: case MAT_NEW_NONZERO_LOCATIONS:
1202: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1203: case MAT_KEEP_ZEROED_ROWS:
1204: case MAT_NEW_NONZERO_LOCATION_ERR:
1205: case MAT_USE_INODES:
1206: case MAT_IGNORE_ZERO_ENTRIES:
1207: MatSetOption(a->A,op,flg);
1208: MatSetOption(a->B,op,flg);
1209: break;
1210: case MAT_ROW_ORIENTED:
1211: a->roworiented = flg;
1212: MatSetOption(a->A,op,flg);
1213: MatSetOption(a->B,op,flg);
1214: break;
1215: case MAT_NEW_DIAGONALS:
1216: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1217: break;
1218: case MAT_IGNORE_OFF_PROC_ENTRIES:
1219: a->donotstash = PETSC_TRUE;
1220: break;
1221: case MAT_SYMMETRIC:
1222: MatSetOption(a->A,op,flg);
1223: break;
1224: case MAT_STRUCTURALLY_SYMMETRIC:
1225: case MAT_HERMITIAN:
1226: case MAT_SYMMETRY_ETERNAL:
1227: MatSetOption(a->A,op,flg);
1228: break;
1229: default:
1230: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1231: }
1232: return(0);
1233: }
1237: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1238: {
1239: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1240: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1242: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1243: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1244: PetscInt *cmap,*idx_p;
1247: if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1248: mat->getrowactive = PETSC_TRUE;
1250: if (!mat->rowvalues && (idx || v)) {
1251: /*
1252: allocate enough space to hold information from the longest row.
1253: */
1254: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1255: PetscInt max = 1,tmp;
1256: for (i=0; i<matin->rmap.n; i++) {
1257: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1258: if (max < tmp) { max = tmp; }
1259: }
1260: PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);
1261: mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1262: }
1264: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1265: lrow = row - rstart;
1267: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1268: if (!v) {pvA = 0; pvB = 0;}
1269: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1270: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1271: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1272: nztot = nzA + nzB;
1274: cmap = mat->garray;
1275: if (v || idx) {
1276: if (nztot) {
1277: /* Sort by increasing column numbers, assuming A and B already sorted */
1278: PetscInt imark = -1;
1279: if (v) {
1280: *v = v_p = mat->rowvalues;
1281: for (i=0; i<nzB; i++) {
1282: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1283: else break;
1284: }
1285: imark = i;
1286: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1287: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1288: }
1289: if (idx) {
1290: *idx = idx_p = mat->rowindices;
1291: if (imark > -1) {
1292: for (i=0; i<imark; i++) {
1293: idx_p[i] = cmap[cworkB[i]];
1294: }
1295: } else {
1296: for (i=0; i<nzB; i++) {
1297: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1298: else break;
1299: }
1300: imark = i;
1301: }
1302: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1303: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1304: }
1305: } else {
1306: if (idx) *idx = 0;
1307: if (v) *v = 0;
1308: }
1309: }
1310: *nz = nztot;
1311: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1312: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1313: return(0);
1314: }
1318: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1319: {
1320: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1323: if (!aij->getrowactive) {
1324: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1325: }
1326: aij->getrowactive = PETSC_FALSE;
1327: return(0);
1328: }
1332: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1333: {
1334: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1335: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1337: PetscInt i,j,cstart = mat->cmap.rstart;
1338: PetscReal sum = 0.0;
1339: PetscScalar *v;
1342: if (aij->size == 1) {
1343: MatNorm(aij->A,type,norm);
1344: } else {
1345: if (type == NORM_FROBENIUS) {
1346: v = amat->a;
1347: for (i=0; i<amat->nz; i++) {
1348: #if defined(PETSC_USE_COMPLEX)
1349: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1350: #else
1351: sum += (*v)*(*v); v++;
1352: #endif
1353: }
1354: v = bmat->a;
1355: for (i=0; i<bmat->nz; i++) {
1356: #if defined(PETSC_USE_COMPLEX)
1357: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1358: #else
1359: sum += (*v)*(*v); v++;
1360: #endif
1361: }
1362: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
1363: *norm = sqrt(*norm);
1364: } else if (type == NORM_1) { /* max column norm */
1365: PetscReal *tmp,*tmp2;
1366: PetscInt *jj,*garray = aij->garray;
1367: PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);
1368: PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);
1369: PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));
1370: *norm = 0.0;
1371: v = amat->a; jj = amat->j;
1372: for (j=0; j<amat->nz; j++) {
1373: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1374: }
1375: v = bmat->a; jj = bmat->j;
1376: for (j=0; j<bmat->nz; j++) {
1377: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1378: }
1379: MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
1380: for (j=0; j<mat->cmap.N; j++) {
1381: if (tmp2[j] > *norm) *norm = tmp2[j];
1382: }
1383: PetscFree(tmp);
1384: PetscFree(tmp2);
1385: } else if (type == NORM_INFINITY) { /* max row norm */
1386: PetscReal ntemp = 0.0;
1387: for (j=0; j<aij->A->rmap.n; j++) {
1388: v = amat->a + amat->i[j];
1389: sum = 0.0;
1390: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1391: sum += PetscAbsScalar(*v); v++;
1392: }
1393: v = bmat->a + bmat->i[j];
1394: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1395: sum += PetscAbsScalar(*v); v++;
1396: }
1397: if (sum > ntemp) ntemp = sum;
1398: }
1399: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);
1400: } else {
1401: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1402: }
1403: }
1404: return(0);
1405: }
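Usage sketch, not part of mpiaij.c, of the three norms supported above (the 2-norm is not supported).

  PetscReal nrm1,nrmf,nrminf;

  MatNorm(A,NORM_1,&nrm1);           /* max column sum: local sums, MPI_SUM, then max */
  MatNorm(A,NORM_FROBENIUS,&nrmf);   /* sqrt of the global sum of squares             */
  MatNorm(A,NORM_INFINITY,&nrminf);  /* max row sum: local max, then MPI_MAX          */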
1409: PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1410: {
1411: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1412: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1414: PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1415: PetscInt cstart=A->cmap.rstart,ncol;
1416: Mat B;
1417: PetscScalar *array;
1420: if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1422: /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1423: ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1424: ai = Aloc->i; aj = Aloc->j;
1425: bi = Bloc->i; bj = Bloc->j;
1426: PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);
1427: cols = d_nnz + na + 1; /* work space to be used by B part */
1428: PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));
1429: for (i=0; i<ai[ma]; i++){
1430: d_nnz[aj[i]] ++;
1431: aj[i] += cstart; /* global col index to be used by MatSetValues() */
1432: }
1434: MatCreate(((PetscObject)A)->comm,&B);
1435: MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);
1436: MatSetType(B,((PetscObject)A)->type_name);
1437: MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);
1439: /* copy over the A part */
1440: array = Aloc->a;
1441: row = A->rmap.rstart;
1442: for (i=0; i<ma; i++) {
1443: ncol = ai[i+1]-ai[i];
1444: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
1445: row++; array += ncol; aj += ncol;
1446: }
1447: aj = Aloc->j;
1448: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1450: /* copy over the B part */
1451: array = Bloc->a;
1452: row = A->rmap.rstart;
1453: for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1454: for (i=0; i<mb; i++) {
1455: ncol = bi[i+1]-bi[i];
1456: MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);
1457: row++; array += ncol; cols += ncol;
1458: }
1459: PetscFree(d_nnz);
1460: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1461: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1462: if (matout) {
1463: *matout = B;
1464: } else {
1465: MatHeaderCopy(A,B);
1466: }
1467: return(0);
1468: }
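Usage sketch, not part of mpiaij.c: out-of-place transpose creates a new parallel matrix; passing PETSC_NULL requests the in-place variant, which the code above restricts to square matrices.

  Mat At;

  MatTranspose(A,&At);           /* out-of-place; preallocated from d_nnz above */
  MatDestroy(At);
  /* MatTranspose(A,PETSC_NULL);    in-place form, square matrices only         */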
1472: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1473: {
1474: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1475: Mat a = aij->A,b = aij->B;
1477: PetscInt s1,s2,s3;
1480: MatGetLocalSize(mat,&s2,&s3);
1481: if (rr) {
1482: VecGetLocalSize(rr,&s1);
1483: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1484: /* Overlap communication with computation. */
1485: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1486: }
1487: if (ll) {
1488: VecGetLocalSize(ll,&s1);
1489: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1490: (*b->ops->diagonalscale)(b,ll,0);
1491: }
1492: /* scale the diagonal block */
1493: (*a->ops->diagonalscale)(a,ll,rr);
1495: if (rr) {
1496: /* Do a scatter end and then right scale the off-diagonal block */
1497: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1498: (*b->ops->diagonalscale)(b,0,aij->lvec);
1499: }
1500:
1501: return(0);
1502: }
1506: PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1507: {
1508: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1512: MatSetBlockSize(a->A,bs);
1513: MatSetBlockSize(a->B,bs);
1514: return(0);
1515: }
1518: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1519: {
1520: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1524: MatSetUnfactored(a->A);
1525: return(0);
1526: }
1530: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1531: {
1532: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1533: Mat a,b,c,d;
1534: PetscTruth flg;
1538: a = matA->A; b = matA->B;
1539: c = matB->A; d = matB->B;
1541: MatEqual(a,c,&flg);
1542: if (flg) {
1543: MatEqual(b,d,&flg);
1544: }
1545: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
1546: return(0);
1547: }
1551: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1552: {
1554: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1555: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1558: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1559: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1560: /* because of the column compression in the off-processor part of the matrix a->B,
1561: the number of columns in a->B and b->B may be different, hence we cannot call
1562: the MatCopy() directly on the two parts. If need be, we can provide a more
1563: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1564: then copying the submatrices */
1565: MatCopy_Basic(A,B,str);
1566: } else {
1567: MatCopy(a->A,b->A,str);
1568: MatCopy(a->B,b->B,str);
1569: }
1570: return(0);
1571: }
1575: PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1576: {
1580: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1581: return(0);
1582: }
1584: #include "petscblaslapack.h"
1587: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1588: {
1590: PetscInt i;
1591: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1592: PetscBLASInt bnz,one=1;
1593: Mat_SeqAIJ *x,*y;
1596: if (str == SAME_NONZERO_PATTERN) {
1597: PetscScalar alpha = a;
1598: x = (Mat_SeqAIJ *)xx->A->data;
1599: y = (Mat_SeqAIJ *)yy->A->data;
1600: bnz = (PetscBLASInt)x->nz;
1601: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1602: x = (Mat_SeqAIJ *)xx->B->data;
1603: y = (Mat_SeqAIJ *)yy->B->data;
1604: bnz = (PetscBLASInt)x->nz;
1605: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1606: } else if (str == SUBSET_NONZERO_PATTERN) {
1607: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
1609: x = (Mat_SeqAIJ *)xx->B->data;
1610: y = (Mat_SeqAIJ *)yy->B->data;
1611: if (y->xtoy && y->XtoY != xx->B) {
1612: PetscFree(y->xtoy);
1613: MatDestroy(y->XtoY);
1614: }
1615: if (!y->xtoy) { /* get xtoy */
1616: MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
1617: y->XtoY = xx->B;
1618: PetscObjectReference((PetscObject)xx->B);
1619: }
1620: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1621: } else {
1622: MatAXPY_Basic(Y,a,X,str);
1623: }
1624: return(0);
1625: }
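Usage sketch, not part of mpiaij.c, assuming X and Y are assembled MATMPIAIJ matrices of the same dimensions: the MatStructure argument selects among the three branches above.

  Mat         X,Y;          /* assumed created and assembled elsewhere */
  PetscScalar alpha = 2.0;

  /* Y = Y + alpha*X: SAME_NONZERO_PATTERN takes the BLAS axpy path above,
     SUBSET_NONZERO_PATTERN builds or reuses the xtoy map, and anything else
     falls back to MatAXPY_Basic() */
  MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);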
1627: EXTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
1631: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
1632: {
1633: #if defined(PETSC_USE_COMPLEX)
1635: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1638: MatConjugate_SeqAIJ(aij->A);
1639: MatConjugate_SeqAIJ(aij->B);
1640: #else
1642: #endif
1643: return(0);
1644: }
1648: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1649: {
1650: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1654: MatRealPart(a->A);
1655: MatRealPart(a->B);
1656: return(0);
1657: }
1661: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1662: {
1663: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1667: MatImaginaryPart(a->A);
1668: MatImaginaryPart(a->B);
1669: return(0);
1670: }
1672: #ifdef PETSC_HAVE_PBGL
1674: #include <boost/parallel/mpi/bsp_process_group.hpp>
1675: #include <boost/graph/distributed/ilu_default_graph.hpp>
1676: #include <boost/graph/distributed/ilu_0_block.hpp>
1677: #include <boost/graph/distributed/ilu_preconditioner.hpp>
1678: #include <boost/graph/distributed/petsc/interface.hpp>
1679: #include <boost/multi_array.hpp>
1680: #include <boost/parallel/distributed_property_map.hpp>
1684: /*
1685: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1686: */
1687: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1688: {
1689: namespace petsc = boost::distributed::petsc;
1690:
1691: namespace graph_dist = boost::graph::distributed;
1692: using boost::graph::distributed::ilu_default::process_group_type;
1693: using boost::graph::ilu_permuted;
1695: PetscTruth row_identity, col_identity;
1696: PetscContainer c;
1697: PetscInt m, n, M, N;
1698: PetscErrorCode ierr;
1701: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1702: ISIdentity(isrow, &row_identity);
1703: ISIdentity(iscol, &col_identity);
1704: if (!row_identity || !col_identity) {
1705: SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1706: }
1708: process_group_type pg;
1709: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
1710: lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1711: lgraph_type& level_graph = *lgraph_p;
1712: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
1714: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1715: ilu_permuted(level_graph);
1717: /* put together the new matrix */
1718: MatCreate(((PetscObject)A)->comm, fact);
1719: MatGetLocalSize(A, &m, &n);
1720: MatGetSize(A, &M, &N);
1721: MatSetSizes(*fact, m, n, M, N);
1722: MatSetType(*fact, ((PetscObject)A)->type_name);
1723: MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);
1724: MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);
1725: (*fact)->factor = FACTOR_LU;
1727: PetscContainerCreate(((PetscObject)A)->comm, &c);
1728: PetscContainerSetPointer(c, lgraph_p);
1729: PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1730: return(0);
1731: }
1735: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1736: {
1738: return(0);
1739: }
1743: /*
1744: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1745: */
1746: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1747: {
1748: namespace graph_dist = boost::graph::distributed;
1750: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
1751: lgraph_type* lgraph_p;
1752: PetscContainer c;
1756: PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);
1757: PetscContainerGetPointer(c, (void **) &lgraph_p);
1758: VecCopy(b, x);
1760: PetscScalar* array_x;
1761: VecGetArray(x, &array_x);
1762: PetscInt sx;
1763: VecGetSize(x, &sx);
1764:
1765: PetscScalar* array_b;
1766: VecGetArray(b, &array_b);
1767: PetscInt sb;
1768: VecGetSize(b, &sb);
1770: lgraph_type& level_graph = *lgraph_p;
1771: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
1773: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1774: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
1775: ref_x(array_x, boost::extents[num_vertices(graph)]);
1777: typedef boost::iterator_property_map<array_ref_type::iterator,
1778: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
1779: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1780: vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1781:
1782: ilu_set_solve(*lgraph_p, vector_b, vector_x);
1784: return(0);
1785: }
1786: #endif
1788: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1789: PetscInt nzlocal,nsends,nrecvs;
1790: PetscMPIInt *send_rank;
1791: PetscInt *sbuf_nz,*sbuf_j,**rbuf_j;
1792: PetscScalar *sbuf_a,**rbuf_a;
1793: PetscErrorCode (*MatDestroy)(Mat);
1794: } Mat_Redundant;
1798: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1799: {
1800: PetscErrorCode ierr;
1801: Mat_Redundant *redund=(Mat_Redundant*)ptr;
1802: PetscInt i;
1805: PetscFree(redund->send_rank);
1806: PetscFree(redund->sbuf_j);
1807: PetscFree(redund->sbuf_a);
1808: for (i=0; i<redund->nrecvs; i++){
1809: PetscFree(redund->rbuf_j[i]);
1810: PetscFree(redund->rbuf_a[i]);
1811: }
1812: PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);
1813: PetscFree(redund);
1814: return(0);
1815: }
1819: PetscErrorCode MatDestroy_MatRedundant(Mat A)
1820: {
1821: PetscErrorCode ierr;
1822: PetscContainer container;
1823: Mat_Redundant *redund=PETSC_NULL;
1826: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
1827: if (container) {
1828: PetscContainerGetPointer(container,(void **)&redund);
1829: } else {
1830: SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1831: }
1832: A->ops->destroy = redund->MatDestroy;
1833: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
1834: (*A->ops->destroy)(A);
1835: PetscContainerDestroy(container);
1836: return(0);
1837: }
1841: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1842: {
1843: PetscMPIInt rank,size;
1844: MPI_Comm comm=((PetscObject)mat)->comm;
1846: PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
1847: PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1848: PetscInt *rowrange=mat->rmap.range;
1849: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1850: Mat A=aij->A,B=aij->B,C=*matredundant;
1851: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1852: PetscScalar *sbuf_a;
1853: PetscInt nzlocal=a->nz+b->nz;
1854: PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1855: PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1856: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1857: PetscScalar *vals,*aworkA,*aworkB;
1858: PetscMPIInt tag1,tag2,tag3,imdex;
1859: MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1860: *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1861: MPI_Status recv_status,*send_status;
1862: PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1863: PetscInt **rbuf_j=PETSC_NULL;
1864: PetscScalar **rbuf_a=PETSC_NULL;
1865: Mat_Redundant *redund=PETSC_NULL;
1866: PetscContainer container;
1869: MPI_Comm_rank(comm,&rank);
1870: MPI_Comm_size(comm,&size);
1872: if (reuse == MAT_REUSE_MATRIX) {
1873: MatGetSize(C,&M,&N);
1874: if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1875: MatGetLocalSize(C,&M,&N);
1876: if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1877: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
1878: if (container) {
1879: PetscContainerGetPointer(container,(void **)&redund);
1880: } else {
1881: SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1882: }
1883: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1885: nsends = redund->nsends;
1886: nrecvs = redund->nrecvs;
1887: send_rank = redund->send_rank; recv_rank = send_rank + size;
1888: sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends;
1889: sbuf_j = redund->sbuf_j;
1890: sbuf_a = redund->sbuf_a;
1891: rbuf_j = redund->rbuf_j;
1892: rbuf_a = redund->rbuf_a;
1893: }
1895: if (reuse == MAT_INITIAL_MATRIX){
1896: PetscMPIInt subrank,subsize;
1897: PetscInt nleftover,np_subcomm;
1898: /* get the destination processors' id send_rank, nsends and nrecvs */
1899: MPI_Comm_rank(subcomm,&subrank);
1900: MPI_Comm_size(subcomm,&subsize);
1901: PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1902: recv_rank = send_rank + size;
1903: np_subcomm = size/nsubcomm;
1904: nleftover = size - nsubcomm*np_subcomm;
1905: nsends = 0; nrecvs = 0;
1906: for (i=0; i<size; i++){ /* i=rank*/
1907: if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1908: send_rank[nsends] = i; nsends++;
1909: recv_rank[nrecvs++] = i;
1910: }
1911: }
1912: if (rank >= size - nleftover){/* this proc is a leftover processor */
1913: i = size-nleftover-1;
1914: j = 0;
1915: while (j < nsubcomm - nleftover){
1916: send_rank[nsends++] = i;
1917: i--; j++;
1918: }
1919: }
1921: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1922: for (i=0; i<nleftover; i++){
1923: recv_rank[nrecvs++] = size-nleftover+i;
1924: }
1925: }
1927: /* allocate sbuf_j, sbuf_a */
1928: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1929: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
1930: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
1931: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1932:
1933: /* copy mat's local entries into the buffers */
1934: if (reuse == MAT_INITIAL_MATRIX){
1935: rownz_max = 0;
1936: rptr = sbuf_j;
1937: cols = sbuf_j + rend-rstart + 1;
1938: vals = sbuf_a;
1939: rptr[0] = 0;
1940: for (i=0; i<rend-rstart; i++){
1941: row = i + rstart;
1942: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1943: ncols = nzA + nzB;
1944: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1945: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1946: /* load the column indices for this row into cols */
1947: lwrite = 0;
1948: for (l=0; l<nzB; l++) {
1949: if ((ctmp = bmap[cworkB[l]]) < cstart){
1950: vals[lwrite] = aworkB[l];
1951: cols[lwrite++] = ctmp;
1952: }
1953: }
1954: for (l=0; l<nzA; l++){
1955: vals[lwrite] = aworkA[l];
1956: cols[lwrite++] = cstart + cworkA[l];
1957: }
1958: for (l=0; l<nzB; l++) {
1959: if ((ctmp = bmap[cworkB[l]]) >= cend){
1960: vals[lwrite] = aworkB[l];
1961: cols[lwrite++] = ctmp;
1962: }
1963: }
1964: vals += ncols;
1965: cols += ncols;
1966: rptr[i+1] = rptr[i] + ncols;
1967: if (rownz_max < ncols) rownz_max = ncols;
1968: }
1969: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1970: } else { /* only copy matrix values into sbuf_a */
1971: rptr = sbuf_j;
1972: vals = sbuf_a;
1973: rptr[0] = 0;
1974: for (i=0; i<rend-rstart; i++){
1975: row = i + rstart;
1976: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1977: ncols = nzA + nzB;
1978: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1979: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1980: lwrite = 0;
1981: for (l=0; l<nzB; l++) {
1982: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1983: }
1984: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1985: for (l=0; l<nzB; l++) {
1986: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1987: }
1988: vals += ncols;
1989: rptr[i+1] = rptr[i] + ncols;
1990: }
1991: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1993: /* send nzlocal to others, and recv other's nzlocal */
1994: /*--------------------------------------------------*/
1995: if (reuse == MAT_INITIAL_MATRIX){
1996: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
1997: s_waits2 = s_waits3 + nsends;
1998: s_waits1 = s_waits2 + nsends;
1999: r_waits1 = s_waits1 + nsends;
2000: r_waits2 = r_waits1 + nrecvs;
2001: r_waits3 = r_waits2 + nrecvs;
2002: } else {
2003: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2004: r_waits3 = s_waits3 + nsends;
2005: }
2007: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2008: if (reuse == MAT_INITIAL_MATRIX){
2009: /* get new tags to keep the communication clean */
2010: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2011: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2012: PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
2013: rbuf_nz = sbuf_nz + nsends;
2014:
2015: /* post receives of other's nzlocal */
2016: for (i=0; i<nrecvs; i++){
2017: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2018: }
2019: /* send nzlocal to others */
2020: for (i=0; i<nsends; i++){
2021: sbuf_nz[i] = nzlocal;
2022: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2023: }
2024: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2025: count = nrecvs;
2026: while (count) {
2027: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2028: recv_rank[imdex] = recv_status.MPI_SOURCE;
2029: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2030: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
2032: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2033: rbuf_nz[imdex] += i + 2;
2034: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
2035: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2036: count--;
2037: }
2038: /* wait on sends of nzlocal */
2039: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2040: /* send mat->i,j to others, and recv from other's */
2041: /*------------------------------------------------*/
2042: for (i=0; i<nsends; i++){
2043: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2044: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2045: }
2046: /* wait on receives of mat->i,j */
2047: /*------------------------------*/
2048: count = nrecvs;
2049: while (count) {
2050: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2051: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2052: count--;
2053: }
2054: /* wait on sends of mat->i,j */
2055: /*---------------------------*/
2056: if (nsends) {
2057: MPI_Waitall(nsends,s_waits2,send_status);
2058: }
2059: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2061: /* post receives, send and receive mat->a */
2062: /*----------------------------------------*/
2063: for (imdex=0; imdex<nrecvs; imdex++) {
2064: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2065: }
2066: for (i=0; i<nsends; i++){
2067: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2068: }
2069: count = nrecvs;
2070: while (count) {
2071: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2072: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2073: count--;
2074: }
2075: if (nsends) {
2076: MPI_Waitall(nsends,s_waits3,send_status);
2077: }
2079: PetscFree2(s_waits3,send_status);
2080:
2081: /* create redundant matrix */
2082: /*-------------------------*/
2083: if (reuse == MAT_INITIAL_MATRIX){
2084: /* compute rownz_max for preallocation */
2085: for (imdex=0; imdex<nrecvs; imdex++){
2086: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2087: rptr = rbuf_j[imdex];
2088: for (i=0; i<j; i++){
2089: ncols = rptr[i+1] - rptr[i];
2090: if (rownz_max < ncols) rownz_max = ncols;
2091: }
2092: }
2093:
2094: MatCreate(subcomm,&C);
2095: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
2096: MatSetFromOptions(C);
2097: MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
2098: MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
2099: } else {
2100: C = *matredundant;
2101: }
2103: /* insert local matrix entries */
2104: rptr = sbuf_j;
2105: cols = sbuf_j + rend-rstart + 1;
2106: vals = sbuf_a;
2107: for (i=0; i<rend-rstart; i++){
2108: row = i + rstart;
2109: ncols = rptr[i+1] - rptr[i];
2110: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2111: vals += ncols;
2112: cols += ncols;
2113: }
2114: /* insert received matrix entries */
2115: for (imdex=0; imdex<nrecvs; imdex++){
2116: rstart = rowrange[recv_rank[imdex]];
2117: rend = rowrange[recv_rank[imdex]+1];
2118: rptr = rbuf_j[imdex];
2119: cols = rbuf_j[imdex] + rend-rstart + 1;
2120: vals = rbuf_a[imdex];
2121: for (i=0; i<rend-rstart; i++){
2122: row = i + rstart;
2123: ncols = rptr[i+1] - rptr[i];
2124: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2125: vals += ncols;
2126: cols += ncols;
2127: }
2128: }
2129: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2130: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2131: MatGetSize(C,&M,&N);
2132: if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2133: if (reuse == MAT_INITIAL_MATRIX){
2134: PetscContainer container;
2135: *matredundant = C;
2136: /* create a supporting struct and attach it to C for reuse */
2137: PetscNewLog(C,Mat_Redundant,&redund);
2138: PetscContainerCreate(PETSC_COMM_SELF,&container);
2139: PetscContainerSetPointer(container,redund);
2140: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
2141: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
2142:
2143: redund->nzlocal = nzlocal;
2144: redund->nsends = nsends;
2145: redund->nrecvs = nrecvs;
2146: redund->send_rank = send_rank;
2147: redund->sbuf_nz = sbuf_nz;
2148: redund->sbuf_j = sbuf_j;
2149: redund->sbuf_a = sbuf_a;
2150: redund->rbuf_j = rbuf_j;
2151: redund->rbuf_a = rbuf_a;
2153: redund->MatDestroy = C->ops->destroy;
2154: C->ops->destroy = MatDestroy_MatRedundant;
2155: }
2156: return(0);
2157: }
2161: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2162: {
2163: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2164: PetscInt n = A->rmap.n;
2165: PetscInt cstart = A->cmap.rstart;
2166: PetscInt *cmap = mat->garray;
2167: PetscInt *diagIdx, *offdiagIdx;
2168: Vec diagV, offdiagV;
2169: PetscScalar *a, *diagA, *offdiagA;
2170: PetscInt r;
2174: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2175: VecCreateSeq(((PetscObject)A)->comm, n, &diagV);
2176: VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);
2177: MatGetRowMin(mat->A, diagV, diagIdx);
2178: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2179: VecGetArray(v, &a);
2180: VecGetArray(diagV, &diagA);
2181: VecGetArray(offdiagV, &offdiagA);
2182: for(r = 0; r < n; ++r) {
2183: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2184: a[r] = diagA[r];
2185: idx[r] = cstart + diagIdx[r];
2186: } else {
2187: a[r] = offdiagA[r];
2188: idx[r] = cmap[offdiagIdx[r]];
2189: }
2190: }
2191: VecRestoreArray(v, &a);
2192: VecRestoreArray(diagV, &diagA);
2193: VecRestoreArray(offdiagV, &offdiagA);
2194: VecDestroy(diagV);
2195: VecDestroy(offdiagV);
2196: PetscFree2(diagIdx, offdiagIdx);
2197: return(0);
2198: }
2202: PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2203: {
2207: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);
2208: return(0);
2209: }
2211: /* -------------------------------------------------------------------*/
2212: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2213: MatGetRow_MPIAIJ,
2214: MatRestoreRow_MPIAIJ,
2215: MatMult_MPIAIJ,
2216: /* 4*/ MatMultAdd_MPIAIJ,
2217: MatMultTranspose_MPIAIJ,
2218: MatMultTransposeAdd_MPIAIJ,
2219: #ifdef PETSC_HAVE_PBGL
2220: MatSolve_MPIAIJ,
2221: #else
2222: 0,
2223: #endif
2224: 0,
2225: 0,
2226: /*10*/ 0,
2227: 0,
2228: 0,
2229: MatRelax_MPIAIJ,
2230: MatTranspose_MPIAIJ,
2231: /*15*/ MatGetInfo_MPIAIJ,
2232: MatEqual_MPIAIJ,
2233: MatGetDiagonal_MPIAIJ,
2234: MatDiagonalScale_MPIAIJ,
2235: MatNorm_MPIAIJ,
2236: /*20*/ MatAssemblyBegin_MPIAIJ,
2237: MatAssemblyEnd_MPIAIJ,
2238: 0,
2239: MatSetOption_MPIAIJ,
2240: MatZeroEntries_MPIAIJ,
2241: /*25*/ MatZeroRows_MPIAIJ,
2242: 0,
2243: #ifdef PETSC_HAVE_PBGL
2244: MatLUFactorNumeric_MPIAIJ,
2245: #else
2246: 0,
2247: #endif
2248: 0,
2249: 0,
2250: /*30*/ MatSetUpPreallocation_MPIAIJ,
2251: #ifdef PETSC_HAVE_PBGL
2252: MatILUFactorSymbolic_MPIAIJ,
2253: #else
2254: 0,
2255: #endif
2256: 0,
2257: 0,
2258: 0,
2259: /*35*/ MatDuplicate_MPIAIJ,
2260: 0,
2261: 0,
2262: 0,
2263: 0,
2264: /*40*/ MatAXPY_MPIAIJ,
2265: MatGetSubMatrices_MPIAIJ,
2266: MatIncreaseOverlap_MPIAIJ,
2267: MatGetValues_MPIAIJ,
2268: MatCopy_MPIAIJ,
2269: /*45*/ 0,
2270: MatScale_MPIAIJ,
2271: 0,
2272: 0,
2273: 0,
2274: /*50*/ MatSetBlockSize_MPIAIJ,
2275: 0,
2276: 0,
2277: 0,
2278: 0,
2279: /*55*/ MatFDColoringCreate_MPIAIJ,
2280: 0,
2281: MatSetUnfactored_MPIAIJ,
2282: MatPermute_MPIAIJ,
2283: 0,
2284: /*60*/ MatGetSubMatrix_MPIAIJ,
2285: MatDestroy_MPIAIJ,
2286: MatView_MPIAIJ,
2287: 0,
2288: 0,
2289: /*65*/ 0,
2290: 0,
2291: 0,
2292: 0,
2293: 0,
2294: /*70*/ 0,
2295: 0,
2296: MatSetColoring_MPIAIJ,
2297: #if defined(PETSC_HAVE_ADIC)
2298: MatSetValuesAdic_MPIAIJ,
2299: #else
2300: 0,
2301: #endif
2302: MatSetValuesAdifor_MPIAIJ,
2303: /*75*/ 0,
2304: 0,
2305: 0,
2306: 0,
2307: 0,
2308: /*80*/ 0,
2309: 0,
2310: 0,
2311: 0,
2312: /*84*/ MatLoad_MPIAIJ,
2313: 0,
2314: 0,
2315: 0,
2316: 0,
2317: 0,
2318: /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2319: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2320: MatMatMultNumeric_MPIAIJ_MPIAIJ,
2321: MatPtAP_Basic,
2322: MatPtAPSymbolic_MPIAIJ,
2323: /*95*/ MatPtAPNumeric_MPIAIJ,
2324: 0,
2325: 0,
2326: 0,
2327: 0,
2328: /*100*/0,
2329: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2330: MatPtAPNumeric_MPIAIJ_MPIAIJ,
2331: MatConjugate_MPIAIJ,
2332: 0,
2333: /*105*/MatSetValuesRow_MPIAIJ,
2334: MatRealPart_MPIAIJ,
2335: MatImaginaryPart_MPIAIJ,
2336: 0,
2337: 0,
2338: /*110*/0,
2339: MatGetRedundantMatrix_MPIAIJ,
2340: MatGetRowMin_MPIAIJ,
2341: 0,
2342: 0,
2343: /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2345: /* ----------------------------------------------------------------------------------------*/
2350: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2351: {
2352: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2356: MatStoreValues(aij->A);
2357: MatStoreValues(aij->B);
2358: return(0);
2359: }
2365: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2366: {
2367: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2371: MatRetrieveValues(aij->A);
2372: MatRetrieveValues(aij->B);
2373: return(0);
2374: }
2377: #include petscpc.h
2381: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2382: {
2383: Mat_MPIAIJ *b;
2385: PetscInt i;
2388: B->preallocated = PETSC_TRUE;
2389: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2390: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2391: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2392: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2394: B->rmap.bs = B->cmap.bs = 1;
2395: PetscMapSetUp(&B->rmap);
2396: PetscMapSetUp(&B->cmap);
2397: if (d_nnz) {
2398: for (i=0; i<B->rmap.n; i++) {
2399: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2400: }
2401: }
2402: if (o_nnz) {
2403: for (i=0; i<B->rmap.n; i++) {
2404: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2405: }
2406: }
2407: b = (Mat_MPIAIJ*)B->data;
2409: /* Explicitly create 2 MATSEQAIJ matrices. */
2410: MatCreate(PETSC_COMM_SELF,&b->A);
2411: MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);
2412: MatSetType(b->A,MATSEQAIJ);
2413: PetscLogObjectParent(B,b->A);
2414: MatCreate(PETSC_COMM_SELF,&b->B);
2415: MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);
2416: MatSetType(b->B,MATSEQAIJ);
2417: PetscLogObjectParent(B,b->B);
2419: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
2420: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
2422: return(0);
2423: }
2428: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2429: {
2430: Mat mat;
2431: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2435: *newmat = 0;
2436: MatCreate(((PetscObject)matin)->comm,&mat);
2437: MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);
2438: MatSetType(mat,((PetscObject)matin)->type_name);
2439: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
2440: a = (Mat_MPIAIJ*)mat->data;
2441:
2442: mat->factor = matin->factor;
2443: mat->rmap.bs = matin->rmap.bs;
2444: mat->assembled = PETSC_TRUE;
2445: mat->insertmode = NOT_SET_VALUES;
2446: mat->preallocated = PETSC_TRUE;
2448: a->size = oldmat->size;
2449: a->rank = oldmat->rank;
2450: a->donotstash = oldmat->donotstash;
2451: a->roworiented = oldmat->roworiented;
2452: a->rowindices = 0;
2453: a->rowvalues = 0;
2454: a->getrowactive = PETSC_FALSE;
2456: PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);
2457: PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);
2459: MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);
2460: if (oldmat->colmap) {
2461: #if defined (PETSC_USE_CTABLE)
2462: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2463: #else
2464: PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);
2465: PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));
2466: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));
2467: #endif
2468: } else a->colmap = 0;
2469: if (oldmat->garray) {
2470: PetscInt len;
2471: len = oldmat->B->cmap.n;
2472: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
2473: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2474: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
2475: } else a->garray = 0;
2476:
2477: VecDuplicate(oldmat->lvec,&a->lvec);
2478: PetscLogObjectParent(mat,a->lvec);
2479: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2480: PetscLogObjectParent(mat,a->Mvctx);
2481: MatDuplicate(oldmat->A,cpvalues,&a->A);
2482: PetscLogObjectParent(mat,a->A);
2483: MatDuplicate(oldmat->B,cpvalues,&a->B);
2484: PetscLogObjectParent(mat,a->B);
2485: PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
2486: *newmat = mat;
2487: return(0);
2488: }
2490: #include petscsys.h
2494: PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2495: {
2496: Mat A;
2497: PetscScalar *vals,*svals;
2498: MPI_Comm comm = ((PetscObject)viewer)->comm;
2499: MPI_Status status;
2501: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2502: PetscInt i,nz,j,rstart,rend,mmax;
2503: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
2504: PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2505: PetscInt cend,cstart,n,*rowners;
2506: int fd;
2509: MPI_Comm_size(comm,&size);
2510: MPI_Comm_rank(comm,&rank);
2511: if (!rank) {
2512: PetscViewerBinaryGetDescriptor(viewer,&fd);
2513: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
2514: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2515: }
2517: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2518: M = header[1]; N = header[2];
2519: /* determine ownership of all rows */
2520: m = M/size + ((M % size) > rank);
2521: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
2522: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
2524: /* First process needs enough room for process with most rows */
2525: if (!rank) {
2526: mmax = rowners[1];
2527: for (i=2; i<size; i++) {
2528: mmax = PetscMax(mmax,rowners[i]);
2529: }
2530: } else mmax = m;
2532: rowners[0] = 0;
2533: for (i=2; i<=size; i++) {
2534: rowners[i] += rowners[i-1];
2535: }
2536: rstart = rowners[rank];
2537: rend = rowners[rank+1];
2539: /* distribute row lengths to all processors */
2540: PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);
2541: if (!rank) {
2542: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
2543: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
2544: PetscMalloc(size*sizeof(PetscInt),&procsnz);
2545: PetscMemzero(procsnz,size*sizeof(PetscInt));
2546: for (j=0; j<m; j++) {
2547: procsnz[0] += ourlens[j];
2548: }
2549: for (i=1; i<size; i++) {
2550: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
2551: /* calculate the number of nonzeros on each processor */
2552: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2553: procsnz[i] += rowlengths[j];
2554: }
2555: MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
2556: }
2557: PetscFree(rowlengths);
2558: } else {
2559: MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);
2560: }
2562: if (!rank) {
2563: /* determine max buffer needed and allocate it */
2564: maxnz = 0;
2565: for (i=0; i<size; i++) {
2566: maxnz = PetscMax(maxnz,procsnz[i]);
2567: }
2568: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
2570: /* read in my part of the matrix column indices */
2571: nz = procsnz[0];
2572: PetscMalloc(nz*sizeof(PetscInt),&mycols);
2573: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2575: /* read in everyone else's and ship off */
2576: for (i=1; i<size; i++) {
2577: nz = procsnz[i];
2578: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2579: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2580: }
2581: PetscFree(cols);
2582: } else {
2583: /* determine buffer space needed for message */
2584: nz = 0;
2585: for (i=0; i<m; i++) {
2586: nz += ourlens[i];
2587: }
2588: PetscMalloc(nz*sizeof(PetscInt),&mycols);
2590: /* receive message of column indices*/
2591: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2592: MPI_Get_count(&status,MPIU_INT,&maxnz);
2593: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2594: }
2596: /* determine column ownership if matrix is not square */
2597: if (N != M) {
2598: n = N/size + ((N % size) > rank);
2599: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
2600: cstart = cend - n;
2601: } else {
2602: cstart = rstart;
2603: cend = rend;
2604: n = cend - cstart;
2605: }
2607: /* loop over local rows, determining number of off diagonal entries */
2608: PetscMemzero(offlens,m*sizeof(PetscInt));
2609: jj = 0;
2610: for (i=0; i<m; i++) {
2611: for (j=0; j<ourlens[i]; j++) {
2612: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2613: jj++;
2614: }
2615: }
2617: /* create our matrix */
2618: for (i=0; i<m; i++) {
2619: ourlens[i] -= offlens[i];
2620: }
2621: MatCreate(comm,&A);
2622: MatSetSizes(A,m,n,M,N);
2623: MatSetType(A,type);
2624: MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);
2626: for (i=0; i<m; i++) {
2627: ourlens[i] += offlens[i];
2628: }
2630: if (!rank) {
2631: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
2633: /* read in my part of the matrix numerical values */
2634: nz = procsnz[0];
2635: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2636:
2637: /* insert into matrix */
2638: jj = rstart;
2639: smycols = mycols;
2640: svals = vals;
2641: for (i=0; i<m; i++) {
2642: MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2643: smycols += ourlens[i];
2644: svals += ourlens[i];
2645: jj++;
2646: }
2648: /* read in other processors and ship out */
2649: for (i=1; i<size; i++) {
2650: nz = procsnz[i];
2651: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2652: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);
2653: }
2654: PetscFree(procsnz);
2655: } else {
2656: /* receive numeric values */
2657: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2659: /* receive message of values*/
2660: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);
2661: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2662: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2664: /* insert into matrix */
2665: jj = rstart;
2666: smycols = mycols;
2667: svals = vals;
2668: for (i=0; i<m; i++) {
2669: MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2670: smycols += ourlens[i];
2671: svals += ourlens[i];
2672: jj++;
2673: }
2674: }
2675: PetscFree2(ourlens,offlens);
2676: PetscFree(vals);
2677: PetscFree(mycols);
2678: PetscFree(rowners);
2680: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2681: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2682: *newmat = A;
2683: return(0);
2684: }
2688: /*
2689: Not great since it makes two copies of the submatrix: first a SeqAIJ copy of the local part on each process,
2690: and then the final parallel matrix formed by concatenating those local matrices.
2691: Writing it directly would look much like MatGetSubMatrices_MPIAIJ(). (A usage sketch follows the routine below.)
2692: */
2693: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2694: {
2696: PetscMPIInt rank,size;
2697: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j;
2698: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2699: Mat *local,M,Mreuse;
2700: PetscScalar *vwork,*aa;
2701: MPI_Comm comm = ((PetscObject)mat)->comm;
2702: Mat_SeqAIJ *aij;
2706: MPI_Comm_rank(comm,&rank);
2707: MPI_Comm_size(comm,&size);
2709: if (call == MAT_REUSE_MATRIX) {
2710: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2711: if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2712: local = &Mreuse;
2713: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2714: } else {
2715: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2716: Mreuse = *local;
2717: PetscFree(local);
2718: }
2720: /*
2721: m - number of local rows
2722: n - number of columns (same on all processors)
2723: rstart - first row in new global matrix generated
2724: */
2725: MatGetSize(Mreuse,&m,&n);
2726: if (call == MAT_INITIAL_MATRIX) {
2727: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2728: ii = aij->i;
2729: jj = aij->j;
2731: /*
2732: Determine the number of non-zeros in the diagonal and off-diagonal
2733: portions of the matrix in order to do correct preallocation
2734: */
2736: /* first get start and end of "diagonal" columns */
2737: if (csize == PETSC_DECIDE) {
2738: ISGetSize(isrow,&mglobal);
2739: if (mglobal == n) { /* square matrix */
2740: nlocal = m;
2741: } else {
2742: nlocal = n/size + ((n % size) > rank);
2743: }
2744: } else {
2745: nlocal = csize;
2746: }
2747: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
2748: rstart = rend - nlocal;
2749: if (rank == size - 1 && rend != n) {
2750: SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2751: }
2753: /* next, compute all the lengths */
2754: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
2755: olens = dlens + m;
2756: for (i=0; i<m; i++) {
2757: jend = ii[i+1] - ii[i];
2758: olen = 0;
2759: dlen = 0;
2760: for (j=0; j<jend; j++) {
2761: if (*jj < rstart || *jj >= rend) olen++;
2762: else dlen++;
2763: jj++;
2764: }
2765: olens[i] = olen;
2766: dlens[i] = dlen;
2767: }
2768: MatCreate(comm,&M);
2769: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
2770: MatSetType(M,((PetscObject)mat)->type_name);
2771: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
2772: PetscFree(dlens);
2773: } else {
2774: PetscInt ml,nl;
2776: M = *newmat;
2777: MatGetLocalSize(M,&ml,&nl);
2778: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2779: MatZeroEntries(M);
2780: /*
2781: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2782: rather than the slower MatSetValues().
2783: */
2784: M->was_assembled = PETSC_TRUE;
2785: M->assembled = PETSC_FALSE;
2786: }
2787: MatGetOwnershipRange(M,&rstart,&rend);
2788: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2789: ii = aij->i;
2790: jj = aij->j;
2791: aa = aij->a;
2792: for (i=0; i<m; i++) {
2793: row = rstart + i;
2794: nz = ii[i+1] - ii[i];
2795: cwork = jj; jj += nz;
2796: vwork = aa; aa += nz;
2797: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2798: }
2800: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2801: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2802: *newmat = M;
2804: /* save submatrix used in processor for next request */
2805: if (call == MAT_INITIAL_MATRIX) {
2806: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2807: PetscObjectDereference((PetscObject)Mreuse);
2808: }
2810: return(0);
2811: }
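/*
   Usage sketch (illustrative only; ExampleGetSubMatrix is a hypothetical helper, not part of PETSc):
   extracting a parallel submatrix through the MatGetSubMatrix() interface, which dispatches to the
   routine above for MPIAIJ matrices.  Each process keeps its own rows; the column index set is taken
   to be the same on every process, as the implementation above assumes.  Error checking is omitted.
*/
PetscErrorCode ExampleGetSubMatrix(Mat mat)
{
  IS       isrow,iscol;
  Mat      sub;
  PetscInt rstart,rend,M,N;

  MatGetOwnershipRange(mat,&rstart,&rend);                              /* locally owned rows        */
  MatGetSize(mat,&M,&N);
  ISCreateStride(((PetscObject)mat)->comm,rend-rstart,rstart,1,&isrow); /* keep my rows              */
  ISCreateStride(((PetscObject)mat)->comm,N,0,1,&iscol);                /* keep all columns; same IS on every process */
  MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
  ISDestroy(isrow);
  ISDestroy(iscol);
  MatDestroy(sub);
  return(0);
}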
2816: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2817: {
2818: PetscInt m,cstart, cend,j,nnz,i,d;
2819: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2820: const PetscInt *JJ;
2821: PetscScalar *values;
2825: if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2827: B->rmap.bs = B->cmap.bs = 1;
2828: PetscMapSetUp(&B->rmap);
2829: PetscMapSetUp(&B->cmap);
2830: m = B->rmap.n;
2831: cstart = B->cmap.rstart;
2832: cend = B->cmap.rend;
2833: rstart = B->rmap.rstart;
2835: PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);
2836: o_nnz = d_nnz + m;
2838: #if defined(PETSC_USE_DEBUG)
2839: for (i=0; i<m; i++) {
2840: nnz = Ii[i+1]- Ii[i];
2841: JJ = J + Ii[i];
2842: if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2843: if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
2844: if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
2845: for (j=1; j<nnz; j++) {
2846: if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at location %D in its column indices",i,j);
2847: }
2848: }
2849: #endif
2851: for (i=0; i<m; i++) {
2852: nnz = Ii[i+1]- Ii[i];
2853: JJ = J + Ii[i];
2854: nnz_max = PetscMax(nnz_max,nnz);
2855: for (j=0; j<nnz; j++) {
2856: if (*JJ >= cstart) break;
2857: JJ++;
2858: }
2859: d = 0;
2860: for (; j<nnz; j++) {
2861: if (*JJ++ >= cend) break;
2862: d++;
2863: }
2864: d_nnz[i] = d;
2865: o_nnz[i] = nnz - d;
2866: }
2867: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
2868: PetscFree(d_nnz);
2870: if (v) values = (PetscScalar*)v;
2871: else {
2872: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
2873: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
2874: }
2876: for (i=0; i<m; i++) {
2877: ii = i + rstart;
2878: nnz = Ii[i+1]- Ii[i];
2879: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
2880: }
2881: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2882: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2884: if (!v) {
2885: PetscFree(values);
2886: }
2887: return(0);
2888: }
2893: /*@
2894: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2895: (the default parallel PETSc format).
2897: Collective on MPI_Comm
2899: Input Parameters:
2900: + B - the matrix
2901: . i - the indices into j for the start of each local row (starts with zero)
2902: . j - the column indices for each local row (starts with zero) these must be sorted for each row
2903: - v - optional values in the matrix
2905: Level: developer
2907: Notes:
2908: The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
2909: thus you CANNOT change the matrix entries by changing the values of v[] after you have
2910: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2912: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2914: The format used for the sparse matrix input is equivalent to a
2915: row-major ordering, i.e. for the following matrix the input data expected is
2916: as shown:
2918: 1 0 0
2919: 2 0 3 P0
2920: -------
2921: 4 5 6 P1
2923: Process0 [P0]: rows_owned=[0,1]
2924: i = {0,1,3} [size = nrow+1 = 2+1]
2925: j = {0,0,2} [size = nz = 3]
2926: v = {1,2,3} [size = nz = 3]
2928: Process1 [P1]: rows_owned=[2]
2929: i = {0,3} [size = nrow+1 = 1+1]
2930: j = {0,1,2} [size = nz = 3]
2931: v = {4,5,6} [size = nz = 3]
2933: The column indices for each row MUST be sorted.
2935: .keywords: matrix, aij, compressed row, sparse, parallel
2937: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2938: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2939: @*/
2940: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2941: {
2942: PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2945: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);
2946: if (f) {
2947: (*f)(B,i,j,v);
2948: }
2949: return(0);
2950: }
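/*
   Usage sketch (illustrative only; ExampleSetPreallocationCSR_P0 is a hypothetical helper, not part
   of PETSc): how process 0 of the 3x3 example in the manual page above, which owns the first two
   rows, could hand its local CSR arrays to MatMPIAIJSetPreallocationCSR().  Error checking omitted.
*/
PetscErrorCode ExampleSetPreallocationCSR_P0(MPI_Comm comm,Mat *B)
{
  const PetscInt    i[] = {0,1,3};        /* row pointers for the 2 local rows       */
  const PetscInt    j[] = {0,0,2};        /* sorted global column indices per row    */
  const PetscScalar v[] = {1.0,2.0,3.0};  /* values matching j[]                     */

  MatCreate(comm,B);
  MatSetSizes(*B,2,PETSC_DECIDE,3,3);     /* 2 local rows of the 3x3 global matrix   */
  MatSetType(*B,MATMPIAIJ);
  MatMPIAIJSetPreallocationCSR(*B,i,j,v); /* copies i,j,v and assembles the matrix   */
  return(0);
}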
2954: /*@C
2955: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2956: (the default parallel PETSc format). For good matrix assembly performance
2957: the user should preallocate the matrix storage by setting the parameters
2958: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2959: performance can be increased by more than a factor of 50.
2961: Collective on MPI_Comm
2963: Input Parameters:
2964: + A - the matrix
2965: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2966: (same value is used for all local rows)
2967: . d_nnz - array containing the number of nonzeros in the various rows of the
2968: DIAGONAL portion of the local submatrix (possibly different for each row)
2969: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2970: The size of this array is equal to the number of local rows, i.e 'm'.
2971: You must leave room for the diagonal entry even if it is zero.
2972: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2973: submatrix (same value is used for all local rows).
2974: - o_nnz - array containing the number of nonzeros in the various rows of the
2975: OFF-DIAGONAL portion of the local submatrix (possibly different for
2976: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2977: structure. The size of this array is equal to the number
2978: of local rows, i.e 'm'.
2980: If the *_nnz parameter is given then the *_nz parameter is ignored
2982: The AIJ format (also called the Yale sparse matrix format or
2983: compressed row storage (CSR)), is fully compatible with standard Fortran 77
2984: storage. The stored row and column indices begin with zero. See the users manual for details.
2986: The parallel matrix is partitioned such that the first m0 rows belong to
2987: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2988: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2990: The DIAGONAL portion of the local submatrix of a processor can be defined
2991: as the submatrix which is obtained by extracting the part corresponding
2992: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2993: first row that belongs to the processor, and r2 is the last row belonging
2994: to this processor. This is a square mxm matrix. The remaining portion
2995: of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2997: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2999: You can call MatGetInfo() to get information on how effective the preallocation was;
3000: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3001: You can also run with the option -info and look for messages with the string
3002: malloc in them to see if additional memory allocation was needed.
3004: Example usage:
3005:
3006: Consider the following 8x8 matrix with 34 non-zero values, that is
3007: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3008: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3009: as follows:
3011: .vb
3012: 1 2 0 | 0 3 0 | 0 4
3013: Proc0 0 5 6 | 7 0 0 | 8 0
3014: 9 0 10 | 11 0 0 | 12 0
3015: -------------------------------------
3016: 13 0 14 | 15 16 17 | 0 0
3017: Proc1 0 18 0 | 19 20 21 | 0 0
3018: 0 0 0 | 22 23 0 | 24 0
3019: -------------------------------------
3020: Proc2 25 26 27 | 0 0 28 | 29 0
3021: 30 0 0 | 31 32 33 | 0 34
3022: .ve
3024: This can be represented as a collection of submatrices as:
3026: .vb
3027: A B C
3028: D E F
3029: G H I
3030: .ve
3032: Where the submatrices A,B,C are owned by proc0, D,E,F are
3033: owned by proc1, G,H,I are owned by proc2.
3035: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3036: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3037: The 'M','N' parameters are 8,8, and have the same values on all procs.
3039: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3040: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3041: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3042: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3043: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3044: matrix, and [DF] as another SeqAIJ matrix.
3046: When d_nz, o_nz parameters are specified, d_nz storage elements are
3047: allocated for every row of the local diagonal submatrix, and o_nz
3048: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3049: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3050: local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3051: In this case, the values of d_nz,o_nz are:
3052: .vb
3053: proc0 : dnz = 2, o_nz = 2
3054: proc1 : dnz = 3, o_nz = 2
3055: proc2 : dnz = 1, o_nz = 4
3056: .ve
3057: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3058: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3059: for proc2, i.e. we are using 12+15+10=37 storage locations to store
3060: 34 values.
3062: When d_nnz, o_nnz parameters are specified, the storage is specified
3063: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3064: In the above case the values for d_nnz,o_nnz are:
3065: .vb
3066: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3067: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3068: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3069: .ve
3070: Here the space allocated is the sum of all the above values, i.e. 34, and
3071: hence the preallocation is perfect.
3073: Level: intermediate
3075: .keywords: matrix, aij, compressed row, sparse, parallel
3077: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3078: MPIAIJ, MatGetInfo()
3079: @*/
3080: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3081: {
3082: PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3085: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);
3086: if (f) {
3087: (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
3088: }
3089: return(0);
3090: }
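/*
   Usage sketch (illustrative only; ExamplePreallocate_P0 is a hypothetical helper, not part of
   PETSc): the per-row preallocation process 0 of the 8x8 example above would request; it owns
   3 rows with 2 diagonal-block and 2 off-diagonal-block nonzeros each.  Assumes B was already
   created with MatCreate()/MatSetSizes()/MatSetType(B,MATMPIAIJ); error checking omitted.
*/
PetscErrorCode ExamplePreallocate_P0(Mat B)
{
  const PetscInt d_nnz[3] = {2,2,2};  /* nonzeros per local row in the diagonal block [A]       */
  const PetscInt o_nnz[3] = {2,2,2};  /* nonzeros per local row in the off-diagonal block [BC]  */

  MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);  /* d_nz/o_nz are ignored when *_nnz is given */
  return(0);
}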
3094: /*@
3095: MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3096: CSR format the local rows.
3098: Collective on MPI_Comm
3100: Input Parameters:
3101: + comm - MPI communicator
3102: . m - number of local rows (Cannot be PETSC_DECIDE)
3103: . n - This value should be the same as the local size used in creating the
3104: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3105: calculated if N is given) For square matrices n is almost always m.
3106: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3107: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3108: . i - row indices
3109: . j - column indices
3110: - a - matrix values
3112: Output Parameter:
3113: . mat - the matrix
3115: Level: intermediate
3117: Notes:
3118: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3119: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3120: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3122: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3124: The format used for the sparse matrix input is equivalent to a
3125: row-major ordering, i.e. for the following matrix the input data expected is
3126: as shown:
3128: 1 0 0
3129: 2 0 3 P0
3130: -------
3131: 4 5 6 P1
3133: Process0 [P0]: rows_owned=[0,1]
3134: i = {0,1,3} [size = nrow+1 = 2+1]
3135: j = {0,0,2} [size = nz = 3]
3136: v = {1,2,3} [size = nz = 3]
3138: Process1 [P1]: rows_owned=[2]
3139: i = {0,3} [size = nrow+1 = 1+1]
3140: j = {0,1,2} [size = nz = 3]
3141: v = {4,5,6} [size = nz = 3]
3143: .keywords: matrix, aij, compressed row, sparse, parallel
3145: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3146: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3147: @*/
3148: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3149: {
3153: if (i[0]) {
3154: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3155: }
3156: if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3157: MatCreate(comm,mat);
3158: MatSetSizes(*mat,m,n,M,N);
3159: MatSetType(*mat,MATMPIAIJ);
3160: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
3161: return(0);
3162: }
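/*
   Usage sketch (illustrative only; ExampleCreateWithArrays_P1 is a hypothetical helper, not part of
   PETSc): process 1 of the 3x3 example in the manual page above, which owns the last row, builds the
   parallel matrix directly from its local CSR arrays.  The routine is collective, so process 0 makes
   the analogous call with its own arrays at the same time.  Error checking omitted.
*/
PetscErrorCode ExampleCreateWithArrays_P1(MPI_Comm comm,Mat *A)
{
  const PetscInt    i[] = {0,3};          /* row pointer for the single local row */
  const PetscInt    j[] = {0,1,2};        /* global column indices                */
  const PetscScalar a[] = {4.0,5.0,6.0};  /* values                               */

  MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i,j,a,A);
  return(0);
}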
3166: /*@C
3167: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3168: (the default parallel PETSc format). For good matrix assembly performance
3169: the user should preallocate the matrix storage by setting the parameters
3170: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3171: performance can be increased by more than a factor of 50.
3173: Collective on MPI_Comm
3175: Input Parameters:
3176: + comm - MPI communicator
3177: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3178: This value should be the same as the local size used in creating the
3179: y vector for the matrix-vector product y = Ax.
3180: . n - This value should be the same as the local size used in creating the
3181: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3182: calculated if N is given) For square matrices n is almost always m.
3183: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3184: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3185: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3186: (same value is used for all local rows)
3187: . d_nnz - array containing the number of nonzeros in the various rows of the
3188: DIAGONAL portion of the local submatrix (possibly different for each row)
3189: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3190: The size of this array is equal to the number of local rows, i.e 'm'.
3191: You must leave room for the diagonal entry even if it is zero.
3192: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3193: submatrix (same value is used for all local rows).
3194: - o_nnz - array containing the number of nonzeros in the various rows of the
3195: OFF-DIAGONAL portion of the local submatrix (possibly different for
3196: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3197: structure. The size of this array is equal to the number
3198: of local rows, i.e 'm'.
3200: Output Parameter:
3201: . A - the matrix
3203: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3204: MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
3205: true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3206: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3208: Notes:
3209: If the *_nnz parameter is given then the *_nz parameter is ignored
3211: m,n,M,N parameters specify the size of the matrix, and its partitioning across
3212: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3213: storage requirements for this matrix.
3215: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3216: processor than it must be used on all processors that share the object for
3217: that argument.
3219: The user MUST specify either the local or global matrix dimensions
3220: (possibly both).
3222: The parallel matrix is partitioned across processors such that the
3223: first m0 rows belong to process 0, the next m1 rows belong to
3224: process 1, the next m2 rows belong to process 2 etc.. where
3225: m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3226: values corresponding to an [m x N] submatrix.
3228: The columns are logically partitioned with the n0 columns belonging
3229: to 0th partition, the next n1 columns belonging to the next
3230: partition etc., where n0,n1,n2... are the input parameter 'n'.
3232: The DIAGONAL portion of the local submatrix on any given processor
3233: is the submatrix corresponding to the rows and columns m,n
3234: owned by the given processor, i.e. the diagonal matrix on
3235: process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3236: etc. The remaining portion of the local submatrix [m x (N-n)]
3237: constitute the OFF-DIAGONAL portion. The example below better
3238: illustrates this concept.
3240: For a square global matrix we define each processor's diagonal portion
3241: to be its local rows and the corresponding columns (a square submatrix);
3242: each processor's off-diagonal portion encompasses the remainder of the
3243: local matrix (a rectangular submatrix).
3245: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3247: When calling this routine with a single process communicator, a matrix of
3248: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3249: type of communicator, use the construction mechanism:
3250: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3252: By default, this format uses inodes (identical nodes) when possible.
3253: We search for consecutive rows with the same nonzero structure, thereby
3254: reusing matrix information to achieve increased efficiency.
3256: Options Database Keys:
3257: + -mat_no_inode - Do not use inodes
3258: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3259: - -mat_aij_oneindex - Internally use indexing starting at 1
3260: rather than 0. Note that when calling MatSetValues(),
3261: the user still MUST index entries starting at 0!
3264: Example usage:
3265:
3266: Consider the following 8x8 matrix with 34 non-zero values, that is
3267: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3268: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3269: as follows:
3271: .vb
3272: 1 2 0 | 0 3 0 | 0 4
3273: Proc0 0 5 6 | 7 0 0 | 8 0
3274: 9 0 10 | 11 0 0 | 12 0
3275: -------------------------------------
3276: 13 0 14 | 15 16 17 | 0 0
3277: Proc1 0 18 0 | 19 20 21 | 0 0
3278: 0 0 0 | 22 23 0 | 24 0
3279: -------------------------------------
3280: Proc2 25 26 27 | 0 0 28 | 29 0
3281: 30 0 0 | 31 32 33 | 0 34
3282: .ve
3284: This can be represented as a collection of submatrices as:
3286: .vb
3287: A B C
3288: D E F
3289: G H I
3290: .ve
3292: Where the submatrices A,B,C are owned by proc0, D,E,F are
3293: owned by proc1, G,H,I are owned by proc2.
3295: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3296: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3297: The 'M','N' parameters are 8,8, and have the same values on all procs.
3299: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3300: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3301: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3302: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3303: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3304: matrix, and [DF] as another SeqAIJ matrix.
3306: When d_nz, o_nz parameters are specified, d_nz storage elements are
3307: allocated for every row of the local diagonal submatrix, and o_nz
3308: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3309: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3310: local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3311: In this case, the values of d_nz,o_nz are:
3312: .vb
3313: proc0 : dnz = 2, o_nz = 2
3314: proc1 : dnz = 3, o_nz = 2
3315: proc2 : dnz = 1, o_nz = 4
3316: .ve
3317: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3318: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3319: for proc2, i.e. we are using 12+15+10=37 storage locations to store
3320: 34 values.
3322: When d_nnz, o_nnz parameters are specified, the storage is specified
3323: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3324: In the above case the values for d_nnz,o_nnz are:
3325: .vb
3326: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3327: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3328: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3329: .ve
3330: Here the space allocated is the sum of all the above values, i.e. 34, and
3331: hence the preallocation is perfect.
3333: Level: intermediate
3335: .keywords: matrix, aij, compressed row, sparse, parallel
3337: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3338: MPIAIJ, MatCreateMPIAIJWithArrays()
3339: @*/
3340: PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3341: {
3343: PetscMPIInt size;
3346: MatCreate(comm,A);
3347: MatSetSizes(*A,m,n,M,N);
3348: MPI_Comm_size(comm,&size);
3349: if (size > 1) {
3350: MatSetType(*A,MATMPIAIJ);
3351: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
3352: } else {
3353: MatSetType(*A,MATSEQAIJ);
3354: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
3355: }
3356: return(0);
3357: }
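/*
   Usage sketch (illustrative only; ExampleCreateMPIAIJ_P1 is a hypothetical helper, not part of
   PETSc): creating the 8x8 example matrix from the manual page above on 3 processes, shown as the
   call process 1 (3 local rows) would make with the per-row counts listed there.  Error checking
   omitted.
*/
PetscErrorCode ExampleCreateMPIAIJ_P1(MPI_Comm comm,Mat *A)
{
  const PetscInt d_nnz[3] = {3,3,2};   /* diagonal-block nonzeros per local row     */
  const PetscInt o_nnz[3] = {2,1,1};   /* off-diagonal-block nonzeros per local row */

  MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,A);  /* d_nz/o_nz ignored since *_nnz given */
  return(0);
}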
3361: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3362: {
3363: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3366: *Ad = a->A;
3367: *Ao = a->B;
3368: *colmap = a->garray;
3369: return(0);
3370: }
3374: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3375: {
3377: PetscInt i;
3378: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3381: if (coloring->ctype == IS_COLORING_GLOBAL) {
3382: ISColoringValue *allcolors,*colors;
3383: ISColoring ocoloring;
3385: /* set coloring for diagonal portion */
3386: MatSetColoring_SeqAIJ(a->A,coloring);
3388: /* set coloring for off-diagonal portion */
3389: ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
3390: PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);
3391: for (i=0; i<a->B->cmap.n; i++) {
3392: colors[i] = allcolors[a->garray[i]];
3393: }
3394: PetscFree(allcolors);
3395: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);
3396: MatSetColoring_SeqAIJ(a->B,ocoloring);
3397: ISColoringDestroy(ocoloring);
3398: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3399: ISColoringValue *colors;
3400: PetscInt *larray;
3401: ISColoring ocoloring;
3403: /* set coloring for diagonal portion */
3404: PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);
3405: for (i=0; i<a->A->cmap.n; i++) {
3406: larray[i] = i + A->cmap.rstart;
3407: }
3408: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);
3409: PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);
3410: for (i=0; i<a->A->cmap.n; i++) {
3411: colors[i] = coloring->colors[larray[i]];
3412: }
3413: PetscFree(larray);
3414: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);
3415: MatSetColoring_SeqAIJ(a->A,ocoloring);
3416: ISColoringDestroy(ocoloring);
3418: /* set coloring for off-diagonal portion */
3419: PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);
3420: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);
3421: PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);
3422: for (i=0; i<a->B->cmap.n; i++) {
3423: colors[i] = coloring->colors[larray[i]];
3424: }
3425: PetscFree(larray);
3426: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);
3427: MatSetColoring_SeqAIJ(a->B,ocoloring);
3428: ISColoringDestroy(ocoloring);
3429: } else {
3430: SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3431: }
3433: return(0);
3434: }
3436: #if defined(PETSC_HAVE_ADIC)
3439: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3440: {
3441: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3445: MatSetValuesAdic_SeqAIJ(a->A,advalues);
3446: MatSetValuesAdic_SeqAIJ(a->B,advalues);
3447: return(0);
3448: }
3449: #endif
3453: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3454: {
3455: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3459: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
3460: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
3461: return(0);
3462: }
3466: /*@
3467: MatMerge - Creates a single large PETSc matrix by concatenating sequential
3468: matrices from each processor
3470: Collective on MPI_Comm
3472: Input Parameters:
3473: + comm - the communicator the parallel matrix will live on
3474: . inmat - the input sequential matrix (one per process)
3475: . n - number of local columns (or PETSC_DECIDE)
3476: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3478: Output Parameter:
3479: . outmat - the parallel matrix generated
3481: Level: advanced
3483:    Notes: The number of columns of the matrix on EACH processor MUST be the same. The input
3484:    matrix inmat is destroyed by this routine once the parallel matrix has been assembled;
3485:    a usage sketch follows this manual page.
3485: @*/
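/*
   Editor's sketch (hypothetical usage, not part of the original source); "localmat" is assumed
   to be an assembled MATSEQAIJ matrix built independently on each process, with the same
   number of columns everywhere:

      Mat localmat,C;
      ... assemble localmat on every process ...
      MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
      ... use C; localmat has been destroyed by MatMerge() ...
      MatDestroy(C);
*/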
3486: PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3487: {
3489: PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz;
3490: PetscInt *indx;
3491: PetscScalar *values;
3494: MatGetSize(inmat,&m,&N);
3495: if (scall == MAT_INITIAL_MATRIX){
3496: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3497: if (n == PETSC_DECIDE){
3498: PetscSplitOwnership(comm,&n,&N);
3499: }
3500: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
3501: rstart -= m;
3503: MatPreallocateInitialize(comm,m,n,dnz,onz);
3504: for (i=0;i<m;i++) {
3505: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
3506: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
3507: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
3508: }
3509: /* This routine will ONLY return MPIAIJ type matrix */
3510: MatCreate(comm,outmat);
3511: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
3512: MatSetType(*outmat,MATMPIAIJ);
3513: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
3514: MatPreallocateFinalize(dnz,onz);
3515:
3516: } else if (scall == MAT_REUSE_MATRIX){
3517: MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);
3518: } else {
3519: SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3520: }
3522: for (i=0;i<m;i++) {
3523: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
3524: Ii = i + rstart;
3525: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
3526: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
3527: }
3528: MatDestroy(inmat);
3529: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
3530: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
3532: return(0);
3533: }
3537: PetscErrorCode MatFileSplit(Mat A,char *outfile)
3538: {
3539: PetscErrorCode ierr;
3540: PetscMPIInt rank;
3541: PetscInt m,N,i,rstart,nnz;
3542: size_t len;
3543: const PetscInt *indx;
3544: PetscViewer out;
3545: char *name;
3546: Mat B;
3547: const PetscScalar *values;
3550: MatGetLocalSize(A,&m,0);
3551: MatGetSize(A,0,&N);
3552: /* Should this be the type of the diagonal block of A? */
3553: MatCreate(PETSC_COMM_SELF,&B);
3554: MatSetSizes(B,m,N,m,N);
3555: MatSetType(B,MATSEQAIJ);
3556: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
3557: MatGetOwnershipRange(A,&rstart,0);
3558: for (i=0;i<m;i++) {
3559: MatGetRow(A,i+rstart,&nnz,&indx,&values);
3560: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
3561: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
3562: }
3563: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3564: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3566: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
3567: PetscStrlen(outfile,&len);
3568: PetscMalloc((len+5)*sizeof(char),&name);
3569: sprintf(name,"%s.%d",outfile,rank);
3570: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
3571: PetscFree(name);
3572: MatView(B,out);
3573: PetscViewerDestroy(out);
3574: MatDestroy(B);
3575: return(0);
3576: }
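/*
   Editor's note (illustrative sketch, not part of the original source): MatFileSplit() above
   writes each process's local rows of A to its own binary file named <outfile>.<rank>, opened
   in FILE_MODE_APPEND, e.g.

      MatFileSplit(A,"Amat");
      ... rank 0 appends its piece to Amat.0, rank 1 to Amat.1, and so on ...
*/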
3578: EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3581: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3582: {
3583: PetscErrorCode ierr;
3584: Mat_Merge_SeqsToMPI *merge;
3585: PetscContainer container;
3588: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
3589: if (container) {
3590: PetscContainerGetPointer(container,(void **)&merge);
3591: PetscFree(merge->id_r);
3592: PetscFree(merge->len_s);
3593: PetscFree(merge->len_r);
3594: PetscFree(merge->bi);
3595: PetscFree(merge->bj);
3596: PetscFree(merge->buf_ri);
3597: PetscFree(merge->buf_rj);
3598: PetscFree(merge->coi);
3599: PetscFree(merge->coj);
3600: PetscFree(merge->owners_co);
3601: PetscFree(merge->rowmap.range);
3602:
3603: PetscContainerDestroy(container);
3604: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
3605:     PetscFree(merge);
3606:   }
3608: MatDestroy_MPIAIJ(A);
3609: return(0);
3610: }
3612: #include src/mat/utils/freespace.h
3613: #include petscbt.h
3617: /*@C
3618:    MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
3619:       matrices from each processor
3621: Collective on MPI_Comm
3623: Input Parameters:
3624: +    comm - the communicator the parallel matrix will live on
3625: .    seqmat - the input sequential matrix (one per process)
3626: . m - number of local rows (or PETSC_DECIDE)
3627: . n - number of local columns (or PETSC_DECIDE)
3628: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3630: Output Parameter:
3631: . mpimat - the parallel matrix generated
3633: Level: advanced
3635:    Notes:
3636:      The dimensions of the sequential matrix on each processor MUST be the same.
3637:      The input seqmat is included in the container "Mat_Merge_SeqsToMPI" attached to mpimat and will be
3638:      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3639: @*/
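/*
   Editor's sketch (hypothetical usage, not part of the original source): the symbolic and
   numeric phases below are normally driven through MatMerge_SeqsToMPI(), defined further
   down in this file; m and n are the desired local row and column counts (or PETSC_DECIDE):

      Mat seqmat,C;
      ... every process builds seqmat (a MATSEQAIJ) with identical dimensions ...
      MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,m,n,MAT_INITIAL_MATRIX,&C);
      ... later, after seqmat is refilled with new values in the same nonzero pattern ...
      MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,m,n,MAT_REUSE_MATRIX,&C);
      MatDestroy(C);
*/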
3640: PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3641: {
3642: PetscErrorCode ierr;
3643: MPI_Comm comm=((PetscObject)mpimat)->comm;
3644: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
3645: PetscMPIInt size,rank,taga,*len_s;
3646: PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3647: PetscInt proc,m;
3648: PetscInt **buf_ri,**buf_rj;
3649: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3650: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
3651: MPI_Request *s_waits,*r_waits;
3652: MPI_Status *status;
3653: MatScalar *aa=a->a,**abuf_r,*ba_i;
3654: Mat_Merge_SeqsToMPI *merge;
3655: PetscContainer container;
3656:
3660: MPI_Comm_size(comm,&size);
3661: MPI_Comm_rank(comm,&rank);
3663: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
3664: if (container) {
3665: PetscContainerGetPointer(container,(void **)&merge);
3666: }
3667: bi = merge->bi;
3668: bj = merge->bj;
3669: buf_ri = merge->buf_ri;
3670: buf_rj = merge->buf_rj;
3672: PetscMalloc(size*sizeof(MPI_Status),&status);
3673: owners = merge->rowmap.range;
3674: len_s = merge->len_s;
3676: /* send and recv matrix values */
3677: /*-----------------------------*/
3678: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
3679: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
3681: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
3682: for (proc=0,k=0; proc<size; proc++){
3683: if (!len_s[proc]) continue;
3684: i = owners[proc];
3685: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
3686: k++;
3687: }
3689: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
3690: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
3691: PetscFree(status);
3693: PetscFree(s_waits);
3694: PetscFree(r_waits);
3696: /* insert mat values of mpimat */
3697: /*----------------------------*/
3698: PetscMalloc(N*sizeof(MatScalar),&ba_i);
3699: PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
3700: nextrow = buf_ri_k + merge->nrecv;
3701: nextai = nextrow + merge->nrecv;
3703: for (k=0; k<merge->nrecv; k++){
3704:     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3705:     nrows = *(buf_ri_k[k]);
3706:     nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */
3707:     nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure indices of the k-th received message */
3708: }
3710: /* set values of ba */
3711: m = merge->rowmap.n;
3712: for (i=0; i<m; i++) {
3713: arow = owners[rank] + i;
3714: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
3715: bnzi = bi[i+1] - bi[i];
3716: PetscMemzero(ba_i,bnzi*sizeof(MatScalar));
3718: /* add local non-zero vals of this proc's seqmat into ba */
3719: anzi = ai[arow+1] - ai[arow];
3720: aj = a->j + ai[arow];
3721: aa = a->a + ai[arow];
3722: nextaj = 0;
3723: for (j=0; nextaj<anzi; j++){
3724: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3725: ba_i[j] += aa[nextaj++];
3726: }
3727: }
3729: /* add received vals into ba */
3730: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3731: /* i-th row */
3732: if (i == *nextrow[k]) {
3733: anzi = *(nextai[k]+1) - *nextai[k];
3734: aj = buf_rj[k] + *(nextai[k]);
3735: aa = abuf_r[k] + *(nextai[k]);
3736: nextaj = 0;
3737: for (j=0; nextaj<anzi; j++){
3738: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3739: ba_i[j] += aa[nextaj++];
3740: }
3741: }
3742: nextrow[k]++; nextai[k]++;
3743: }
3744: }
3745: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
3746: }
3747: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
3748: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
3750: PetscFree(abuf_r);
3751: PetscFree(ba_i);
3752: PetscFree(buf_ri_k);
3754: return(0);
3755: }
3759: PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3760: {
3761: PetscErrorCode ierr;
3762: Mat B_mpi;
3763: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
3764: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3765: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
3766: PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3767: PetscInt len,proc,*dnz,*onz;
3768: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3769: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3770: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
3771: MPI_Status *status;
3772: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
3773: PetscBT lnkbt;
3774: Mat_Merge_SeqsToMPI *merge;
3775: PetscContainer container;
3780: /* make sure it is a PETSc comm */
3781: PetscCommDuplicate(comm,&comm,PETSC_NULL);
3782: MPI_Comm_size(comm,&size);
3783: MPI_Comm_rank(comm,&rank);
3784:
3785: PetscNew(Mat_Merge_SeqsToMPI,&merge);
3786: PetscMalloc(size*sizeof(MPI_Status),&status);
3788: /* determine row ownership */
3789: /*---------------------------------------------------------*/
3790: PetscMapInitialize(comm,&merge->rowmap);
3791: merge->rowmap.n = m;
3792: merge->rowmap.N = M;
3793: merge->rowmap.bs = 1;
3794: PetscMapSetUp(&merge->rowmap);
3795: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
3796: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
3797:
3798: m = merge->rowmap.n;
3799: M = merge->rowmap.N;
3800: owners = merge->rowmap.range;
3802: /* determine the number of messages to send, their lengths */
3803: /*---------------------------------------------------------*/
3804: len_s = merge->len_s;
3806: len = 0; /* length of buf_si[] */
3807: merge->nsend = 0;
3808: for (proc=0; proc<size; proc++){
3809: len_si[proc] = 0;
3810: if (proc == rank){
3811: len_s[proc] = 0;
3812: } else {
3813: len_si[proc] = owners[proc+1] - owners[proc] + 1;
3814: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3815: }
3816: if (len_s[proc]) {
3817: merge->nsend++;
3818: nrows = 0;
3819: for (i=owners[proc]; i<owners[proc+1]; i++){
3820: if (ai[i+1] > ai[i]) nrows++;
3821: }
3822: len_si[proc] = 2*(nrows+1);
3823: len += len_si[proc];
3824: }
3825: }
3827: /* determine the number and length of messages to receive for ij-structure */
3828: /*-------------------------------------------------------------------------*/
3829: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
3830: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
3832: /* post the Irecv of j-structure */
3833: /*-------------------------------*/
3834: PetscCommGetNewTag(comm,&tagj);
3835: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
3837: /* post the Isend of j-structure */
3838: /*--------------------------------*/
3839: PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);
3840: sj_waits = si_waits + merge->nsend;
3842: for (proc=0, k=0; proc<size; proc++){
3843: if (!len_s[proc]) continue;
3844: i = owners[proc];
3845: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
3846: k++;
3847: }
3849: /* receives and sends of j-structure are complete */
3850: /*------------------------------------------------*/
3851: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
3852: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
3853:
3854: /* send and recv i-structure */
3855: /*---------------------------*/
3856: PetscCommGetNewTag(comm,&tagi);
3857: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
3858:
3859: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
3860: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
3861: for (proc=0,k=0; proc<size; proc++){
3862: if (!len_s[proc]) continue;
3863: /* form outgoing message for i-structure:
3864: buf_si[0]: nrows to be sent
3865: [1:nrows]: row index (global)
3866: [nrows+1:2*nrows+1]: i-structure index
3867: */
3868: /*-------------------------------------------*/
3869: nrows = len_si[proc]/2 - 1;
3870: buf_si_i = buf_si + nrows+1;
3871: buf_si[0] = nrows;
3872: buf_si_i[0] = 0;
3873: nrows = 0;
3874: for (i=owners[proc]; i<owners[proc+1]; i++){
3875: anzi = ai[i+1] - ai[i];
3876: if (anzi) {
3877: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3878: buf_si[nrows+1] = i-owners[proc]; /* local row index */
3879: nrows++;
3880: }
3881: }
3882: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
3883: k++;
3884: buf_si += len_si[proc];
3885: }
3887: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
3888: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
3890: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
3891: for (i=0; i<merge->nrecv; i++){
3892: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
3893: }
3895: PetscFree(len_si);
3896: PetscFree(len_ri);
3897: PetscFree(rj_waits);
3898: PetscFree(si_waits);
3899: PetscFree(ri_waits);
3900: PetscFree(buf_s);
3901: PetscFree(status);
3903: /* compute a local seq matrix in each processor */
3904: /*----------------------------------------------*/
3905: /* allocate bi array and free space for accumulating nonzero column info */
3906: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
3907: bi[0] = 0;
3909: /* create and initialize a linked list */
3910: nlnk = N+1;
3911: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
3912:
3913: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3914: len = 0;
3915: len = ai[owners[rank+1]] - ai[owners[rank]];
3916: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
3917: current_space = free_space;
3919: /* determine symbolic info for each local row */
3920: PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
3921: nextrow = buf_ri_k + merge->nrecv;
3922: nextai = nextrow + merge->nrecv;
3923: for (k=0; k<merge->nrecv; k++){
3924:     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3925:     nrows = *buf_ri_k[k];
3926:     nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
3927:     nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure indices of the k-th received message */
3928: }
3930: MatPreallocateInitialize(comm,m,n,dnz,onz);
3931: len = 0;
3932: for (i=0;i<m;i++) {
3933: bnzi = 0;
3934: /* add local non-zero cols of this proc's seqmat into lnk */
3935: arow = owners[rank] + i;
3936: anzi = ai[arow+1] - ai[arow];
3937: aj = a->j + ai[arow];
3938: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3939: bnzi += nlnk;
3940: /* add received col data into lnk */
3941: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3942: if (i == *nextrow[k]) { /* i-th row */
3943: anzi = *(nextai[k]+1) - *nextai[k];
3944: aj = buf_rj[k] + *nextai[k];
3945: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3946: bnzi += nlnk;
3947: nextrow[k]++; nextai[k]++;
3948: }
3949: }
3950: if (len < bnzi) len = bnzi; /* =max(bnzi) */
3952: /* if free space is not available, make more free space */
3953: if (current_space->local_remaining<bnzi) {
3954:       PetscFreeSpaceGet(current_space->total_array_size,&current_space);
3955: nspacedouble++;
3956: }
3957: /* copy data into free space, then initialize lnk */
3958: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
3959: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
3961: current_space->array += bnzi;
3962: current_space->local_used += bnzi;
3963: current_space->local_remaining -= bnzi;
3964:
3965: bi[i+1] = bi[i] + bnzi;
3966: }
3967:
3968: PetscFree(buf_ri_k);
3970: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
3971: PetscFreeSpaceContiguous(&free_space,bj);
3972: PetscLLDestroy(lnk,lnkbt);
3974: /* create symbolic parallel matrix B_mpi */
3975: /*---------------------------------------*/
3976: MatCreate(comm,&B_mpi);
3977: if (n==PETSC_DECIDE) {
3978: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
3979: } else {
3980: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
3981: }
3982: MatSetType(B_mpi,MATMPIAIJ);
3983: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
3984: MatPreallocateFinalize(dnz,onz);
3986: /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3987: B_mpi->assembled = PETSC_FALSE;
3988: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
3989: merge->bi = bi;
3990: merge->bj = bj;
3991: merge->buf_ri = buf_ri;
3992: merge->buf_rj = buf_rj;
3993: merge->coi = PETSC_NULL;
3994: merge->coj = PETSC_NULL;
3995: merge->owners_co = PETSC_NULL;
3997: /* attach the supporting struct to B_mpi for reuse */
3998: PetscContainerCreate(PETSC_COMM_SELF,&container);
3999: PetscContainerSetPointer(container,merge);
4000: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4001: *mpimat = B_mpi;
4003: PetscCommDestroy(&comm);
4005: return(0);
4006: }
4010: PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4011: {
4012: PetscErrorCode ierr;
4016: if (scall == MAT_INITIAL_MATRIX){
4017: MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);
4018: }
4019: MatMerge_SeqsToMPINumeric(seqmat,*mpimat);
4021: return(0);
4022: }
4026: /*@
4027:     MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A (the diagonal and off-diagonal parts merged into one sequential matrix)
4029: Not Collective
4031: Input Parameters:
4032: + A - the matrix
4033: -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4035: Output Parameter:
4036: . A_loc - the local sequential matrix generated
4038: Level: developer
4040: @*/
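/*
   Editor's sketch (hypothetical usage, not part of the original source): obtaining the local
   rows of a parallel MPIAIJ matrix A as one sequential matrix and refreshing it after the
   numerical values of A change:

      Mat A_loc;
      MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      ... A receives new values with an unchanged nonzero pattern ...
      MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      MatDestroy(A_loc);
*/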
4041: PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4042: {
4043: PetscErrorCode ierr;
4044: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4045: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4046: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4047: PetscScalar *aa=a->a,*ba=b->a,*ca;
4048: PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
4049: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4053: if (scall == MAT_INITIAL_MATRIX){
4054: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
4055: ci[0] = 0;
4056: for (i=0; i<am; i++){
4057: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4058: }
4059: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
4060: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
4061: k = 0;
4062: for (i=0; i<am; i++) {
4063: ncols_o = bi[i+1] - bi[i];
4064: ncols_d = ai[i+1] - ai[i];
4065:       /* off-diagonal portion of A: global columns before the diagonal block */
4066: for (jo=0; jo<ncols_o; jo++) {
4067: col = cmap[*bj];
4068: if (col >= cstart) break;
4069: cj[k] = col; bj++;
4070: ca[k++] = *ba++;
4071: }
4072: /* diagonal portion of A */
4073: for (j=0; j<ncols_d; j++) {
4074: cj[k] = cstart + *aj++;
4075: ca[k++] = *aa++;
4076: }
4077:       /* off-diagonal portion of A: global columns after the diagonal block */
4078: for (j=jo; j<ncols_o; j++) {
4079: cj[k] = cmap[*bj++];
4080: ca[k++] = *ba++;
4081: }
4082: }
4083: /* put together the new matrix */
4084: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);
4085: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4086: /* Since these are PETSc arrays, change flags to free them as necessary. */
4087: mat = (Mat_SeqAIJ*)(*A_loc)->data;
4088: mat->free_a = PETSC_TRUE;
4089: mat->free_ij = PETSC_TRUE;
4090: mat->nonew = 0;
4091: } else if (scall == MAT_REUSE_MATRIX){
4092: mat=(Mat_SeqAIJ*)(*A_loc)->data;
4093: ci = mat->i; cj = mat->j; ca = mat->a;
4094: for (i=0; i<am; i++) {
4095:       /* off-diagonal portion of A: global columns before the diagonal block */
4096: ncols_o = bi[i+1] - bi[i];
4097: for (jo=0; jo<ncols_o; jo++) {
4098: col = cmap[*bj];
4099: if (col >= cstart) break;
4100: *ca++ = *ba++; bj++;
4101: }
4102: /* diagonal portion of A */
4103: ncols_d = ai[i+1] - ai[i];
4104: for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4105:       /* off-diagonal portion of A: global columns after the diagonal block */
4106: for (j=jo; j<ncols_o; j++) {
4107: *ca++ = *ba++; bj++;
4108: }
4109: }
4110: } else {
4111: SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4112: }
4115: return(0);
4116: }
4120: /*@C
4121:     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A, keeping only the columns that contain nonzeros
4123: Not Collective
4125: Input Parameters:
4126: + A - the matrix
4127: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4128: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
4130: Output Parameter:
4131: . A_loc - the local sequential matrix generated
4133: Level: developer
4135: @*/
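/*
   Editor's sketch (hypothetical usage, not part of the original source): passing PETSC_NULL
   for the row and column index sets lets the routine select all local rows and the locally
   nonzero columns by itself:

      Mat A_loc;
      MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
      ... use A_loc ...
      MatDestroy(A_loc);
*/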
4136: PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4137: {
4138: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4139: PetscErrorCode ierr;
4140: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4141: IS isrowa,iscola;
4142: Mat *aloc;
4146: if (!row){
4147: start = A->rmap.rstart; end = A->rmap.rend;
4148: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
4149: } else {
4150: isrowa = *row;
4151: }
4152: if (!col){
4153: start = A->cmap.rstart;
4154: cmap = a->garray;
4155: nzA = a->A->cmap.n;
4156: nzB = a->B->cmap.n;
4157: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
4158: ncols = 0;
4159: for (i=0; i<nzB; i++) {
4160: if (cmap[i] < start) idx[ncols++] = cmap[i];
4161: else break;
4162: }
4163: imark = i;
4164: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4165: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4166: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);
4167: PetscFree(idx);
4168: } else {
4169: iscola = *col;
4170: }
4171: if (scall != MAT_INITIAL_MATRIX){
4172: PetscMalloc(sizeof(Mat),&aloc);
4173: aloc[0] = *A_loc;
4174: }
4175: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
4176: *A_loc = aloc[0];
4177: PetscFree(aloc);
4178: if (!row){
4179: ISDestroy(isrowa);
4180: }
4181: if (!col){
4182: ISDestroy(iscola);
4183: }
4185: return(0);
4186: }
4190: /*@C
4191:     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4193: Collective on Mat
4195: Input Parameters:
4196: + A,B - the matrices in mpiaij format
4197: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4198: - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4200: Output Parameter:
4201: + rowb, colb - index sets of rows and columns of B to extract
4202: .    brstart - row index in B_seq at which B's own local rows begin (the next B->rmap.n rows of B_seq are taken from B's local rows)
4203: - B_seq - the sequential matrix generated
4205: Level: developer
4207: @*/
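/*
   Editor's sketch (hypothetical usage, not part of the original source): the index sets
   created on the first call must be kept and passed back when the operation is reused:

      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
      ... B receives new values with an unchanged nonzero pattern ...
      MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);
      ISDestroy(rowb); ISDestroy(colb); MatDestroy(B_seq);
*/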
4208: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4209: {
4210: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4211: PetscErrorCode ierr;
4212: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4213: IS isrowb,iscolb;
4214: Mat *bseq;
4215:
4217: if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4218: SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4219: }
4221:
4222: if (scall == MAT_INITIAL_MATRIX){
4223: start = A->cmap.rstart;
4224: cmap = a->garray;
4225: nzA = a->A->cmap.n;
4226: nzB = a->B->cmap.n;
4227: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
4228: ncols = 0;
4229: for (i=0; i<nzB; i++) { /* row < local row index */
4230: if (cmap[i] < start) idx[ncols++] = cmap[i];
4231: else break;
4232: }
4233: imark = i;
4234: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
4235: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4236: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);
4237: PetscFree(idx);
4238: *brstart = imark;
4239: ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);
4240: } else {
4241: if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4242: isrowb = *rowb; iscolb = *colb;
4243: PetscMalloc(sizeof(Mat),&bseq);
4244: bseq[0] = *B_seq;
4245: }
4246: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
4247: *B_seq = bseq[0];
4248: PetscFree(bseq);
4249: if (!rowb){
4250: ISDestroy(isrowb);
4251: } else {
4252: *rowb = isrowb;
4253: }
4254: if (!colb){
4255: ISDestroy(iscolb);
4256: } else {
4257: *colb = iscolb;
4258: }
4260: return(0);
4261: }
4265: /*@C
4266:     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4267:     of the OFF-DIAGONAL portion of the local part of A
4269: Collective on Mat
4271: Input Parameters:
4272: + A,B - the matrices in mpiaij format
4273: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4274: . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4275: - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4277: Output Parameter:
4278: .    B_oth - the sequential matrix generated
4280: Level: developer
4282: @*/
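/*
   Editor's sketch (hypothetical usage, not part of the original source): keeping startsj and
   the value buffer between calls allows the communication pattern to be reused:

      PetscInt    *startsj;
      PetscScalar *bufa;
      Mat         B_oth;
      MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
      ... B receives new values with an unchanged nonzero pattern ...
      MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);
      MatDestroy(B_oth); PetscFree(startsj); PetscFree(bufa);
*/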
4283: PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4284: {
4285: VecScatter_MPI_General *gen_to,*gen_from;
4286: PetscErrorCode ierr;
4287: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4288: Mat_SeqAIJ *b_oth;
4289: VecScatter ctx=a->Mvctx;
4290: MPI_Comm comm=((PetscObject)ctx)->comm;
4291: PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4292: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4293: PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA;
4294: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4295: MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4296: MPI_Status *sstatus,rstatus;
4297: PetscMPIInt jj;
4298: PetscInt *cols,sbs,rbs;
4299: PetscScalar *vals;
4302: if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4303: SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4304: }
4306: MPI_Comm_rank(comm,&rank);
4308: gen_to = (VecScatter_MPI_General*)ctx->todata;
4309: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4310: rvalues = gen_from->values; /* holds the length of receiving row */
4311: svalues = gen_to->values; /* holds the length of sending row */
4312: nrecvs = gen_from->n;
4313: nsends = gen_to->n;
4315: PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);
4316: srow = gen_to->indices; /* local row index to be sent */
4317: sstarts = gen_to->starts;
4318: sprocs = gen_to->procs;
4319: sstatus = gen_to->sstatus;
4320: sbs = gen_to->bs;
4321: rstarts = gen_from->starts;
4322: rprocs = gen_from->procs;
4323: rbs = gen_from->bs;
4325: if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4326: if (scall == MAT_INITIAL_MATRIX){
4327: /* i-array */
4328: /*---------*/
4329: /* post receives */
4330: for (i=0; i<nrecvs; i++){
4331: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4332: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4333: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
4334: }
4336: /* pack the outgoing message */
4337: PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);
4338: rstartsj = sstartsj + nsends +1;
4339: sstartsj[0] = 0; rstartsj[0] = 0;
4340: len = 0; /* total length of j or a array to be sent */
4341: k = 0;
4342: for (i=0; i<nsends; i++){
4343: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4344: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4345: for (j=0; j<nrows; j++) {
4346: row = srow[k] + B->rmap.range[rank]; /* global row idx */
4347: for (l=0; l<sbs; l++){
4348: MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL); /* rowlength */
4349: rowlen[j*sbs+l] = ncols;
4350: len += ncols;
4351: MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);
4352: }
4353: k++;
4354: }
4355: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
4356: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4357: }
4358: /* recvs and sends of i-array are completed */
4359: i = nrecvs;
4360: while (i--) {
4361: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
4362: }
4363: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
4365: /* allocate buffers for sending j and a arrays */
4366: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
4367: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
4369: /* create i-array of B_oth */
4370: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
4371: b_othi[0] = 0;
4372: len = 0; /* total length of j or a array to be received */
4373: k = 0;
4374: for (i=0; i<nrecvs; i++){
4375: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4376:       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4377: for (j=0; j<nrows; j++) {
4378: b_othi[k+1] = b_othi[k] + rowlen[j];
4379: len += rowlen[j]; k++;
4380: }
4381: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4382: }
4384:     /* allocate space for j and a arrays of B_oth */
4385: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
4386: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);
4388: /* j-array */
4389: /*---------*/
4390: /* post receives of j-array */
4391: for (i=0; i<nrecvs; i++){
4392: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4393: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
4394: }
4396: /* pack the outgoing message j-array */
4397: k = 0;
4398: for (i=0; i<nsends; i++){
4399: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4400: bufJ = bufj+sstartsj[i];
4401: for (j=0; j<nrows; j++) {
4402: row = srow[k++] + B->rmap.range[rank]; /* global row idx */
4403: for (ll=0; ll<sbs; ll++){
4404: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
4405: for (l=0; l<ncols; l++){
4406: *bufJ++ = cols[l];
4407: }
4408: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
4409: }
4410: }
4411: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
4412: }
4414: /* recvs and sends of j-array are completed */
4415: i = nrecvs;
4416: while (i--) {
4417: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
4418: }
4419: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
4420: } else if (scall == MAT_REUSE_MATRIX){
4421: sstartsj = *startsj;
4422: rstartsj = sstartsj + nsends +1;
4423: bufa = *bufa_ptr;
4424: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
4425: b_otha = b_oth->a;
4426: } else {
4427:     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4428: }
4430: /* a-array */
4431: /*---------*/
4432: /* post receives of a-array */
4433: for (i=0; i<nrecvs; i++){
4434: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4435: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
4436: }
4438: /* pack the outgoing message a-array */
4439: k = 0;
4440: for (i=0; i<nsends; i++){
4441: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4442: bufA = bufa+sstartsj[i];
4443: for (j=0; j<nrows; j++) {
4444: row = srow[k++] + B->rmap.range[rank]; /* global row idx */
4445: for (ll=0; ll<sbs; ll++){
4446: MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
4447: for (l=0; l<ncols; l++){
4448: *bufA++ = vals[l];
4449: }
4450: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
4451: }
4452: }
4453: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
4454: }
4455: /* recvs and sends of a-array are completed */
4456: i = nrecvs;
4457: while (i--) {
4458: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
4459: }
4460: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
4461: PetscFree2(rwaits,swaits);
4463: if (scall == MAT_INITIAL_MATRIX){
4464: /* put together the new matrix */
4465: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);
4467: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4468: /* Since these are PETSc arrays, change flags to free them as necessary. */
4469: b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
4470: b_oth->free_a = PETSC_TRUE;
4471: b_oth->free_ij = PETSC_TRUE;
4472: b_oth->nonew = 0;
4474: PetscFree(bufj);
4475: if (!startsj || !bufa_ptr){
4476: PetscFree(sstartsj);
4477: PetscFree(bufa_ptr);
4478: } else {
4479: *startsj = sstartsj;
4480: *bufa_ptr = bufa;
4481: }
4482: }
4484: return(0);
4485: }
4489: /*@C
4490: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4492: Not Collective
4494: Input Parameters:
4495: . A - The matrix in mpiaij format
4497: Output Parameter:
4498: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4499: . colmap - A map from global column index to local index into lvec
4500: - multScatter - A scatter from the argument of a matrix-vector product to lvec
4502: Level: developer
4504: @*/
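/*
   Editor's sketch (hypothetical usage, not part of the original source): any of the output
   arguments may be PETSC_NULL; here only the ghost vector and the scatter are requested:

      Vec        lvec;
      VecScatter Mvctx;
      MatGetCommunicationStructs(A,&lvec,PETSC_NULL,&Mvctx);
*/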
4505: #if defined (PETSC_USE_CTABLE)
4506: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4507: #else
4508: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4509: #endif
4510: {
4511: Mat_MPIAIJ *a;
4518: a = (Mat_MPIAIJ *) A->data;
4519: if (lvec) *lvec = a->lvec;
4520: if (colmap) *colmap = a->colmap;
4521: if (multScatter) *multScatter = a->Mvctx;
4522: return(0);
4523: }
4530: /*MC
4531: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4533: Options Database Keys:
4534: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4536: Level: beginner
4538: .seealso: MatCreateMPIAIJ()
4539: M*/
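/*
   Editor's sketch (hypothetical usage, not part of the original source): selecting this matrix
   type in code rather than through the options database; m and n stand for the caller's local
   row and column counts, and the preallocation numbers are placeholders:

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
*/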
4544: PetscErrorCode MatCreate_MPIAIJ(Mat B)
4545: {
4546: Mat_MPIAIJ *b;
4548: PetscMPIInt size;
4551: MPI_Comm_size(((PetscObject)B)->comm,&size);
4553: PetscNewLog(B,Mat_MPIAIJ,&b);
4554: B->data = (void*)b;
4555: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
4556: B->factor = 0;
4557: B->rmap.bs = 1;
4558: B->assembled = PETSC_FALSE;
4559: B->mapping = 0;
4561: B->insertmode = NOT_SET_VALUES;
4562: b->size = size;
4563: MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);
4565: /* build cache for off array entries formed */
4566: MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);
4567: b->donotstash = PETSC_FALSE;
4568: b->colmap = 0;
4569: b->garray = 0;
4570: b->roworiented = PETSC_TRUE;
4572: /* stuff used for matrix vector multiply */
4573: b->lvec = PETSC_NULL;
4574: b->Mvctx = PETSC_NULL;
4576: /* stuff for MatGetRow() */
4577: b->rowindices = 0;
4578: b->rowvalues = 0;
4579: b->getrowactive = PETSC_FALSE;
4582: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4583: "MatStoreValues_MPIAIJ",
4584: MatStoreValues_MPIAIJ);
4585: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4586: "MatRetrieveValues_MPIAIJ",
4587: MatRetrieveValues_MPIAIJ);
4588: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4589: "MatGetDiagonalBlock_MPIAIJ",
4590: MatGetDiagonalBlock_MPIAIJ);
4591: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4592: "MatIsTranspose_MPIAIJ",
4593: MatIsTranspose_MPIAIJ);
4594: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4595: "MatMPIAIJSetPreallocation_MPIAIJ",
4596: MatMPIAIJSetPreallocation_MPIAIJ);
4597: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4598: "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4599: MatMPIAIJSetPreallocationCSR_MPIAIJ);
4600: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4601: "MatDiagonalScaleLocal_MPIAIJ",
4602: MatDiagonalScaleLocal_MPIAIJ);
4603: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4604: "MatConvert_MPIAIJ_MPICSRPERM",
4605: MatConvert_MPIAIJ_MPICSRPERM);
4606: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4607: "MatConvert_MPIAIJ_MPICRL",
4608: MatConvert_MPIAIJ_MPICRL);
4609: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
4610: return(0);
4611: }
4616: /*@
4617:     MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
4618: and "off-diagonal" part of the matrix in CSR format.
4620: Collective on MPI_Comm
4622: Input Parameters:
4623: + comm - MPI communicator
4624: . m - number of local rows (Cannot be PETSC_DECIDE)
4625: .  n - This value should be the same as the local size used in creating the
4626:        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4627:        calculated if N is given). For square matrices n is almost always m.
4628: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4629: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4630: . i - row indices for "diagonal" portion of matrix
4631: . j - column indices
4632: . a - matrix values
4633: . oi - row indices for "off-diagonal" portion of matrix
4634: . oj - column indices
4635: - oa - matrix values
4637: Output Parameter:
4638: . mat - the matrix
4640: Level: advanced
4642: Notes:
4643:        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc.
4645: The i and j indices are 0 based
4646:
4647: See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4650: .keywords: matrix, aij, compressed row, sparse, parallel
4652: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4653: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4654: @*/
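/*
   Editor's sketch (hypothetical usage, not part of the original source); di/dj/da and oi/oj/oa
   stand for the caller's CSR arrays for the "diagonal" and "off-diagonal" blocks. Judging from
   the MatCreateSeqAIJWithArrays() calls in the function body below, dj holds local column
   indices (0..n-1) while oj holds global column indices (0..N-1). The arrays must remain valid
   for the life of the matrix since they are not copied:

      Mat A;
      MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                     di,dj,da,oi,oj,oa,&A);
      ... use A ...
      MatDestroy(A);
*/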
4655: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4656: PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4657: {
4659: Mat_MPIAIJ *maij;
4662:   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4663: if (i[0]) {
4664: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4665: }
4666: if (oi[0]) {
4667: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4668: }
4669: MatCreate(comm,mat);
4670: MatSetSizes(*mat,m,n,M,N);
4671: MatSetType(*mat,MATMPIAIJ);
4672: maij = (Mat_MPIAIJ*) (*mat)->data;
4673: maij->donotstash = PETSC_TRUE;
4674: (*mat)->preallocated = PETSC_TRUE;
4676: (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4677: PetscMapSetUp(&(*mat)->rmap);
4678: PetscMapSetUp(&(*mat)->cmap);
4680: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
4681: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);
4683: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
4684: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
4685: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
4686: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
4688: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
4689: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
4690: return(0);
4691: }
4693: /*
4694: Special version for direct calls from Fortran
4695: */
4696: #if defined(PETSC_HAVE_FORTRAN_CAPS)
4697: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4698: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4699: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4700: #endif
4702: /* Change these macros so they can be used in a void function */
4703: #undef CHKERRQ
4704: #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4705: #undef SETERRQ2
4706: #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4707: #undef SETERRQ
4708: #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4713: void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4714: {
4715: Mat mat = *mmat;
4716: PetscInt m = *mm, n = *mn;
4717: InsertMode addv = *maddv;
4718: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
4719: PetscScalar value;
4722: MatPreallocated(mat);
4723: if (mat->insertmode == NOT_SET_VALUES) {
4724: mat->insertmode = addv;
4725: }
4726: #if defined(PETSC_USE_DEBUG)
4727: else if (mat->insertmode != addv) {
4728: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4729: }
4730: #endif
4731: {
4732: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4733: PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4734: PetscTruth roworiented = aij->roworiented;
4736: /* Some Variables required in the macro */
4737: Mat A = aij->A;
4738: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
4739: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4740: PetscScalar *aa = a->a;
4741: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4742: Mat B = aij->B;
4743: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
4744: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4745: PetscScalar *ba = b->a;
4747: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4748: PetscInt nonew = a->nonew;
4749: PetscScalar *ap1,*ap2;
4752: for (i=0; i<m; i++) {
4753: if (im[i] < 0) continue;
4754: #if defined(PETSC_USE_DEBUG)
4755: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4756: #endif
4757: if (im[i] >= rstart && im[i] < rend) {
4758: row = im[i] - rstart;
4759: lastcol1 = -1;
4760: rp1 = aj + ai[row];
4761: ap1 = aa + ai[row];
4762: rmax1 = aimax[row];
4763: nrow1 = ailen[row];
4764: low1 = 0;
4765: high1 = nrow1;
4766: lastcol2 = -1;
4767: rp2 = bj + bi[row];
4768: ap2 = ba + bi[row];
4769: rmax2 = bimax[row];
4770: nrow2 = bilen[row];
4771: low2 = 0;
4772: high2 = nrow2;
4774: for (j=0; j<n; j++) {
4775: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4776: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4777: if (in[j] >= cstart && in[j] < cend){
4778: col = in[j] - cstart;
4779: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4780: } else if (in[j] < 0) continue;
4781: #if defined(PETSC_USE_DEBUG)
4782: else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4783: #endif
4784: else {
4785: if (mat->was_assembled) {
4786: if (!aij->colmap) {
4787: CreateColmap_MPIAIJ_Private(mat);
4788: }
4789: #if defined (PETSC_USE_CTABLE)
4790: PetscTableFind(aij->colmap,in[j]+1,&col);
4791: col--;
4792: #else
4793: col = aij->colmap[in[j]] - 1;
4794: #endif
4795: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4796: DisAssemble_MPIAIJ(mat);
4797: col = in[j];
4798: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4799: B = aij->B;
4800: b = (Mat_SeqAIJ*)B->data;
4801: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4802: rp2 = bj + bi[row];
4803: ap2 = ba + bi[row];
4804: rmax2 = bimax[row];
4805: nrow2 = bilen[row];
4806: low2 = 0;
4807: high2 = nrow2;
4808: bm = aij->B->rmap.n;
4809: ba = b->a;
4810: }
4811: } else col = in[j];
4812: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4813: }
4814: }
4815: } else {
4816: if (!aij->donotstash) {
4817: if (roworiented) {
4818: if (ignorezeroentries && v[i*n] == 0.0) continue;
4819: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
4820: } else {
4821: if (ignorezeroentries && v[i] == 0.0) continue;
4822: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
4823: }
4824: }
4825: }
4826: }}
4827: PetscFunctionReturnVoid();
4828: }