Actual source code: mpiaij.c
petsc-dev 2014-02-02
2: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3: #include <petsc-private/vecimpl.h>
4: #include <petscblaslapack.h>
5: #include <petscsf.h>
7: /*MC
8: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11: and MATMPIAIJ otherwise. As a result, for single process communicators,
12: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13: for communicators controlling multiple processes. It is recommended that you call both of
14: the above preallocation routines for simplicity.
16: Options Database Keys:
17: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19: Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
20: enough exist.
22: Level: beginner
24: .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25: M*/
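/* Illustrative sketch (not part of this file): the usage recommended above. One matrix is
   created, given the AIJ type, and both preallocation routines are called; the one that
   matches the communicator size takes effect. M, N and the nonzero estimates (5 and 2)
   are placeholder assumptions.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATAIJ);                          (or -mat_type aij with MatSetFromOptions)
     MatSeqAIJSetPreallocation(A,5,NULL);           (used on a one-process communicator)
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);    (used on a multi-process communicator)
     then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual
*/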
27: /*MC
28: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
32: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33: for communicators controlling multiple processes. It is recommended that you call both of
34: the above preallocation routines for simplicity.
36: Options Database Keys:
37: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39: Level: beginner
41: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42: M*/
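/* Illustrative sketch (not part of this file): selecting the CRL variant from the options
   database rather than in code; M, N and the preallocation numbers are assumptions.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetFromOptions(A);                          (run with -mat_type aijcrl)
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
*/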
46: PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47: {
48: PetscErrorCode ierr;
49: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
50: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
51: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
52: const PetscInt *ia,*ib;
53: const MatScalar *aa,*bb;
54: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
55: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
58: *keptrows = 0;
59: ia = a->i;
60: ib = b->i;
61: for (i=0; i<m; i++) {
62: na = ia[i+1] - ia[i];
63: nb = ib[i+1] - ib[i];
64: if (!na && !nb) {
65: cnt++;
66: goto ok1;
67: }
68: aa = a->a + ia[i];
69: for (j=0; j<na; j++) {
70: if (aa[j] != 0.0) goto ok1;
71: }
72: bb = b->a + ib[i];
73: for (j=0; j <nb; j++) {
74: if (bb[j] != 0.0) goto ok1;
75: }
76: cnt++;
77: ok1:;
78: }
79: MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));
80: if (!n0rows) return(0);
81: PetscMalloc1((M->rmap->n-cnt),&rows);
82: cnt = 0;
83: for (i=0; i<m; i++) {
84: na = ia[i+1] - ia[i];
85: nb = ib[i+1] - ib[i];
86: if (!na && !nb) continue;
87: aa = a->a + ia[i];
88: for (j=0; j<na;j++) {
89: if (aa[j] != 0.0) {
90: rows[cnt++] = rstart + i;
91: goto ok2;
92: }
93: }
94: bb = b->a + ib[i];
95: for (j=0; j<nb; j++) {
96: if (bb[j] != 0.0) {
97: rows[cnt++] = rstart + i;
98: goto ok2;
99: }
100: }
101: ok2:;
102: }
103: ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);
104: return(0);
105: }
109: PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110: {
111: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
113: PetscInt i,rstart,nrows,*rows;
116: *zrows = NULL;
117: MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);
118: MatGetOwnershipRange(M,&rstart,NULL);
119: for (i=0; i<nrows; i++) rows[i] += rstart;
120: ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);
121: return(0);
122: }
126: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127: {
129: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
130: PetscInt i,n,*garray = aij->garray;
131: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
132: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
133: PetscReal *work;
136: MatGetSize(A,NULL,&n);
137: PetscCalloc1(n,&work);
138: if (type == NORM_2) {
139: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141: }
142: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144: }
145: } else if (type == NORM_1) {
146: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148: }
149: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151: }
152: } else if (type == NORM_INFINITY) {
153: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155: }
156: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158: }
160: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161: if (type == NORM_INFINITY) {
162: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
163: } else {
164: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
165: }
166: PetscFree(work);
167: if (type == NORM_2) {
168: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169: }
170: return(0);
171: }
175: /*
176: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179: Only for square matrices
181: Used by a preconditioner, hence PETSC_EXTERN
182: */
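/* Hypothetical usage sketch (an assumption, not taken from this file): rank 0 holds the
   square SEQAIJ matrix gmat, every rank chooses its local row count m, and the routine
   below hands each rank its slice; gmat is only dereferenced on rank 0.

     Mat B;
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&B);
     ... use B ...
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&B);    (values refreshed only)
*/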
183: PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184: {
185: PetscMPIInt rank,size;
186: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
188: Mat mat;
189: Mat_SeqAIJ *gmata;
190: PetscMPIInt tag;
191: MPI_Status status;
192: PetscBool aij;
193: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
196: MPI_Comm_rank(comm,&rank);
197: MPI_Comm_size(comm,&size);
198: if (!rank) {
199: PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
200: if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201: }
202: if (reuse == MAT_INITIAL_MATRIX) {
203: MatCreate(comm,&mat);
204: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
205: if (!rank) {
206: bses[0] = gmat->rmap->bs;
207: bses[1] = gmat->cmap->bs;
208: }
209: MPI_Bcast(bses,2,MPIU_INT,0,comm);
210: MatSetBlockSizes(mat,bses[0],bses[1]);
211: MatSetType(mat,MATAIJ);
212: PetscMalloc1((size+1),&rowners);
213: PetscMalloc2(m,&dlens,m,&olens);
214: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
216: rowners[0] = 0;
217: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
218: rstart = rowners[rank];
219: rend = rowners[rank+1];
220: PetscObjectGetNewTag((PetscObject)mat,&tag);
221: if (!rank) {
222: gmata = (Mat_SeqAIJ*) gmat->data;
223: /* send row lengths to all processors */
224: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
225: for (i=1; i<size; i++) {
226: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
227: }
228: /* determine the diagonal and off-diagonal counts */
229: PetscMemzero(olens,m*sizeof(PetscInt));
230: PetscCalloc1(m,&ld);
231: jj = 0;
232: for (i=0; i<m; i++) {
233: for (j=0; j<dlens[i]; j++) {
234: if (gmata->j[jj] < rstart) ld[i]++;
235: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236: jj++;
237: }
238: }
239: /* send column indices to other processes */
240: for (i=1; i<size; i++) {
241: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
243: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
244: }
246: /* send numerical values to other processes */
247: for (i=1; i<size; i++) {
248: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
250: }
251: gmataa = gmata->a;
252: gmataj = gmata->j;
254: } else {
255: /* receive row lengths */
256: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
257: /* receive column indices */
258: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
259: PetscMalloc2(nz,&gmataa,nz,&gmataj);
260: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
261: /* determine the diagonal and off-diagonal counts */
262: PetscMemzero(olens,m*sizeof(PetscInt));
263: PetscCalloc1(m,&ld);
264: jj = 0;
265: for (i=0; i<m; i++) {
266: for (j=0; j<dlens[i]; j++) {
267: if (gmataj[jj] < rstart) ld[i]++;
268: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
269: jj++;
270: }
271: }
272: /* receive numerical values */
273: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
274: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
275: }
276: /* set preallocation */
277: for (i=0; i<m; i++) {
278: dlens[i] -= olens[i];
279: }
280: MatSeqAIJSetPreallocation(mat,0,dlens);
281: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
283: for (i=0; i<m; i++) {
284: dlens[i] += olens[i];
285: }
286: cnt = 0;
287: for (i=0; i<m; i++) {
288: row = rstart + i;
289: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
290: cnt += dlens[i];
291: }
292: if (rank) {
293: PetscFree2(gmataa,gmataj);
294: }
295: PetscFree2(dlens,olens);
296: PetscFree(rowners);
298: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
300: *inmat = mat;
301: } else { /* column indices are already set; only need to move over numerical values from process 0 */
302: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
303: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
304: mat = *inmat;
305: PetscObjectGetNewTag((PetscObject)mat,&tag);
306: if (!rank) {
307: /* send numerical values to other processes */
308: gmata = (Mat_SeqAIJ*) gmat->data;
309: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
310: gmataa = gmata->a;
311: for (i=1; i<size; i++) {
312: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
313: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
314: }
315: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
316: } else {
317: /* receive numerical values from process 0 */
318: nz = Ad->nz + Ao->nz;
319: PetscMalloc1(nz,&gmataa); gmataarestore = gmataa;
320: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
321: }
322: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
323: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
324: ad = Ad->a;
325: ao = Ao->a;
326: if (mat->rmap->n) {
327: i = 0;
328: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
329: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
330: }
331: for (i=1; i<mat->rmap->n; i++) {
332: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
333: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
334: }
335: i--;
336: if (mat->rmap->n) {
337: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));
338: }
339: if (rank) {
340: PetscFree(gmataarestore);
341: }
342: }
343: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
344: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
345: return(0);
346: }
348: /*
349: Local utility routine that creates a mapping from the global column
350: number to the local number in the off-diagonal part of the local
351: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
352: a slightly higher hash table cost; without it, it is not scalable (each processor
353: has an order N integer array) but is fast to access.
354: */
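/* Sketch of how the colmap built below is consulted; the same pattern appears verbatim in
   MatSetValues_MPIAIJ and MatGetValues_MPIAIJ later in this file. gcol is an assumed
   global column index.

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;     (hash table, memory proportional to local columns)
   #else
     lcol = aij->colmap[gcol] - 1;                         (dense array of length cmap->N)
   #endif
     A result lcol < 0 means gcol is not present in the off-diagonal block B.
*/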
357: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358: {
359: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
361: PetscInt n = aij->B->cmap->n,i;
364: if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365: #if defined(PETSC_USE_CTABLE)
366: PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
367: for (i=0; i<n; i++) {
368: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
369: }
370: #else
371: PetscCalloc1((mat->cmap->N+1),&aij->colmap);
372: PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));
373: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
374: #endif
375: return(0);
376: }
378: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
379: { \
380: if (col <= lastcol1) low1 = 0; \
381: else high1 = nrow1; \
382: lastcol1 = col;\
383: while (high1-low1 > 5) { \
384: t = (low1+high1)/2; \
385: if (rp1[t] > col) high1 = t; \
386: else low1 = t; \
387: } \
388: for (_i=low1; _i<high1; _i++) { \
389: if (rp1[_i] > col) break; \
390: if (rp1[_i] == col) { \
391: if (addv == ADD_VALUES) ap1[_i] += value; \
392: else ap1[_i] = value; \
393: goto a_noinsert; \
394: } \
395: } \
396: if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
398: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400: N = nrow1++ - 1; a->nz++; high1++; \
401: /* shift up all the later entries in this row */ \
402: for (ii=N; ii>=_i; ii--) { \
403: rp1[ii+1] = rp1[ii]; \
404: ap1[ii+1] = ap1[ii]; \
405: } \
406: rp1[_i] = col; \
407: ap1[_i] = value; \
408: a_noinsert: ; \
409: ailen[row] = nrow1; \
410: }
413: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414: { \
415: if (col <= lastcol2) low2 = 0; \
416: else high2 = nrow2; \
417: lastcol2 = col; \
418: while (high2-low2 > 5) { \
419: t = (low2+high2)/2; \
420: if (rp2[t] > col) high2 = t; \
421: else low2 = t; \
422: } \
423: for (_i=low2; _i<high2; _i++) { \
424: if (rp2[_i] > col) break; \
425: if (rp2[_i] == col) { \
426: if (addv == ADD_VALUES) ap2[_i] += value; \
427: else ap2[_i] = value; \
428: goto b_noinsert; \
429: } \
430: } \
431: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
432: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
433: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
434: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
435: N = nrow2++ - 1; b->nz++; high2++; \
436: /* shift up all the later entries in this row */ \
437: for (ii=N; ii>=_i; ii--) { \
438: rp2[ii+1] = rp2[ii]; \
439: ap2[ii+1] = ap2[ii]; \
440: } \
441: rp2[_i] = col; \
442: ap2[_i] = value; \
443: b_noinsert: ; \
444: bilen[row] = nrow2; \
445: }
449: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
450: {
451: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
452: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
454: PetscInt l,*garray = mat->garray,diag;
457: /* code only works for square matrices A */
459: /* find size of row to the left of the diagonal part */
460: MatGetOwnershipRange(A,&diag,0);
461: row = row - diag;
462: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
463: if (garray[b->j[b->i[row]+l]] > diag) break;
464: }
465: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
467: /* diagonal part */
468: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
470: /* right of diagonal part */
471: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
472: return(0);
473: }
477: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
478: {
479: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
480: PetscScalar value;
482: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
483: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
484: PetscBool roworiented = aij->roworiented;
486: /* Some variables required in the macros */
487: Mat A = aij->A;
488: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
489: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
490: MatScalar *aa = a->a;
491: PetscBool ignorezeroentries = a->ignorezeroentries;
492: Mat B = aij->B;
493: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
494: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
495: MatScalar *ba = b->a;
497: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
498: PetscInt nonew;
499: MatScalar *ap1,*ap2;
503: for (i=0; i<m; i++) {
504: if (im[i] < 0) continue;
505: #if defined(PETSC_USE_DEBUG)
506: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
507: #endif
508: if (im[i] >= rstart && im[i] < rend) {
509: row = im[i] - rstart;
510: lastcol1 = -1;
511: rp1 = aj + ai[row];
512: ap1 = aa + ai[row];
513: rmax1 = aimax[row];
514: nrow1 = ailen[row];
515: low1 = 0;
516: high1 = nrow1;
517: lastcol2 = -1;
518: rp2 = bj + bi[row];
519: ap2 = ba + bi[row];
520: rmax2 = bimax[row];
521: nrow2 = bilen[row];
522: low2 = 0;
523: high2 = nrow2;
525: for (j=0; j<n; j++) {
526: if (v) {
527: if (roworiented) value = v[i*n+j];
528: else value = v[i+j*m];
529: } else value = 0.0;
530: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
531: if (in[j] >= cstart && in[j] < cend) {
532: col = in[j] - cstart;
533: nonew = a->nonew;
534: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
535: } else if (in[j] < 0) continue;
536: #if defined(PETSC_USE_DEBUG)
537: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
538: #endif
539: else {
540: if (mat->was_assembled) {
541: if (!aij->colmap) {
542: MatCreateColmap_MPIAIJ_Private(mat);
543: }
544: #if defined(PETSC_USE_CTABLE)
545: PetscTableFind(aij->colmap,in[j]+1,&col);
546: col--;
547: #else
548: col = aij->colmap[in[j]] - 1;
549: #endif
550: if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
551: MatDisAssemble_MPIAIJ(mat);
552: col = in[j];
553: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
554: B = aij->B;
555: b = (Mat_SeqAIJ*)B->data;
556: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
557: rp2 = bj + bi[row];
558: ap2 = ba + bi[row];
559: rmax2 = bimax[row];
560: nrow2 = bilen[row];
561: low2 = 0;
562: high2 = nrow2;
563: bm = aij->B->rmap->n;
564: ba = b->a;
565: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
566: } else col = in[j];
567: nonew = b->nonew;
568: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
569: }
570: }
571: } else {
572: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
573: if (!aij->donotstash) {
574: mat->assembled = PETSC_FALSE;
575: if (roworiented) {
576: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
577: } else {
578: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
579: }
580: }
581: }
582: }
583: return(0);
584: }
588: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
589: {
590: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
592: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
593: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
596: for (i=0; i<m; i++) {
597: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
598: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
599: if (idxm[i] >= rstart && idxm[i] < rend) {
600: row = idxm[i] - rstart;
601: for (j=0; j<n; j++) {
602: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
603: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
604: if (idxn[j] >= cstart && idxn[j] < cend) {
605: col = idxn[j] - cstart;
606: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
607: } else {
608: if (!aij->colmap) {
609: MatCreateColmap_MPIAIJ_Private(mat);
610: }
611: #if defined(PETSC_USE_CTABLE)
612: PetscTableFind(aij->colmap,idxn[j]+1,&col);
613: col--;
614: #else
615: col = aij->colmap[idxn[j]] - 1;
616: #endif
617: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
618: else {
619: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
620: }
621: }
622: }
623: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
624: }
625: return(0);
626: }
628: extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
632: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
633: {
634: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
636: PetscInt nstash,reallocs;
637: InsertMode addv;
640: if (aij->donotstash || mat->nooffprocentries) return(0);
642: /* make sure all processors are either in INSERTMODE or ADDMODE */
643: MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
644: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645: mat->insertmode = addv; /* in case this processor had no cache */
647: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
648: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
649: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
650: return(0);
651: }
655: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656: {
657: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
658: Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data;
660: PetscMPIInt n;
661: PetscInt i,j,rstart,ncols,flg;
662: PetscInt *row,*col;
663: PetscBool other_disassembled;
664: PetscScalar *val;
665: InsertMode addv = mat->insertmode;
667: /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
670: if (!aij->donotstash && !mat->nooffprocentries) {
671: while (1) {
672: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
673: if (!flg) break;
675: for (i=0; i<n; ) {
676: /* Now identify the consecutive vals belonging to the same row */
677: for (j=i,rstart=row[j]; j<n; j++) {
678: if (row[j] != rstart) break;
679: }
680: if (j < n) ncols = j-i;
681: else ncols = n-i;
682: /* Now assemble all these values with a single function call */
683: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
685: i = j;
686: }
687: }
688: MatStashScatterEnd_Private(&mat->stash);
689: }
690: MatAssemblyBegin(aij->A,mode);
691: MatAssemblyEnd(aij->A,mode);
693: /* determine if any processor has disassembled, if so we must
694: also disassemble ourselves, in order that we may reassemble. */
695: /*
696: if nonzero structure of submatrix B cannot change then we know that
697: no processor disassembled, thus we can skip this stuff
698: */
699: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
700: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
701: if (mat->was_assembled && !other_disassembled) {
702: MatDisAssemble_MPIAIJ(mat);
703: }
704: }
705: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
706: MatSetUpMultiply_MPIAIJ(mat);
707: }
708: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
709: MatAssemblyBegin(aij->B,mode);
710: MatAssemblyEnd(aij->B,mode);
712: PetscFree2(aij->rowvalues,aij->rowindices);
714: aij->rowvalues = 0;
716: /* used by MatAXPY() */
717: a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
718: a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
720: VecDestroy(&aij->diag);
721: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
722: return(0);
723: }
727: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
728: {
729: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
733: MatZeroEntries(l->A);
734: MatZeroEntries(l->B);
735: return(0);
736: }
740: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
741: {
742: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
743: PetscInt *owners = A->rmap->range;
744: PetscInt n = A->rmap->n;
745: PetscMPIInt size = mat->size;
746: PetscSF sf;
747: PetscInt *lrows;
748: PetscSFNode *rrows;
749: PetscInt lastidx = -1, r, p = 0, len = 0;
753: /* Create SF where leaves are input rows and roots are owned rows */
754: PetscMalloc1(n, &lrows);
755: for (r = 0; r < n; ++r) lrows[r] = -1;
756: PetscMalloc1(N, &rrows);
757: for (r = 0; r < N; ++r) {
758: const PetscInt idx = rows[r];
759: PetscBool found = PETSC_FALSE;
760: /* Trick for efficient searching for sorted rows */
761: if (lastidx > idx) p = 0;
762: lastidx = idx;
763: for (; p < size; ++p) {
764: if (idx >= owners[p] && idx < owners[p+1]) {
765: rrows[r].rank = p;
766: rrows[r].index = rows[r] - owners[p];
767: found = PETSC_TRUE;
768: break;
769: }
770: }
771: if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
772: }
773: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
774: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
775: /* Collect flags for rows to be zeroed */
776: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
777: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
778: PetscSFDestroy(&sf);
779: /* Compress and put in row numbers */
780: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781: /* fix right hand side if needed */
782: if (x && b) {
783: const PetscScalar *xx;
784: PetscScalar *bb;
786: VecGetArrayRead(x, &xx);
787: VecGetArray(b, &bb);
788: for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
789: VecRestoreArrayRead(x, &xx);
790: VecRestoreArray(b, &bb);
791: }
792: /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
793: MatZeroRows(mat->B, len, lrows, 0.0, 0,0);
794: if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
795: MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);
796: } else if (diag != 0.0) {
797: MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
798: if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
799: for (r = 0; r < len; ++r) {
800: const PetscInt row = lrows[r] + A->rmap->rstart;
801: MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);
802: }
803: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
804: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
805: } else {
806: MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
807: }
808: PetscFree(lrows);
809: return(0);
810: }
814: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
815: {
816: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
817: PetscErrorCode ierr;
818: PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1;
819: PetscInt i,j,r,m,p = 0,len = 0;
820: PetscInt *lrows,*owners = A->rmap->range;
821: PetscSFNode *rrows;
822: PetscSF sf;
823: const PetscScalar *xx;
824: PetscScalar *bb,*mask;
825: Vec xmask,lmask;
826: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
827: const PetscInt *aj, *ii,*ridx;
828: PetscScalar *aa;
829: #if defined(PETSC_DEBUG)
830: PetscBool found = PETSC_FALSE;
831: #endif
834: /* Create SF where leaves are input rows and roots are owned rows */
835: PetscMalloc1(n, &lrows);
836: for (r = 0; r < n; ++r) lrows[r] = -1;
837: PetscMalloc1(N, &rrows);
838: for (r = 0; r < N; ++r) {
839: const PetscInt idx = rows[r];
840: PetscBool found = PETSC_FALSE;
841: /* Trick for efficient searching for sorted rows */
842: if (lastidx > idx) p = 0;
843: lastidx = idx;
844: for (; p < size; ++p) {
845: if (idx >= owners[p] && idx < owners[p+1]) {
846: rrows[r].rank = p;
847: rrows[r].index = rows[r] - owners[p];
848: found = PETSC_TRUE;
849: break;
850: }
851: }
852: if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
853: }
854: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
855: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
856: /* Collect flags for rows to be zeroed */
857: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
858: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
859: PetscSFDestroy(&sf);
860: /* Compress and put in row numbers */
861: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
862: /* zero diagonal part of matrix */
863: MatZeroRowsColumns(l->A,len,lrows,diag,x,b);
864: /* handle off diagonal part of matrix */
865: MatGetVecs(A,&xmask,NULL);
866: VecDuplicate(l->lvec,&lmask);
867: VecGetArray(xmask,&bb);
868: for (i=0; i<len; i++) bb[lrows[i]] = 1;
869: VecRestoreArray(xmask,&bb);
870: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
871: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
872: VecDestroy(&xmask);
873: if (x) {
874: VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
875: VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
876: VecGetArrayRead(l->lvec,&xx);
877: VecGetArray(b,&bb);
878: }
879: VecGetArray(lmask,&mask);
880: /* remove zeroed rows of off diagonal matrix */
881: ii = aij->i;
882: for (i=0; i<len; i++) {
883: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
884: }
885: /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
886: if (aij->compressedrow.use) {
887: m = aij->compressedrow.nrows;
888: ii = aij->compressedrow.i;
889: ridx = aij->compressedrow.rindex;
890: for (i=0; i<m; i++) {
891: n = ii[i+1] - ii[i];
892: aj = aij->j + ii[i];
893: aa = aij->a + ii[i];
895: for (j=0; j<n; j++) {
896: if (PetscAbsScalar(mask[*aj])) {
897: if (b) bb[*ridx] -= *aa*xx[*aj];
898: *aa = 0.0;
899: }
900: aa++;
901: aj++;
902: }
903: ridx++;
904: }
905: } else { /* do not use compressed row format */
906: m = l->B->rmap->n;
907: for (i=0; i<m; i++) {
908: n = ii[i+1] - ii[i];
909: aj = aij->j + ii[i];
910: aa = aij->a + ii[i];
911: for (j=0; j<n; j++) {
912: if (PetscAbsScalar(mask[*aj])) {
913: if (b) bb[i] -= *aa*xx[*aj];
914: *aa = 0.0;
915: }
916: aa++;
917: aj++;
918: }
919: }
920: }
921: if (x) {
922: VecRestoreArray(b,&bb);
923: VecRestoreArrayRead(l->lvec,&xx);
924: }
925: VecRestoreArray(lmask,&mask);
926: VecDestroy(&lmask);
927: PetscFree(lrows);
928: return(0);
929: }
933: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934: {
935: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
937: PetscInt nt;
940: VecGetLocalSize(xx,&nt);
941: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
943: (*a->A->ops->mult)(a->A,xx,yy);
944: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
945: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
946: return(0);
947: }
951: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
952: {
953: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
957: MatMultDiagonalBlock(a->A,bb,xx);
958: return(0);
959: }
963: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
964: {
965: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
969: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
970: (*a->A->ops->multadd)(a->A,xx,yy,zz);
971: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
972: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
973: return(0);
974: }
978: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
979: {
980: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
982: PetscBool merged;
985: VecScatterGetMerged(a->Mvctx,&merged);
986: /* do nondiagonal part */
987: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
988: if (!merged) {
989: /* send it on its way */
990: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
991: /* do local part */
992: (*a->A->ops->multtranspose)(a->A,xx,yy);
993: /* receive remote parts: note this assumes the values are not actually */
994: added in yy until the next line */
995: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
996: } else {
997: /* do local part */
998: (*a->A->ops->multtranspose)(a->A,xx,yy);
999: /* send it on its way */
1000: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1001: /* values actually were received in the Begin() but we need to call this nop */
1002: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1003: }
1004: return(0);
1005: }
1009: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1010: {
1011: MPI_Comm comm;
1012: Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1013: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1014: IS Me,Notme;
1016: PetscInt M,N,first,last,*notme,i;
1017: PetscMPIInt size;
1020: /* Easy test: symmetric diagonal block */
1021: Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1022: MatIsTranspose(Adia,Bdia,tol,f);
1023: if (!*f) return(0);
1024: PetscObjectGetComm((PetscObject)Amat,&comm);
1025: MPI_Comm_size(comm,&size);
1026: if (size == 1) return(0);
1028: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1029: MatGetSize(Amat,&M,&N);
1030: MatGetOwnershipRange(Amat,&first,&last);
1031: PetscMalloc1((N-last+first),&notme);
1032: for (i=0; i<first; i++) notme[i] = i;
1033: for (i=last; i<M; i++) notme[i-last+first] = i;
1034: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1035: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1036: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1037: Aoff = Aoffs[0];
1038: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1039: Boff = Boffs[0];
1040: MatIsTranspose(Aoff,Boff,tol,f);
1041: MatDestroyMatrices(1,&Aoffs);
1042: MatDestroyMatrices(1,&Boffs);
1043: ISDestroy(&Me);
1044: ISDestroy(&Notme);
1045: PetscFree(notme);
1046: return(0);
1047: }
1051: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1052: {
1053: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1057: /* do nondiagonal part */
1058: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1059: /* send it on its way */
1060: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1061: /* do local part */
1062: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1063: /* receive remote parts */
1064: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1065: return(0);
1066: }
1068: /*
1069: This only works correctly for square matrices where the subblock A->A is the
1070: diagonal block
1071: */
1074: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1075: {
1077: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1080: if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1081: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1082: MatGetDiagonal(a->A,v);
1083: return(0);
1084: }
1088: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1089: {
1090: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1094: MatScale(a->A,aa);
1095: MatScale(a->B,aa);
1096: return(0);
1097: }
1101: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1102: {
1103: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1107: #if defined(PETSC_USE_LOG)
1108: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1109: #endif
1110: MatStashDestroy_Private(&mat->stash);
1111: VecDestroy(&aij->diag);
1112: MatDestroy(&aij->A);
1113: MatDestroy(&aij->B);
1114: #if defined(PETSC_USE_CTABLE)
1115: PetscTableDestroy(&aij->colmap);
1116: #else
1117: PetscFree(aij->colmap);
1118: #endif
1119: PetscFree(aij->garray);
1120: VecDestroy(&aij->lvec);
1121: VecScatterDestroy(&aij->Mvctx);
1122: PetscFree2(aij->rowvalues,aij->rowindices);
1123: PetscFree(aij->ld);
1124: PetscFree(mat->data);
1126: PetscObjectChangeTypeName((PetscObject)mat,0);
1127: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1128: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1129: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
1130: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);
1131: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);
1132: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);
1133: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);
1134: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);
1135: return(0);
1136: }
1140: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1141: {
1142: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1143: Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
1144: Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
1146: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1147: int fd;
1148: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1149: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1150: PetscScalar *column_values;
1151: PetscInt message_count,flowcontrolcount;
1152: FILE *file;
1155: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1156: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
1157: nz = A->nz + B->nz;
1158: if (!rank) {
1159: header[0] = MAT_FILE_CLASSID;
1160: header[1] = mat->rmap->N;
1161: header[2] = mat->cmap->N;
1163: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1164: PetscViewerBinaryGetDescriptor(viewer,&fd);
1165: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1166: /* get largest number of rows any processor has */
1167: rlen = mat->rmap->n;
1168: range = mat->rmap->range;
1169: for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1170: } else {
1171: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1172: rlen = mat->rmap->n;
1173: }
1175: /* load up the local row counts */
1176: PetscMalloc1((rlen+1),&row_lengths);
1177: for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1179: /* store the row lengths to the file */
1180: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1181: if (!rank) {
1182: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1183: for (i=1; i<size; i++) {
1184: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1185: rlen = range[i+1] - range[i];
1186: MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));
1187: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1188: }
1189: PetscViewerFlowControlEndMaster(viewer,&message_count);
1190: } else {
1191: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1192: MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1193: PetscViewerFlowControlEndWorker(viewer,&message_count);
1194: }
1195: PetscFree(row_lengths);
1197: /* load up the local column indices */
1198: nzmax = nz; /* this processor needs space as large as the largest processor needs */
1199: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));
1200: PetscMalloc1((nzmax+1),&column_indices);
1201: cnt = 0;
1202: for (i=0; i<mat->rmap->n; i++) {
1203: for (j=B->i[i]; j<B->i[i+1]; j++) {
1204: if ((col = garray[B->j[j]]) > cstart) break;
1205: column_indices[cnt++] = col;
1206: }
1207: for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1208: for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1209: }
1210: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1212: /* store the column indices to the file */
1213: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1214: if (!rank) {
1215: MPI_Status status;
1216: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1217: for (i=1; i<size; i++) {
1218: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1219: MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1220: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1221: MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));
1222: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1223: }
1224: PetscViewerFlowControlEndMaster(viewer,&message_count);
1225: } else {
1226: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1227: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1228: MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1229: PetscViewerFlowControlEndWorker(viewer,&message_count);
1230: }
1231: PetscFree(column_indices);
1233: /* load up the local column values */
1234: PetscMalloc1((nzmax+1),&column_values);
1235: cnt = 0;
1236: for (i=0; i<mat->rmap->n; i++) {
1237: for (j=B->i[i]; j<B->i[i+1]; j++) {
1238: if (garray[B->j[j]] > cstart) break;
1239: column_values[cnt++] = B->a[j];
1240: }
1241: for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1242: for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1243: }
1244: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1246: /* store the column values to the file */
1247: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1248: if (!rank) {
1249: MPI_Status status;
1250: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1251: for (i=1; i<size; i++) {
1252: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1253: MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1254: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1255: MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));
1256: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1257: }
1258: PetscViewerFlowControlEndMaster(viewer,&message_count);
1259: } else {
1260: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1261: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1262: MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
1263: PetscViewerFlowControlEndWorker(viewer,&message_count);
1264: }
1265: PetscFree(column_values);
1267: PetscViewerBinaryGetInfoPointer(viewer,&file);
1268: if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1269: return(0);
1270: }
1272: #include <petscdraw.h>
1275: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1276: {
1277: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1278: PetscErrorCode ierr;
1279: PetscMPIInt rank = aij->rank,size = aij->size;
1280: PetscBool isdraw,iascii,isbinary;
1281: PetscViewer sviewer;
1282: PetscViewerFormat format;
1285: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1286: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1287: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1288: if (iascii) {
1289: PetscViewerGetFormat(viewer,&format);
1290: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1291: MatInfo info;
1292: PetscBool inodes;
1294: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1295: MatGetInfo(mat,MAT_LOCAL,&info);
1296: MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);
1297: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1298: if (!inodes) {
1299: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1300: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1301: } else {
1302: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1303: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1304: }
1305: MatGetInfo(aij->A,MAT_LOCAL,&info);
1306: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1307: MatGetInfo(aij->B,MAT_LOCAL,&info);
1308: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1309: PetscViewerFlush(viewer);
1310: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1311: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1312: VecScatterView(aij->Mvctx,viewer);
1313: return(0);
1314: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1315: PetscInt inodecount,inodelimit,*inodes;
1316: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1317: if (inodes) {
1318: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1319: } else {
1320: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1321: }
1322: return(0);
1323: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1324: return(0);
1325: }
1326: } else if (isbinary) {
1327: if (size == 1) {
1328: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1329: MatView(aij->A,viewer);
1330: } else {
1331: MatView_MPIAIJ_Binary(mat,viewer);
1332: }
1333: return(0);
1334: } else if (isdraw) {
1335: PetscDraw draw;
1336: PetscBool isnull;
1337: PetscViewerDrawGetDraw(viewer,0,&draw);
1338: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1339: }
1341: if (size == 1) {
1342: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1343: MatView(aij->A,viewer);
1344: } else {
1345: /* assemble the entire matrix onto first processor. */
1346: Mat A;
1347: Mat_SeqAIJ *Aloc;
1348: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1349: MatScalar *a;
1351: if (mat->rmap->N > 1024) {
1352: PetscBool flg = PETSC_FALSE;
1354: PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);
1355: if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1356: }
1358: MatCreate(PetscObjectComm((PetscObject)mat),&A);
1359: if (!rank) {
1360: MatSetSizes(A,M,N,M,N);
1361: } else {
1362: MatSetSizes(A,0,0,M,N);
1363: }
1364: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1365: MatSetType(A,MATMPIAIJ);
1366: MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);
1367: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1368: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
1370: /* copy over the A part */
1371: Aloc = (Mat_SeqAIJ*)aij->A->data;
1372: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1373: row = mat->rmap->rstart;
1374: for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1375: for (i=0; i<m; i++) {
1376: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1377: row++;
1378: a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1379: }
1380: aj = Aloc->j;
1381: for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1383: /* copy over the B part */
1384: Aloc = (Mat_SeqAIJ*)aij->B->data;
1385: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1386: row = mat->rmap->rstart;
1387: PetscMalloc1((ai[m]+1),&cols);
1388: ct = cols;
1389: for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1390: for (i=0; i<m; i++) {
1391: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1392: row++;
1393: a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1394: }
1395: PetscFree(ct);
1396: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1397: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1398: /*
1399: Everyone has to call to draw the matrix since the graphics waits are
1400: synchronized across all processors that share the PetscDraw object
1401: */
1402: PetscViewerGetSingleton(viewer,&sviewer);
1403: if (!rank) {
1404: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);
1405: /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1406: PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1407: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1408: }
1409: PetscViewerRestoreSingleton(viewer,&sviewer);
1410: MatDestroy(&A);
1411: }
1412: return(0);
1413: }
1417: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1418: {
1420: PetscBool iascii,isdraw,issocket,isbinary;
1423: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1424: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1425: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1426: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1427: if (iascii || isdraw || isbinary || issocket) {
1428: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1429: }
1430: return(0);
1431: }
1435: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1436: {
1437: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1439: Vec bb1 = 0;
1440: PetscBool hasop;
1443: if (flag == SOR_APPLY_UPPER) {
1444: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1445: return(0);
1446: }
1448: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1449: VecDuplicate(bb,&bb1);
1450: }
1452: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1453: if (flag & SOR_ZERO_INITIAL_GUESS) {
1454: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1455: its--;
1456: }
1458: while (its--) {
1459: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1460: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1462: /* update rhs: bb1 = bb - B*x */
1463: VecScale(mat->lvec,-1.0);
1464: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1466: /* local sweep */
1467: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1468: }
1469: } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1470: if (flag & SOR_ZERO_INITIAL_GUESS) {
1471: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1472: its--;
1473: }
1474: while (its--) {
1475: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1476: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1478: /* update rhs: bb1 = bb - B*x */
1479: VecScale(mat->lvec,-1.0);
1480: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1482: /* local sweep */
1483: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1484: }
1485: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1486: if (flag & SOR_ZERO_INITIAL_GUESS) {
1487: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1488: its--;
1489: }
1490: while (its--) {
1491: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1492: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1494: /* update rhs: bb1 = bb - B*x */
1495: VecScale(mat->lvec,-1.0);
1496: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1498: /* local sweep */
1499: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1500: }
1501: } else if (flag & SOR_EISENSTAT) {
1502: Vec xx1;
1504: VecDuplicate(bb,&xx1);
1505: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1507: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1508: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1509: if (!mat->diag) {
1510: MatGetVecs(matin,&mat->diag,NULL);
1511: MatGetDiagonal(matin,mat->diag);
1512: }
1513: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1514: if (hasop) {
1515: MatMultDiagonalBlock(matin,xx,bb1);
1516: } else {
1517: VecPointwiseMult(bb1,mat->diag,xx);
1518: }
1519: VecAYPX(bb1,(omega-2.0)/omega,bb);
1521: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1523: /* local sweep */
1524: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1525: VecAXPY(xx,1.0,xx1);
1526: VecDestroy(&xx1);
1527: } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1529: VecDestroy(&bb1);
1530: return(0);
1531: }
1535: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1536: {
1537: Mat aA,aB,Aperm;
1538: const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1539: PetscScalar *aa,*ba;
1540: PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1541: PetscSF rowsf,sf;
1542: IS parcolp = NULL;
1543: PetscBool done;
1547: MatGetLocalSize(A,&m,&n);
1548: ISGetIndices(rowp,&rwant);
1549: ISGetIndices(colp,&cwant);
1550: PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);
1552: /* Invert row permutation to find out where my rows should go */
1553: PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);
1554: PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);
1555: PetscSFSetFromOptions(rowsf);
1556: for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1557: PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1558: PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1560: /* Invert column permutation to find out where my columns should go */
1561: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1562: PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);
1563: PetscSFSetFromOptions(sf);
1564: for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1565: PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1566: PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1567: PetscSFDestroy(&sf);
1569: ISRestoreIndices(rowp,&rwant);
1570: ISRestoreIndices(colp,&cwant);
1571: MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);
1573: /* Find out where my gcols should go */
1574: MatGetSize(aB,NULL,&ng);
1575: PetscMalloc1(ng,&gcdest);
1576: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1577: PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);
1578: PetscSFSetFromOptions(sf);
1579: PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);
1580: PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);
1581: PetscSFDestroy(&sf);
1583: PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);
1584: MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1585: MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1586: for (i=0; i<m; i++) {
1587: PetscInt row = rdest[i],rowner;
1588: PetscLayoutFindOwner(A->rmap,row,&rowner);
1589: for (j=ai[i]; j<ai[i+1]; j++) {
1590: PetscInt cowner,col = cdest[aj[j]];
1591: PetscLayoutFindOwner(A->cmap,col,&cowner); /* Could build an index for the columns to eliminate this search */
1592: if (rowner == cowner) dnnz[i]++;
1593: else onnz[i]++;
1594: }
1595: for (j=bi[i]; j<bi[i+1]; j++) {
1596: PetscInt cowner,col = gcdest[bj[j]];
1597: PetscLayoutFindOwner(A->cmap,col,&cowner);
1598: if (rowner == cowner) dnnz[i]++;
1599: else onnz[i]++;
1600: }
1601: }
1602: PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);
1603: PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);
1604: PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);
1605: PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);
1606: PetscSFDestroy(&rowsf);
1608: MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);
1609: MatSeqAIJGetArray(aA,&aa);
1610: MatSeqAIJGetArray(aB,&ba);
1611: for (i=0; i<m; i++) {
1612: PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1613: PetscInt rowlen;
1614: rowlen = ai[i+1] - ai[i];
1615: for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1616: MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);
1617: rowlen = bi[i+1] - bi[i];
1618: for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1619: MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);
1620: }
1621: MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);
1622: MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);
1623: MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1624: MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1625: MatSeqAIJRestoreArray(aA,&aa);
1626: MatSeqAIJRestoreArray(aB,&ba);
1627: PetscFree4(dnnz,onnz,tdnnz,tonnz);
1628: PetscFree3(work,rdest,cdest);
1629: PetscFree(gcdest);
1630: if (parcolp) {ISDestroy(&colp);}
1631: *B = Aperm;
1632: return(0);
1633: }
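/*
   Usage sketch (illustrative only, not part of the original source): how MatPermute()
   might be called from application code.  The identity permutation built here is just a
   placeholder; a real application supplies its own ordering.  Assumes <petscmat.h> and
   the usual ierr/CHKERRQ error handling.
*/
static PetscErrorCode PermuteExample(Mat A,Mat *Aperm)
{
  PetscErrorCode ierr;
  PetscInt       m,n,rstart,cstart;
  IS             rowp,colp;

  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,NULL);CHKERRQ(ierr);
  /* identity permutations: each locally owned row/column keeps its global index */
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),m,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),n,cstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,Aperm);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  return(0);
}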
1637: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1638: {
1639: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1640: Mat A = mat->A,B = mat->B;
1642: PetscReal isend[5],irecv[5];
1645: info->block_size = 1.0;
1646: MatGetInfo(A,MAT_LOCAL,info);
1648: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1649: isend[3] = info->memory; isend[4] = info->mallocs;
1651: MatGetInfo(B,MAT_LOCAL,info);
1653: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1654: isend[3] += info->memory; isend[4] += info->mallocs;
1655: if (flag == MAT_LOCAL) {
1656: info->nz_used = isend[0];
1657: info->nz_allocated = isend[1];
1658: info->nz_unneeded = isend[2];
1659: info->memory = isend[3];
1660: info->mallocs = isend[4];
1661: } else if (flag == MAT_GLOBAL_MAX) {
1662: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));
1664: info->nz_used = irecv[0];
1665: info->nz_allocated = irecv[1];
1666: info->nz_unneeded = irecv[2];
1667: info->memory = irecv[3];
1668: info->mallocs = irecv[4];
1669: } else if (flag == MAT_GLOBAL_SUM) {
1670: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));
1672: info->nz_used = irecv[0];
1673: info->nz_allocated = irecv[1];
1674: info->nz_unneeded = irecv[2];
1675: info->memory = irecv[3];
1676: info->mallocs = irecv[4];
1677: }
1678: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1679: info->fill_ratio_needed = 0;
1680: info->factor_mallocs = 0;
1681: return(0);
1682: }
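/*
   Usage sketch (illustrative only): querying assembly statistics through the public
   MatGetInfo() interface.  MAT_GLOBAL_SUM exercises the MPI_Allreduce path above,
   while MAT_LOCAL reports only this process.
*/
static PetscErrorCode ReportNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),
                     "nonzeros used %g, allocated %g, mallocs during assembly %g\n",
                     (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
  return(0);
}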
1686: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1687: {
1688: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1692: switch (op) {
1693: case MAT_NEW_NONZERO_LOCATIONS:
1694: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1695: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1696: case MAT_KEEP_NONZERO_PATTERN:
1697: case MAT_NEW_NONZERO_LOCATION_ERR:
1698: case MAT_USE_INODES:
1699: case MAT_IGNORE_ZERO_ENTRIES:
1700: MatCheckPreallocated(A,1);
1701: MatSetOption(a->A,op,flg);
1702: MatSetOption(a->B,op,flg);
1703: break;
1704: case MAT_ROW_ORIENTED:
1705: a->roworiented = flg;
1707: MatSetOption(a->A,op,flg);
1708: MatSetOption(a->B,op,flg);
1709: break;
1710: case MAT_NEW_DIAGONALS:
1711: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1712: break;
1713: case MAT_IGNORE_OFF_PROC_ENTRIES:
1714: a->donotstash = flg;
1715: break;
1716: case MAT_SPD:
1717: A->spd_set = PETSC_TRUE;
1718: A->spd = flg;
1719: if (flg) {
1720: A->symmetric = PETSC_TRUE;
1721: A->structurally_symmetric = PETSC_TRUE;
1722: A->symmetric_set = PETSC_TRUE;
1723: A->structurally_symmetric_set = PETSC_TRUE;
1724: }
1725: break;
1726: case MAT_SYMMETRIC:
1727: MatSetOption(a->A,op,flg);
1728: break;
1729: case MAT_STRUCTURALLY_SYMMETRIC:
1730: MatSetOption(a->A,op,flg);
1731: break;
1732: case MAT_HERMITIAN:
1733: MatSetOption(a->A,op,flg);
1734: break;
1735: case MAT_SYMMETRY_ETERNAL:
1736: MatSetOption(a->A,op,flg);
1737: break;
1738: default:
1739: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1740: }
1741: return(0);
1742: }
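/*
   Usage sketch (illustrative only): a few of the options forwarded by the routine above
   to the diagonal (A) and off-diagonal (B) blocks.  The values chosen here are examples,
   not recommendations.
*/
static PetscErrorCode SetCommonOptions(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);     /* drop entries destined for other ranks */
  ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);  /* error instead of a fresh allocation */
  ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);                         /* also marks the matrix (structurally) symmetric */
  return(0);
}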
1746: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1747: {
1748: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1749: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1751: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1752: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1753: PetscInt *cmap,*idx_p;
1756: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1757: mat->getrowactive = PETSC_TRUE;
1759: if (!mat->rowvalues && (idx || v)) {
1760: /*
1761: allocate enough space to hold information from the longest row.
1762: */
1763: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1764: PetscInt max = 1,tmp;
1765: for (i=0; i<matin->rmap->n; i++) {
1766: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1767: if (max < tmp) max = tmp;
1768: }
1769: PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);
1770: }
1772: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1773: lrow = row - rstart;
1775: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1776: if (!v) {pvA = 0; pvB = 0;}
1777: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1778: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1779: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1780: nztot = nzA + nzB;
1782: cmap = mat->garray;
1783: if (v || idx) {
1784: if (nztot) {
1785: /* Sort by increasing column numbers, assuming A and B already sorted */
1786: PetscInt imark = -1;
1787: if (v) {
1788: *v = v_p = mat->rowvalues;
1789: for (i=0; i<nzB; i++) {
1790: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1791: else break;
1792: }
1793: imark = i;
1794: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1795: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1796: }
1797: if (idx) {
1798: *idx = idx_p = mat->rowindices;
1799: if (imark > -1) {
1800: for (i=0; i<imark; i++) {
1801: idx_p[i] = cmap[cworkB[i]];
1802: }
1803: } else {
1804: for (i=0; i<nzB; i++) {
1805: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1806: else break;
1807: }
1808: imark = i;
1809: }
1810: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1811: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1812: }
1813: } else {
1814: if (idx) *idx = 0;
1815: if (v) *v = 0;
1816: }
1817: }
1818: *nz = nztot;
1819: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1820: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1821: return(0);
1822: }
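/*
   Usage sketch (illustrative only): the MatGetRow()/MatRestoreRow() pattern implemented
   above.  Only locally owned rows may be requested, and each MatGetRow() must be paired
   with MatRestoreRow() before asking for the next row.
*/
static PetscErrorCode SumOwnedRows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols,j;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       rowsum;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr   = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    rowsum = 0.0;
    for (j=0; j<ncols; j++) rowsum += vals[j];
    ierr = PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A),"row %D sum %g\n",row,(double)PetscRealPart(rowsum));CHKERRQ(ierr);
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);CHKERRQ(ierr);
  return(0);
}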
1826: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1827: {
1828: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1831: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1832: aij->getrowactive = PETSC_FALSE;
1833: return(0);
1834: }
1838: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1839: {
1840: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1841: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1843: PetscInt i,j,cstart = mat->cmap->rstart;
1844: PetscReal sum = 0.0;
1845: MatScalar *v;
1848: if (aij->size == 1) {
1849: MatNorm(aij->A,type,norm);
1850: } else {
1851: if (type == NORM_FROBENIUS) {
1852: v = amat->a;
1853: for (i=0; i<amat->nz; i++) {
1854: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1855: }
1856: v = bmat->a;
1857: for (i=0; i<bmat->nz; i++) {
1858: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1859: }
1860: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1861: *norm = PetscSqrtReal(*norm);
1862: } else if (type == NORM_1) { /* max column norm */
1863: PetscReal *tmp,*tmp2;
1864: PetscInt *jj,*garray = aij->garray;
1865: PetscCalloc1((mat->cmap->N+1),&tmp);
1866: PetscMalloc1((mat->cmap->N+1),&tmp2);
1867: *norm = 0.0;
1868: v = amat->a; jj = amat->j;
1869: for (j=0; j<amat->nz; j++) {
1870: tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
1871: }
1872: v = bmat->a; jj = bmat->j;
1873: for (j=0; j<bmat->nz; j++) {
1874: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1875: }
1876: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1877: for (j=0; j<mat->cmap->N; j++) {
1878: if (tmp2[j] > *norm) *norm = tmp2[j];
1879: }
1880: PetscFree(tmp);
1881: PetscFree(tmp2);
1882: } else if (type == NORM_INFINITY) { /* max row norm */
1883: PetscReal ntemp = 0.0;
1884: for (j=0; j<aij->A->rmap->n; j++) {
1885: v = amat->a + amat->i[j];
1886: sum = 0.0;
1887: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1888: sum += PetscAbsScalar(*v); v++;
1889: }
1890: v = bmat->a + bmat->i[j];
1891: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1892: sum += PetscAbsScalar(*v); v++;
1893: }
1894: if (sum > ntemp) ntemp = sum;
1895: }
1896: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));
1897: } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1898: }
1899: return(0);
1900: }
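/*
   Usage sketch (illustrative only): the three norm types handled above; requesting
   NORM_2 would trigger the "No support for two norm" error.
*/
static PetscErrorCode PrintNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      nrmf,nrm1,nrminf;

  ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);   /* sqrt of the global sum of |a_ij|^2 */
  ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);           /* max column sum */
  ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);  /* max row sum */
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"||A||_F=%g ||A||_1=%g ||A||_inf=%g\n",
                     (double)nrmf,(double)nrm1,(double)nrminf);CHKERRQ(ierr);
  return(0);
}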
1904: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1905: {
1906: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1907: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1909: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1910: PetscInt cstart = A->cmap->rstart,ncol;
1911: Mat B;
1912: MatScalar *array;
1915: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1917: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1918: ai = Aloc->i; aj = Aloc->j;
1919: bi = Bloc->i; bj = Bloc->j;
1920: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1921: PetscInt *d_nnz,*g_nnz,*o_nnz;
1922: PetscSFNode *oloc;
1923: PETSC_UNUSED PetscSF sf;
1925: PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);
1926: /* compute d_nnz for preallocation */
1927: PetscMemzero(d_nnz,na*sizeof(PetscInt));
1928: for (i=0; i<ai[ma]; i++) {
1929: d_nnz[aj[i]]++;
1930: aj[i] += cstart; /* global col index to be used by MatSetValues() */
1931: }
1932: /* compute local off-diagonal contributions */
1933: PetscMemzero(g_nnz,nb*sizeof(PetscInt));
1934: for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1935: /* map those to global */
1936: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1937: PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);
1938: PetscSFSetFromOptions(sf);
1939: PetscMemzero(o_nnz,na*sizeof(PetscInt));
1940: PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
1941: PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
1942: PetscSFDestroy(&sf);
1944: MatCreate(PetscObjectComm((PetscObject)A),&B);
1945: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1946: MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);
1947: MatSetType(B,((PetscObject)A)->type_name);
1948: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
1949: PetscFree4(d_nnz,o_nnz,g_nnz,oloc);
1950: } else {
1951: B = *matout;
1952: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
1953: for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1954: }
1956: /* copy over the A part */
1957: array = Aloc->a;
1958: row = A->rmap->rstart;
1959: for (i=0; i<ma; i++) {
1960: ncol = ai[i+1]-ai[i];
1961: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
1962: row++;
1963: array += ncol; aj += ncol;
1964: }
1965: aj = Aloc->j;
1966: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1968: /* copy over the B part */
1969: PetscCalloc1(bi[mb],&cols);
1970: array = Bloc->a;
1971: row = A->rmap->rstart;
1972: for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1973: cols_tmp = cols;
1974: for (i=0; i<mb; i++) {
1975: ncol = bi[i+1]-bi[i];
1976: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
1977: row++;
1978: array += ncol; cols_tmp += ncol;
1979: }
1980: PetscFree(cols);
1982: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1983: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1984: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1985: *matout = B;
1986: } else {
1987: MatHeaderMerge(A,B);
1988: }
1989: return(0);
1990: }
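/*
   Usage sketch (illustrative only): out-of-place and reuse variants of MatTranspose().
   In-place reuse (MAT_REUSE_MATRIX with *matout == A) requires a square matrix, as
   checked at the top of the routine above.
*/
static PetscErrorCode TransposeTwice(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* allocates At */
  /* ... change the numerical values of A, keeping its nonzero pattern ... */
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);   /* refill the existing At */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  return(0);
}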
1994: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1995: {
1996: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1997: Mat a = aij->A,b = aij->B;
1999: PetscInt s1,s2,s3;
2002: MatGetLocalSize(mat,&s2,&s3);
2003: if (rr) {
2004: VecGetLocalSize(rr,&s1);
2005: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2006: /* Overlap communication with computation. */
2007: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2008: }
2009: if (ll) {
2010: VecGetLocalSize(ll,&s1);
2011: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2012: (*b->ops->diagonalscale)(b,ll,0);
2013: }
2014: /* scale the diagonal block */
2015: (*a->ops->diagonalscale)(a,ll,rr);
2017: if (rr) {
2018: /* Do a scatter end and then right scale the off-diagonal block */
2019: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2020: (*b->ops->diagonalscale)(b,0,aij->lvec);
2021: }
2022: return(0);
2023: }
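/*
   Usage sketch (illustrative only): forming diag(l) * A * diag(r).  MatGetVecs() (the
   vector-creation routine of this release) returns vectors whose layouts conform to the
   matrix columns (right) and rows (left), which is what MatDiagonalScale() expects.
*/
static PetscErrorCode ScaleExample(Mat A)
{
  PetscErrorCode ierr;
  Vec            l,r;

  ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr); /* r conforms to columns, l to rows */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
  return(0);
}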
2027: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2028: {
2029: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2033: MatSetUnfactored(a->A);
2034: return(0);
2035: }
2039: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2040: {
2041: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2042: Mat a,b,c,d;
2043: PetscBool flg;
2047: a = matA->A; b = matA->B;
2048: c = matB->A; d = matB->B;
2050: MatEqual(a,c,&flg);
2051: if (flg) {
2052: MatEqual(b,d,&flg);
2053: }
2054: MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
2055: return(0);
2056: }
2060: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2061: {
2063: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2064: Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;
2067: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2068: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2069: /* because of the column compression in the off-processor part of the matrix a->B,
2070: the number of columns in a->B and b->B may be different, hence we cannot call
2071: the MatCopy() directly on the two parts. If need be, we can provide a more
2072: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2073: then copying the submatrices */
2074: MatCopy_Basic(A,B,str);
2075: } else {
2076: MatCopy(a->A,b->A,str);
2077: MatCopy(a->B,b->B,str);
2078: }
2079: return(0);
2080: }
2084: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2085: {
2089: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2090: return(0);
2091: }
2095: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2096: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2097: {
2098: PetscInt i,m=Y->rmap->N;
2099: Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
2100: Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;
2101: const PetscInt *xi = x->i,*yi = y->i;
2104: /* Set the number of nonzeros in the new matrix */
2105: for (i=0; i<m; i++) {
2106: PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2107: const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2108: nnz[i] = 0;
2109: for (j=0,k=0; j<nzx; j++) { /* Point in X */
2110: for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2111: if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */
2112: nnz[i]++;
2113: }
2114: for (; k<nzy; k++) nnz[i]++;
2115: }
2116: return(0);
2117: }
2121: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2122: {
2124: PetscInt i;
2125: Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2126: PetscBLASInt bnz,one=1;
2127: Mat_SeqAIJ *x,*y;
2130: if (str == SAME_NONZERO_PATTERN) {
2131: PetscScalar alpha = a;
2132: x = (Mat_SeqAIJ*)xx->A->data;
2133: PetscBLASIntCast(x->nz,&bnz);
2134: y = (Mat_SeqAIJ*)yy->A->data;
2135: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2136: x = (Mat_SeqAIJ*)xx->B->data;
2137: y = (Mat_SeqAIJ*)yy->B->data;
2138: PetscBLASIntCast(x->nz,&bnz);
2139: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2140: } else if (str == SUBSET_NONZERO_PATTERN) {
2141: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
2143: x = (Mat_SeqAIJ*)xx->B->data;
2144: y = (Mat_SeqAIJ*)yy->B->data;
2145: if (y->xtoy && y->XtoY != xx->B) {
2146: PetscFree(y->xtoy);
2147: MatDestroy(&y->XtoY);
2148: }
2149: if (!y->xtoy) { /* get xtoy */
2150: MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
2151: y->XtoY = xx->B;
2152: PetscObjectReference((PetscObject)xx->B);
2153: }
2154: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2155: } else {
2156: Mat B;
2157: PetscInt *nnz_d,*nnz_o;
2158: PetscMalloc1(yy->A->rmap->N,&nnz_d);
2159: PetscMalloc1(yy->B->rmap->N,&nnz_o);
2160: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2161: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2162: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2163: MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);
2164: MatSetType(B,MATMPIAIJ);
2165: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2166: MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2167: MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2168: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2169: MatHeaderReplace(Y,B);
2170: PetscFree(nnz_d);
2171: PetscFree(nnz_o);
2172: }
2173: return(0);
2174: }
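/*
   Usage sketch (illustrative only): Y <- a*X + Y with the MatStructure flags dispatched
   above.  SAME_NONZERO_PATTERN takes the direct BLAS axpy path on the stored values,
   DIFFERENT_NONZERO_PATTERN rebuilds Y with merged preallocation.
*/
static PetscErrorCode AXPYExample(Mat Y,Mat X,MatStructure str)
{
  PetscErrorCode ierr;

  /* str must describe how X's nonzero pattern relates to Y's:
     SAME_NONZERO_PATTERN      - identical patterns (fast BLAS path)
     SUBSET_NONZERO_PATTERN    - every nonzero of X is also a nonzero of Y
     DIFFERENT_NONZERO_PATTERN - no relation assumed; Y is rebuilt */
  ierr = MatAXPY(Y,2.0,X,str);CHKERRQ(ierr); /* Y <- 2*X + Y */
  return(0);
}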
2176: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2180: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2181: {
2182: #if defined(PETSC_USE_COMPLEX)
2184: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2187: MatConjugate_SeqAIJ(aij->A);
2188: MatConjugate_SeqAIJ(aij->B);
2189: #else
2191: #endif
2192: return(0);
2193: }
2197: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2198: {
2199: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2203: MatRealPart(a->A);
2204: MatRealPart(a->B);
2205: return(0);
2206: }
2210: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2211: {
2212: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2216: MatImaginaryPart(a->A);
2217: MatImaginaryPart(a->B);
2218: return(0);
2219: }
2221: #if defined(PETSC_HAVE_PBGL)
2223: #include <boost/parallel/mpi/bsp_process_group.hpp>
2224: #include <boost/graph/distributed/ilu_default_graph.hpp>
2225: #include <boost/graph/distributed/ilu_0_block.hpp>
2226: #include <boost/graph/distributed/ilu_preconditioner.hpp>
2227: #include <boost/graph/distributed/petsc/interface.hpp>
2228: #include <boost/multi_array.hpp>
2229: #include <boost/parallel/distributed_property_map.hpp>
2233: /*
2234: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2235: */
2236: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2237: {
2238: namespace petsc = boost::distributed::petsc;
2240: namespace graph_dist = boost::graph::distributed;
2241: using boost::graph::distributed::ilu_default::process_group_type;
2242: using boost::graph::ilu_permuted;
2244: PetscBool row_identity, col_identity;
2245: PetscContainer c;
2246: PetscInt m, n, M, N;
2250: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2251: ISIdentity(isrow, &row_identity);
2252: ISIdentity(iscol, &col_identity);
2253: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2255: process_group_type pg;
2256: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2257: lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2258: lgraph_type& level_graph = *lgraph_p;
2259: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2261: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2262: ilu_permuted(level_graph);
2264: /* put together the new matrix */
2265: MatCreate(PetscObjectComm((PetscObject)A), fact);
2266: MatGetLocalSize(A, &m, &n);
2267: MatGetSize(A, &M, &N);
2268: MatSetSizes(fact, m, n, M, N);
2269: MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);
2270: MatSetType(fact, ((PetscObject)A)->type_name);
2271: MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);
2272: MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);
2274: PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2275: PetscContainerSetPointer(c, lgraph_p);
2276: PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2277: PetscContainerDestroy(&c);
2278: return(0);
2279: }
2283: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2284: {
2286: return(0);
2287: }
2291: /*
2292: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2293: */
2294: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2295: {
2296: namespace graph_dist = boost::graph::distributed;
2298: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2299: lgraph_type *lgraph_p;
2300: PetscContainer c;
2304: PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);
2305: PetscContainerGetPointer(c, (void**) &lgraph_p);
2306: VecCopy(b, x);
2308: PetscScalar *array_x;
2309: VecGetArray(x, &array_x);
2310: PetscInt sx;
2311: VecGetSize(x, &sx);
2313: PetscScalar *array_b;
2314: VecGetArray(b, &array_b);
2315: PetscInt sb;
2316: VecGetSize(b, &sb);
2318: lgraph_type& level_graph = *lgraph_p;
2319: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2321: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2322: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]);
2323: array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]);
2325: typedef boost::iterator_property_map<array_ref_type::iterator,
2326: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2327: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2328: gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2330: ilu_set_solve(*lgraph_p, vector_b, vector_x);
2331: return(0);
2332: }
2333: #endif
2337: PetscErrorCode MatDestroy_MatRedundant(Mat A)
2338: {
2340: Mat_Redundant *redund;
2341: PetscInt i;
2342: PetscMPIInt size;
2345: MPI_Comm_size(((PetscObject)A)->comm,&size);
2346: if (size == 1) {
2347: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2348: redund = a->redundant;
2349: } else {
2350: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2351: redund = a->redundant;
2352: }
2353: if (redund){
2354: if (redund->matseq) { /* via MatGetSubMatrices() */
2355: ISDestroy(&redund->isrow);
2356: ISDestroy(&redund->iscol);
2357: MatDestroy(&redund->matseq[0]);
2358: PetscFree(redund->matseq);
2359: } else {
2360: PetscFree2(redund->send_rank,redund->recv_rank);
2361: PetscFree(redund->sbuf_j);
2362: PetscFree(redund->sbuf_a);
2363: for (i=0; i<redund->nrecvs; i++) {
2364: PetscFree(redund->rbuf_j[i]);
2365: PetscFree(redund->rbuf_a[i]);
2366: }
2367: PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);
2368: }
2370: if (redund->psubcomm) {
2371: PetscSubcommDestroy(&redund->psubcomm);
2372: }
2373: redund->Destroy(A);
2374: PetscFree(redund);
2375: }
2376: return(0);
2377: }
2381: PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2382: {
2383: PetscMPIInt rank,size;
2384: MPI_Comm comm;
2386: PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2387: PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2388: PetscInt *rowrange = mat->rmap->range;
2389: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2390: Mat A = aij->A,B=aij->B,C=*matredundant;
2391: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2392: PetscScalar *sbuf_a;
2393: PetscInt nzlocal=a->nz+b->nz;
2394: PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2395: PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2396: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2397: MatScalar *aworkA,*aworkB;
2398: PetscScalar *vals;
2399: PetscMPIInt tag1,tag2,tag3,imdex;
2400: MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2401: MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2402: MPI_Status recv_status,*send_status;
2403: PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2404: PetscInt **rbuf_j=NULL;
2405: PetscScalar **rbuf_a=NULL;
2406: Mat_Redundant *redund =NULL;
2407:
2409: PetscObjectGetComm((PetscObject)mat,&comm);
2410: MPI_Comm_rank(comm,&rank);
2411: MPI_Comm_size(comm,&size);
2412: MPI_Comm_rank(subcomm,&subrank);
2413: MPI_Comm_size(subcomm,&subsize);
2415: if (reuse == MAT_REUSE_MATRIX) {
2416: if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2417: if (subsize == 1) {
2418: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2419: redund = c->redundant;
2420: } else {
2421: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2422: redund = c->redundant;
2423: }
2424: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2426: nsends = redund->nsends;
2427: nrecvs = redund->nrecvs;
2428: send_rank = redund->send_rank;
2429: recv_rank = redund->recv_rank;
2430: sbuf_nz = redund->sbuf_nz;
2431: rbuf_nz = redund->rbuf_nz;
2432: sbuf_j = redund->sbuf_j;
2433: sbuf_a = redund->sbuf_a;
2434: rbuf_j = redund->rbuf_j;
2435: rbuf_a = redund->rbuf_a;
2436: }
2438: if (reuse == MAT_INITIAL_MATRIX) {
2439: PetscInt nleftover,np_subcomm;
2441: /* get the destination processors' id send_rank, nsends and nrecvs */
2442: PetscMalloc2(size,&send_rank,size,&recv_rank);
2444: np_subcomm = size/nsubcomm;
2445: nleftover = size - nsubcomm*np_subcomm;
2447: /* block of codes below is specific for INTERLACED */
2448: /* ------------------------------------------------*/
2449: nsends = 0; nrecvs = 0;
2450: for (i=0; i<size; i++) {
2451: if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2452: send_rank[nsends++] = i;
2453: recv_rank[nrecvs++] = i;
2454: }
2455: }
2456: if (rank >= size - nleftover) { /* this proc is a leftover processor */
2457: i = size-nleftover-1;
2458: j = 0;
2459: while (j < nsubcomm - nleftover) {
2460: send_rank[nsends++] = i;
2461: i--; j++;
2462: }
2463: }
2465: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2466: for (i=0; i<nleftover; i++) {
2467: recv_rank[nrecvs++] = size-nleftover+i;
2468: }
2469: }
2470: /*----------------------------------------------*/
2472: /* allocate sbuf_j, sbuf_a */
2473: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2474: PetscMalloc1(i,&sbuf_j);
2475: PetscMalloc1((nzlocal+1),&sbuf_a);
2476: /*
2477: PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);
2478: PetscSynchronizedFlush(comm,PETSC_STDOUT);
2479: */
2480: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2482: /* copy mat's local entries into the buffers */
2483: if (reuse == MAT_INITIAL_MATRIX) {
2484: rownz_max = 0;
2485: rptr = sbuf_j;
2486: cols = sbuf_j + rend-rstart + 1;
2487: vals = sbuf_a;
2488: rptr[0] = 0;
2489: for (i=0; i<rend-rstart; i++) {
2490: row = i + rstart;
2491: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2492: ncols = nzA + nzB;
2493: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2494: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2495: /* load the column indices for this row into cols */
2496: lwrite = 0;
2497: for (l=0; l<nzB; l++) {
2498: if ((ctmp = bmap[cworkB[l]]) < cstart) {
2499: vals[lwrite] = aworkB[l];
2500: cols[lwrite++] = ctmp;
2501: }
2502: }
2503: for (l=0; l<nzA; l++) {
2504: vals[lwrite] = aworkA[l];
2505: cols[lwrite++] = cstart + cworkA[l];
2506: }
2507: for (l=0; l<nzB; l++) {
2508: if ((ctmp = bmap[cworkB[l]]) >= cend) {
2509: vals[lwrite] = aworkB[l];
2510: cols[lwrite++] = ctmp;
2511: }
2512: }
2513: vals += ncols;
2514: cols += ncols;
2515: rptr[i+1] = rptr[i] + ncols;
2516: if (rownz_max < ncols) rownz_max = ncols;
2517: }
2518: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2519: } else { /* only copy matrix values into sbuf_a */
2520: rptr = sbuf_j;
2521: vals = sbuf_a;
2522: rptr[0] = 0;
2523: for (i=0; i<rend-rstart; i++) {
2524: row = i + rstart;
2525: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2526: ncols = nzA + nzB;
2527: cworkB = b->j + b->i[i];
2528: aworkA = a->a + a->i[i];
2529: aworkB = b->a + b->i[i];
2530: lwrite = 0;
2531: for (l=0; l<nzB; l++) {
2532: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2533: }
2534: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2535: for (l=0; l<nzB; l++) {
2536: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2537: }
2538: vals += ncols;
2539: rptr[i+1] = rptr[i] + ncols;
2540: }
2541: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2543: /* send nzlocal to others, and recv other's nzlocal */
2544: /*--------------------------------------------------*/
2545: if (reuse == MAT_INITIAL_MATRIX) {
2546: PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);
2548: s_waits2 = s_waits3 + nsends;
2549: s_waits1 = s_waits2 + nsends;
2550: r_waits1 = s_waits1 + nsends;
2551: r_waits2 = r_waits1 + nrecvs;
2552: r_waits3 = r_waits2 + nrecvs;
2553: } else {
2554: PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);
2556: r_waits3 = s_waits3 + nsends;
2557: }
2559: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2560: if (reuse == MAT_INITIAL_MATRIX) {
2561: /* get new tags to keep the communication clean */
2562: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2563: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2564: PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);
2566: /* post receives of other's nzlocal */
2567: for (i=0; i<nrecvs; i++) {
2568: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2569: }
2570: /* send nzlocal to others */
2571: for (i=0; i<nsends; i++) {
2572: sbuf_nz[i] = nzlocal;
2573: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2574: }
2575: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2576: count = nrecvs;
2577: while (count) {
2578: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2580: recv_rank[imdex] = recv_status.MPI_SOURCE;
2581: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2582: PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);
2584: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2586: rbuf_nz[imdex] += i + 2;
2588: PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);
2589: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2590: count--;
2591: }
2592: /* wait on sends of nzlocal */
2593: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2594: /* send mat->i,j to others, and recv from other's */
2595: /*------------------------------------------------*/
2596: for (i=0; i<nsends; i++) {
2597: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2598: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2599: }
2600: /* wait on receives of mat->i,j */
2601: /*------------------------------*/
2602: count = nrecvs;
2603: while (count) {
2604: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2605: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2606: count--;
2607: }
2608: /* wait on sends of mat->i,j */
2609: /*---------------------------*/
2610: if (nsends) {
2611: MPI_Waitall(nsends,s_waits2,send_status);
2612: }
2613: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2615: /* post receives, send and receive mat->a */
2616: /*----------------------------------------*/
2617: for (imdex=0; imdex<nrecvs; imdex++) {
2618: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2619: }
2620: for (i=0; i<nsends; i++) {
2621: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2622: }
2623: count = nrecvs;
2624: while (count) {
2625: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2626: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2627: count--;
2628: }
2629: if (nsends) {
2630: MPI_Waitall(nsends,s_waits3,send_status);
2631: }
2633: PetscFree2(s_waits3,send_status);
2635: /* create redundant matrix */
2636: /*-------------------------*/
2637: if (reuse == MAT_INITIAL_MATRIX) {
2638: const PetscInt *range;
2639: PetscInt rstart_sub,rend_sub,mloc_sub;
2641: /* compute rownz_max for preallocation */
2642: for (imdex=0; imdex<nrecvs; imdex++) {
2643: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2644: rptr = rbuf_j[imdex];
2645: for (i=0; i<j; i++) {
2646: ncols = rptr[i+1] - rptr[i];
2647: if (rownz_max < ncols) rownz_max = ncols;
2648: }
2649: }
2651: MatCreate(subcomm,&C);
2653: /* get local size of redundant matrix
2654: - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2655: MatGetOwnershipRanges(mat,&range);
2656: rstart_sub = range[nsubcomm*subrank];
2657: if (subrank+1 < subsize) { /* not the last proc in subcomm */
2658: rend_sub = range[nsubcomm*(subrank+1)];
2659: } else {
2660: rend_sub = mat->rmap->N;
2661: }
2662: mloc_sub = rend_sub - rstart_sub;
2664: if (M == N) {
2665: MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);
2666: } else { /* non-square matrix */
2667: MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);
2668: }
2669: MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);
2670: MatSetFromOptions(C);
2671: MatSeqAIJSetPreallocation(C,rownz_max,NULL);
2672: MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);
2673: } else {
2674: C = *matredundant;
2675: }
2677: /* insert local matrix entries */
2678: rptr = sbuf_j;
2679: cols = sbuf_j + rend-rstart + 1;
2680: vals = sbuf_a;
2681: for (i=0; i<rend-rstart; i++) {
2682: row = i + rstart;
2683: ncols = rptr[i+1] - rptr[i];
2684: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2685: vals += ncols;
2686: cols += ncols;
2687: }
2688: /* insert received matrix entries */
2689: for (imdex=0; imdex<nrecvs; imdex++) {
2690: rstart = rowrange[recv_rank[imdex]];
2691: rend = rowrange[recv_rank[imdex]+1];
2692: /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2693: rptr = rbuf_j[imdex];
2694: cols = rbuf_j[imdex] + rend-rstart + 1;
2695: vals = rbuf_a[imdex];
2696: for (i=0; i<rend-rstart; i++) {
2697: row = i + rstart;
2698: ncols = rptr[i+1] - rptr[i];
2699: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2700: vals += ncols;
2701: cols += ncols;
2702: }
2703: }
2704: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2705: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2707: if (reuse == MAT_INITIAL_MATRIX) {
2708: *matredundant = C;
2710: /* create a supporting struct and attach it to C for reuse */
2711: PetscNewLog(C,&redund);
2712: if (subsize == 1) {
2713: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2714: c->redundant = redund;
2715: } else {
2716: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2717: c->redundant = redund;
2718: }
2720: redund->nzlocal = nzlocal;
2721: redund->nsends = nsends;
2722: redund->nrecvs = nrecvs;
2723: redund->send_rank = send_rank;
2724: redund->recv_rank = recv_rank;
2725: redund->sbuf_nz = sbuf_nz;
2726: redund->rbuf_nz = rbuf_nz;
2727: redund->sbuf_j = sbuf_j;
2728: redund->sbuf_a = sbuf_a;
2729: redund->rbuf_j = rbuf_j;
2730: redund->rbuf_a = rbuf_a;
2731: redund->psubcomm = NULL;
2733: redund->Destroy = C->ops->destroy;
2734: C->ops->destroy = MatDestroy_MatRedundant;
2735: }
2736: return(0);
2737: }
2741: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2742: {
2744: MPI_Comm comm;
2745: PetscMPIInt size,subsize;
2746: PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2747: Mat_Redundant *redund=NULL;
2748: PetscSubcomm psubcomm=NULL;
2749: MPI_Comm subcomm_in=subcomm;
2750: Mat *matseq;
2751: IS isrow,iscol;
2754: if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2755: if (reuse == MAT_INITIAL_MATRIX) {
2756: /* create psubcomm, then get subcomm */
2757: PetscObjectGetComm((PetscObject)mat,&comm);
2758: MPI_Comm_size(comm,&size);
2759: if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2761: PetscSubcommCreate(comm,&psubcomm);
2762: PetscSubcommSetNumber(psubcomm,nsubcomm);
2763: PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);
2764: PetscSubcommSetFromOptions(psubcomm);
2765: subcomm = psubcomm->comm;
2766: } else { /* retrieve psubcomm and subcomm */
2767: PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);
2768: MPI_Comm_size(subcomm,&subsize);
2769: if (subsize == 1) {
2770: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2771: redund = c->redundant;
2772: } else {
2773: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2774: redund = c->redundant;
2775: }
2776: psubcomm = redund->psubcomm;
2777: }
2778: if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2779: MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);
2780: if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2781: MPI_Comm_size(psubcomm->comm,&subsize);
2782: if (subsize == 1) {
2783: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2784: c->redundant->psubcomm = psubcomm;
2785: } else {
2786: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2787: c->redundant->psubcomm = psubcomm;
2788: }
2789: }
2790: return(0);
2791: }
2792: }
2794: /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2795: MPI_Comm_size(subcomm,&subsize);
2796: if (reuse == MAT_INITIAL_MATRIX) {
2797: /* create a local sequential matrix matseq[0] */
2798: mloc_sub = PETSC_DECIDE;
2799: PetscSplitOwnership(subcomm,&mloc_sub,&M);
2800: MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);
2801: rstart = rend - mloc_sub;
2802: ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);
2803: ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);
2804: } else { /* reuse == MAT_REUSE_MATRIX */
2805: if (subsize == 1) {
2806: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2807: redund = c->redundant;
2808: } else {
2809: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2810: redund = c->redundant;
2811: }
2813: isrow = redund->isrow;
2814: iscol = redund->iscol;
2815: matseq = redund->matseq;
2816: }
2817: MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);
2818: MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);
2820: if (reuse == MAT_INITIAL_MATRIX) {
2821: /* create a supporting struct and attach it to C for reuse */
2822: PetscNewLog(*matredundant,&redund);
2823: if (subsize == 1) {
2824: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2825: c->redundant = redund;
2826: } else {
2827: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2828: c->redundant = redund;
2829: }
2830: redund->isrow = isrow;
2831: redund->iscol = iscol;
2832: redund->matseq = matseq;
2833: redund->psubcomm = psubcomm;
2834: redund->Destroy = (*matredundant)->ops->destroy;
2835: (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2836: }
2837: return(0);
2838: }
2842: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2843: {
2844: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2846: PetscInt i,*idxb = 0;
2847: PetscScalar *va,*vb;
2848: Vec vtmp;
2851: MatGetRowMaxAbs(a->A,v,idx);
2852: VecGetArray(v,&va);
2853: if (idx) {
2854: for (i=0; i<A->rmap->n; i++) {
2855: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2856: }
2857: }
2859: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2860: if (idx) {
2861: PetscMalloc1(A->rmap->n,&idxb);
2862: }
2863: MatGetRowMaxAbs(a->B,vtmp,idxb);
2864: VecGetArray(vtmp,&vb);
2866: for (i=0; i<A->rmap->n; i++) {
2867: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2868: va[i] = vb[i];
2869: if (idx) idx[i] = a->garray[idxb[i]];
2870: }
2871: }
2873: VecRestoreArray(v,&va);
2874: VecRestoreArray(vtmp,&vb);
2875: PetscFree(idxb);
2876: VecDestroy(&vtmp);
2877: return(0);
2878: }
2882: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2883: {
2884: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2886: PetscInt i,*idxb = 0;
2887: PetscScalar *va,*vb;
2888: Vec vtmp;
2891: MatGetRowMinAbs(a->A,v,idx);
2892: VecGetArray(v,&va);
2893: if (idx) {
2894: for (i=0; i<A->rmap->n; i++) {
2895: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2896: }
2897: }
2899: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2900: if (idx) {
2901: PetscMalloc1(A->rmap->n,&idxb);
2902: }
2903: MatGetRowMinAbs(a->B,vtmp,idxb);
2904: VecGetArray(vtmp,&vb);
2906: for (i=0; i<A->rmap->n; i++) {
2907: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2908: va[i] = vb[i];
2909: if (idx) idx[i] = a->garray[idxb[i]];
2910: }
2911: }
2913: VecRestoreArray(v,&va);
2914: VecRestoreArray(vtmp,&vb);
2915: PetscFree(idxb);
2916: VecDestroy(&vtmp);
2917: return(0);
2918: }
2922: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2923: {
2924: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2925: PetscInt n = A->rmap->n;
2926: PetscInt cstart = A->cmap->rstart;
2927: PetscInt *cmap = mat->garray;
2928: PetscInt *diagIdx, *offdiagIdx;
2929: Vec diagV, offdiagV;
2930: PetscScalar *a, *diagA, *offdiagA;
2931: PetscInt r;
2935: PetscMalloc2(n,&diagIdx,n,&offdiagIdx);
2936: VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);
2937: VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);
2938: MatGetRowMin(mat->A, diagV, diagIdx);
2939: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2940: VecGetArray(v, &a);
2941: VecGetArray(diagV, &diagA);
2942: VecGetArray(offdiagV, &offdiagA);
2943: for (r = 0; r < n; ++r) {
2944: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2945: a[r] = diagA[r];
2946: idx[r] = cstart + diagIdx[r];
2947: } else {
2948: a[r] = offdiagA[r];
2949: idx[r] = cmap[offdiagIdx[r]];
2950: }
2951: }
2952: VecRestoreArray(v, &a);
2953: VecRestoreArray(diagV, &diagA);
2954: VecRestoreArray(offdiagV, &offdiagA);
2955: VecDestroy(&diagV);
2956: VecDestroy(&offdiagV);
2957: PetscFree2(diagIdx, offdiagIdx);
2958: return(0);
2959: }
2963: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2964: {
2965: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2966: PetscInt n = A->rmap->n;
2967: PetscInt cstart = A->cmap->rstart;
2968: PetscInt *cmap = mat->garray;
2969: PetscInt *diagIdx, *offdiagIdx;
2970: Vec diagV, offdiagV;
2971: PetscScalar *a, *diagA, *offdiagA;
2972: PetscInt r;
2976: PetscMalloc2(n,&diagIdx,n,&offdiagIdx);
2977: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
2978: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2979: MatGetRowMax(mat->A, diagV, diagIdx);
2980: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
2981: VecGetArray(v, &a);
2982: VecGetArray(diagV, &diagA);
2983: VecGetArray(offdiagV, &offdiagA);
2984: for (r = 0; r < n; ++r) {
2985: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2986: a[r] = diagA[r];
2987: idx[r] = cstart + diagIdx[r];
2988: } else {
2989: a[r] = offdiagA[r];
2990: idx[r] = cmap[offdiagIdx[r]];
2991: }
2992: }
2993: VecRestoreArray(v, &a);
2994: VecRestoreArray(diagV, &diagA);
2995: VecRestoreArray(offdiagV, &offdiagA);
2996: VecDestroy(&diagV);
2997: VecDestroy(&offdiagV);
2998: PetscFree2(diagIdx, offdiagIdx);
2999: return(0);
3000: }
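/*
   Usage sketch (illustrative only): row-wise maxima via the public interface.  The
   vector v must have the row layout of A (the left vector from MatGetVecs()), and idx,
   if requested, receives the global column index of each selected entry.
*/
static PetscErrorCode RowMaxExample(Mat A)
{
  PetscErrorCode ierr;
  Vec            v;
  PetscInt       m,*idx;

  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = MatGetVecs(A,NULL,&v);CHKERRQ(ierr);    /* left vector: conforms to the rows */
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr); /* or MatGetRowMax/MatGetRowMin/MatGetRowMinAbs */
  ierr = VecDestroy(&v);CHKERRQ(ierr);
  ierr = PetscFree(idx);CHKERRQ(ierr);
  return(0);
}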
3004: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3005: {
3007: Mat *dummy;
3010: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
3011: *newmat = *dummy;
3012: PetscFree(dummy);
3013: return(0);
3014: }
3018: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3019: {
3020: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
3024: MatInvertBlockDiagonal(a->A,values);
3025: return(0);
3026: }
3030: static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3031: {
3033: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;
3036: MatSetRandom(aij->A,rctx);
3037: MatSetRandom(aij->B,rctx);
3038: MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);
3039: MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);
3040: return(0);
3041: }
3043: /* -------------------------------------------------------------------*/
3044: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3045: MatGetRow_MPIAIJ,
3046: MatRestoreRow_MPIAIJ,
3047: MatMult_MPIAIJ,
3048: /* 4*/ MatMultAdd_MPIAIJ,
3049: MatMultTranspose_MPIAIJ,
3050: MatMultTransposeAdd_MPIAIJ,
3051: #if defined(PETSC_HAVE_PBGL)
3052: MatSolve_MPIAIJ,
3053: #else
3054: 0,
3055: #endif
3056: 0,
3057: 0,
3058: /*10*/ 0,
3059: 0,
3060: 0,
3061: MatSOR_MPIAIJ,
3062: MatTranspose_MPIAIJ,
3063: /*15*/ MatGetInfo_MPIAIJ,
3064: MatEqual_MPIAIJ,
3065: MatGetDiagonal_MPIAIJ,
3066: MatDiagonalScale_MPIAIJ,
3067: MatNorm_MPIAIJ,
3068: /*20*/ MatAssemblyBegin_MPIAIJ,
3069: MatAssemblyEnd_MPIAIJ,
3070: MatSetOption_MPIAIJ,
3071: MatZeroEntries_MPIAIJ,
3072: /*24*/ MatZeroRows_MPIAIJ,
3073: 0,
3074: #if defined(PETSC_HAVE_PBGL)
3075: 0,
3076: #else
3077: 0,
3078: #endif
3079: 0,
3080: 0,
3081: /*29*/ MatSetUp_MPIAIJ,
3082: #if defined(PETSC_HAVE_PBGL)
3083: 0,
3084: #else
3085: 0,
3086: #endif
3087: 0,
3088: 0,
3089: 0,
3090: /*34*/ MatDuplicate_MPIAIJ,
3091: 0,
3092: 0,
3093: 0,
3094: 0,
3095: /*39*/ MatAXPY_MPIAIJ,
3096: MatGetSubMatrices_MPIAIJ,
3097: MatIncreaseOverlap_MPIAIJ,
3098: MatGetValues_MPIAIJ,
3099: MatCopy_MPIAIJ,
3100: /*44*/ MatGetRowMax_MPIAIJ,
3101: MatScale_MPIAIJ,
3102: 0,
3103: 0,
3104: MatZeroRowsColumns_MPIAIJ,
3105: /*49*/ MatSetRandom_MPIAIJ,
3106: 0,
3107: 0,
3108: 0,
3109: 0,
3110: /*54*/ MatFDColoringCreate_MPIXAIJ,
3111: 0,
3112: MatSetUnfactored_MPIAIJ,
3113: MatPermute_MPIAIJ,
3114: 0,
3115: /*59*/ MatGetSubMatrix_MPIAIJ,
3116: MatDestroy_MPIAIJ,
3117: MatView_MPIAIJ,
3118: 0,
3119: MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3120: /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3121: MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3122: 0,
3123: 0,
3124: 0,
3125: /*69*/ MatGetRowMaxAbs_MPIAIJ,
3126: MatGetRowMinAbs_MPIAIJ,
3127: 0,
3128: MatSetColoring_MPIAIJ,
3129: 0,
3130: MatSetValuesAdifor_MPIAIJ,
3131: /*75*/ MatFDColoringApply_AIJ,
3132: 0,
3133: 0,
3134: 0,
3135: MatFindZeroDiagonals_MPIAIJ,
3136: /*80*/ 0,
3137: 0,
3138: 0,
3139: /*83*/ MatLoad_MPIAIJ,
3140: 0,
3141: 0,
3142: 0,
3143: 0,
3144: 0,
3145: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3146: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3147: MatMatMultNumeric_MPIAIJ_MPIAIJ,
3148: MatPtAP_MPIAIJ_MPIAIJ,
3149: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3150: /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3151: 0,
3152: 0,
3153: 0,
3154: 0,
3155: /*99*/ 0,
3156: 0,
3157: 0,
3158: MatConjugate_MPIAIJ,
3159: 0,
3160: /*104*/MatSetValuesRow_MPIAIJ,
3161: MatRealPart_MPIAIJ,
3162: MatImaginaryPart_MPIAIJ,
3163: 0,
3164: 0,
3165: /*109*/0,
3166: MatGetRedundantMatrix_MPIAIJ,
3167: MatGetRowMin_MPIAIJ,
3168: 0,
3169: 0,
3170: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3171: 0,
3172: 0,
3173: 0,
3174: 0,
3175: /*119*/0,
3176: 0,
3177: 0,
3178: 0,
3179: MatGetMultiProcBlock_MPIAIJ,
3180: /*124*/MatFindNonzeroRows_MPIAIJ,
3181: MatGetColumnNorms_MPIAIJ,
3182: MatInvertBlockDiagonal_MPIAIJ,
3183: 0,
3184: MatGetSubMatricesParallel_MPIAIJ,
3185: /*129*/0,
3186: MatTransposeMatMult_MPIAIJ_MPIAIJ,
3187: MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3188: MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3189: 0,
3190: /*134*/0,
3191: 0,
3192: 0,
3193: 0,
3194: 0,
3195: /*139*/0,
3196: 0,
3197: 0,
3198: MatFDColoringSetUp_MPIXAIJ
3199: };
3201: /* ----------------------------------------------------------------------------------------*/
3205: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
3206: {
3207: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
3211: MatStoreValues(aij->A);
3212: MatStoreValues(aij->B);
3213: return(0);
3214: }
3218: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
3219: {
3220: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
3224: MatRetrieveValues(aij->A);
3225: MatRetrieveValues(aij->B);
3226: return(0);
3227: }
3231: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3232: {
3233: Mat_MPIAIJ *b;
3237: PetscLayoutSetUp(B->rmap);
3238: PetscLayoutSetUp(B->cmap);
3239: b = (Mat_MPIAIJ*)B->data;
3241: if (!B->preallocated) {
3242: /* Explicitly create 2 MATSEQAIJ matrices. */
3243: MatCreate(PETSC_COMM_SELF,&b->A);
3244: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3245: MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);
3246: MatSetType(b->A,MATSEQAIJ);
3247: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
3248: MatCreate(PETSC_COMM_SELF,&b->B);
3249: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3250: MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);
3251: MatSetType(b->B,MATSEQAIJ);
3252: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
3253: }
3255: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3256: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3257: B->preallocated = PETSC_TRUE;
3258: return(0);
3259: }
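/*
   Usage sketch (illustrative only): the standard creation sequence that ends in
   MatMPIAIJSetPreallocation().  d_nz/o_nz give per-row upper bounds for the diagonal
   block (columns owned by this rank) and the off-diagonal block; the bounds 5 and 2
   below are placeholders.
*/
static PetscErrorCode CreatePreallocatedAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr); /* <=5 diagonal-block, <=2 off-diagonal-block nonzeros per row */
  return(0);
}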
3263: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3264: {
3265: Mat mat;
3266: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3270: *newmat = 0;
3271: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
3272: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3273: MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);
3274: MatSetType(mat,((PetscObject)matin)->type_name);
3275: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3276: a = (Mat_MPIAIJ*)mat->data;
3278: mat->factortype = matin->factortype;
3279: mat->rmap->bs = matin->rmap->bs;
3280: mat->cmap->bs = matin->cmap->bs;
3281: mat->assembled = PETSC_TRUE;
3282: mat->insertmode = NOT_SET_VALUES;
3283: mat->preallocated = PETSC_TRUE;
3285: a->size = oldmat->size;
3286: a->rank = oldmat->rank;
3287: a->donotstash = oldmat->donotstash;
3288: a->roworiented = oldmat->roworiented;
3289: a->rowindices = 0;
3290: a->rowvalues = 0;
3291: a->getrowactive = PETSC_FALSE;
3293: PetscLayoutReference(matin->rmap,&mat->rmap);
3294: PetscLayoutReference(matin->cmap,&mat->cmap);
3296: if (oldmat->colmap) {
3297: #if defined(PETSC_USE_CTABLE)
3298: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3299: #else
3300: PetscMalloc1((mat->cmap->N),&a->colmap);
3301: PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));
3302: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
3303: #endif
3304: } else a->colmap = 0;
3305: if (oldmat->garray) {
3306: PetscInt len;
3307: len = oldmat->B->cmap->n;
3308: PetscMalloc1((len+1),&a->garray);
3309: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
3310: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
3311: } else a->garray = 0;
3313: VecDuplicate(oldmat->lvec,&a->lvec);
3314: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
3315: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3316: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
3317: MatDuplicate(oldmat->A,cpvalues,&a->A);
3318: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
3319: MatDuplicate(oldmat->B,cpvalues,&a->B);
3320: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
3321: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3322: *newmat = mat;
3323: return(0);
3324: }
3330: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3331: {
3332: PetscScalar *vals,*svals;
3333: MPI_Comm comm;
3335: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3336: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3337: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3338: PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3339: PetscInt cend,cstart,n,*rowners,sizesset=1;
3340: int fd;
3341: PetscInt bs = 1;
3344: PetscObjectGetComm((PetscObject)viewer,&comm);
3345: MPI_Comm_size(comm,&size);
3346: MPI_Comm_rank(comm,&rank);
3347: if (!rank) {
3348: PetscViewerBinaryGetDescriptor(viewer,&fd);
3349: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
3350: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3351: }
3353: PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");
3354: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3355: PetscOptionsEnd();
3357: if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3359: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3360: M = header[1]; N = header[2];
3361: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3362: if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3363: if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3365: /* If global sizes are set, check if they are consistent with that given in the file */
3366: if (sizesset) {
3367: MatGetSize(newMat,&grows,&gcols);
3368: }
3369: if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3370: if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3372: /* determine ownership of all (block) rows */
3373: if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3374: if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3375: else m = newMat->rmap->n; /* Set by user */
3377: PetscMalloc1((size+1),&rowners);
3378: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3380: /* First process needs enough room for process with most rows */
3381: if (!rank) {
3382: mmax = rowners[1];
3383: for (i=2; i<=size; i++) {
3384: mmax = PetscMax(mmax, rowners[i]);
3385: }
3386: } else mmax = -1; /* unused, but compilers complain */
3388: rowners[0] = 0;
3389: for (i=2; i<=size; i++) {
3390: rowners[i] += rowners[i-1];
3391: }
3392: rstart = rowners[rank];
3393: rend = rowners[rank+1];
3395: /* distribute row lengths to all processors */
3396: PetscMalloc2(m,&ourlens,m,&offlens);
3397: if (!rank) {
3398: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
3399: PetscMalloc1(mmax,&rowlengths);
3400: PetscCalloc1(size,&procsnz);
3401: for (j=0; j<m; j++) {
3402: procsnz[0] += ourlens[j];
3403: }
3404: for (i=1; i<size; i++) {
3405: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
3406: /* calculate the number of nonzeros on each processor */
3407: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3408: procsnz[i] += rowlengths[j];
3409: }
3410: MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
3411: }
3412: PetscFree(rowlengths);
3413: } else {
3414: MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
3415: }
3417: if (!rank) {
3418: /* determine max buffer needed and allocate it */
3419: maxnz = 0;
3420: for (i=0; i<size; i++) {
3421: maxnz = PetscMax(maxnz,procsnz[i]);
3422: }
3423: PetscMalloc1(maxnz,&cols);
3425: /* read in my part of the matrix column indices */
3426: nz = procsnz[0];
3427: PetscMalloc1(nz,&mycols);
3428: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3430: /* read in everyone else's part and ship it off */
3431: for (i=1; i<size; i++) {
3432: nz = procsnz[i];
3433: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3434: MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);
3435: }
3436: PetscFree(cols);
3437: } else {
3438: /* determine buffer space needed for message */
3439: nz = 0;
3440: for (i=0; i<m; i++) {
3441: nz += ourlens[i];
3442: }
3443: PetscMalloc1(nz,&mycols);
3445: /* receive message of column indices */
3446: MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
3447: }
3449: /* determine column ownership if matrix is not square */
3450: if (N != M) {
3451: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3452: else n = newMat->cmap->n;
3453: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
3454: cstart = cend - n;
3455: } else {
3456: cstart = rstart;
3457: cend = rend;
3458: n = cend - cstart;
3459: }
3461: /* loop over local rows, determining number of off diagonal entries */
3462: PetscMemzero(offlens,m*sizeof(PetscInt));
3463: jj = 0;
3464: for (i=0; i<m; i++) {
3465: for (j=0; j<ourlens[i]; j++) {
3466: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3467: jj++;
3468: }
3469: }
3471: for (i=0; i<m; i++) {
3472: ourlens[i] -= offlens[i];
3473: }
3474: if (!sizesset) {
3475: MatSetSizes(newMat,m,n,M,N);
3476: }
3478: if (bs > 1) {MatSetBlockSize(newMat,bs);}
3480: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
3482: for (i=0; i<m; i++) {
3483: ourlens[i] += offlens[i];
3484: }
3486: if (!rank) {
3487: PetscMalloc1((maxnz+1),&vals);
3489: /* read in my part of the matrix numerical values */
3490: nz = procsnz[0];
3491: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3493: /* insert into matrix */
3494: jj = rstart;
3495: smycols = mycols;
3496: svals = vals;
3497: for (i=0; i<m; i++) {
3498: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3499: smycols += ourlens[i];
3500: svals += ourlens[i];
3501: jj++;
3502: }
3504: /* read in other processors and ship out */
3505: for (i=1; i<size; i++) {
3506: nz = procsnz[i];
3507: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3508: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
3509: }
3510: PetscFree(procsnz);
3511: } else {
3512: /* receive numeric values */
3513: PetscMalloc1((nz+1),&vals);
3515: /* receive message of values*/
3516: MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
3518: /* insert into matrix */
3519: jj = rstart;
3520: smycols = mycols;
3521: svals = vals;
3522: for (i=0; i<m; i++) {
3523: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3524: smycols += ourlens[i];
3525: svals += ourlens[i];
3526: jj++;
3527: }
3528: }
3529: PetscFree2(ourlens,offlens);
3530: PetscFree(vals);
3531: PetscFree(mycols);
3532: PetscFree(rowners);
3533: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3534: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3535: return(0);
3536: }
3540: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3541: {
3543: IS iscol_local;
3544: PetscInt csize;
3547: ISGetLocalSize(iscol,&csize);
3548: if (call == MAT_REUSE_MATRIX) {
3549: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3550: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3551: } else {
3552: PetscInt cbs;
3553: ISGetBlockSize(iscol,&cbs);
3554: ISAllGather(iscol,&iscol_local);
3555: ISSetBlockSize(iscol_local,cbs);
3556: }
3557: MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
3558: if (call == MAT_INITIAL_MATRIX) {
3559: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3560: ISDestroy(&iscol_local);
3561: }
3562: return(0);
3563: }
3565: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3568: /*
3569: Not great since it makes two copies of the submatrix, first an SeqAIJ
3570: in local and then by concatenating the local matrices the end result.
3571: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3573: Note: This requires a sequential iscol with all indices.
3574: */
3575: PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3576: {
3578: PetscMPIInt rank,size;
3579: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3580: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3581: PetscBool allcolumns, colflag;
3582: Mat M,Mreuse;
3583: MatScalar *vwork,*aa;
3584: MPI_Comm comm;
3585: Mat_SeqAIJ *aij;
3588: PetscObjectGetComm((PetscObject)mat,&comm);
3589: MPI_Comm_rank(comm,&rank);
3590: MPI_Comm_size(comm,&size);
3592: ISIdentity(iscol,&colflag);
3593: ISGetLocalSize(iscol,&ncol);
3594: if (colflag && ncol == mat->cmap->N) {
3595: allcolumns = PETSC_TRUE;
3596: } else {
3597: allcolumns = PETSC_FALSE;
3598: }
3599: if (call == MAT_REUSE_MATRIX) {
3600: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);
3601: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3602: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);
3603: } else {
3604: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);
3605: }
3607: /*
3608: m - number of local rows
3609: n - number of columns (same on all processors)
3610: rstart - first row in new global matrix generated
3611: */
3612: MatGetSize(Mreuse,&m,&n);
3613: MatGetBlockSizes(Mreuse,&bs,&cbs);
3614: if (call == MAT_INITIAL_MATRIX) {
3615: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3616: ii = aij->i;
3617: jj = aij->j;
3619: /*
3620: Determine the number of non-zeros in the diagonal and off-diagonal
3621: portions of the matrix in order to do correct preallocation
3622: */
3624: /* first get start and end of "diagonal" columns */
3625: if (csize == PETSC_DECIDE) {
3626: ISGetSize(isrow,&mglobal);
3627: if (mglobal == n) { /* square matrix */
3628: nlocal = m;
3629: } else {
3630: nlocal = n/size + ((n % size) > rank);
3631: }
3632: } else {
3633: nlocal = csize;
3634: }
3635: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3636: rstart = rend - nlocal;
3637: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3639: /* next, compute all the lengths */
3640: PetscMalloc1((2*m+1),&dlens);
3641: olens = dlens + m;
3642: for (i=0; i<m; i++) {
3643: jend = ii[i+1] - ii[i];
3644: olen = 0;
3645: dlen = 0;
3646: for (j=0; j<jend; j++) {
3647: if (*jj < rstart || *jj >= rend) olen++;
3648: else dlen++;
3649: jj++;
3650: }
3651: olens[i] = olen;
3652: dlens[i] = dlen;
3653: }
3654: MatCreate(comm,&M);
3655: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3656: MatSetBlockSizes(M,bs,cbs);
3657: MatSetType(M,((PetscObject)mat)->type_name);
3658: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3659: PetscFree(dlens);
3660: } else {
3661: PetscInt ml,nl;
3663: M = *newmat;
3664: MatGetLocalSize(M,&ml,&nl);
3665: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3666: MatZeroEntries(M);
3667: /*
3668: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3669: rather than the slower MatSetValues().
3670: */
3671: M->was_assembled = PETSC_TRUE;
3672: M->assembled = PETSC_FALSE;
3673: }
3674: MatGetOwnershipRange(M,&rstart,&rend);
3675: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3676: ii = aij->i;
3677: jj = aij->j;
3678: aa = aij->a;
3679: for (i=0; i<m; i++) {
3680: row = rstart + i;
3681: nz = ii[i+1] - ii[i];
3682: cwork = jj; jj += nz;
3683: vwork = aa; aa += nz;
3684: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3685: }
3687: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3688: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3689: *newmat = M;
3691: /* save submatrix used in processor for next request */
3692: if (call == MAT_INITIAL_MATRIX) {
3693: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3694: MatDestroy(&Mreuse);
3695: }
3696: return(0);
3697: }
3701: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3702: {
3703: PetscInt m,cstart, cend,j,nnz,i,d;
3704: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3705: const PetscInt *JJ;
3706: PetscScalar *values;
3710: if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3712: PetscLayoutSetUp(B->rmap);
3713: PetscLayoutSetUp(B->cmap);
3714: m = B->rmap->n;
3715: cstart = B->cmap->rstart;
3716: cend = B->cmap->rend;
3717: rstart = B->rmap->rstart;
3719: PetscMalloc2(m,&d_nnz,m,&o_nnz);
3721: #if defined(PETSC_USE_DEBUG)
3722: for (i=0; i<m; i++) {
3723: nnz = Ii[i+1]- Ii[i];
3724: JJ = J + Ii[i];
3725: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3726:     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3727:     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3728: }
3729: #endif
3731: for (i=0; i<m; i++) {
3732: nnz = Ii[i+1]- Ii[i];
3733: JJ = J + Ii[i];
3734: nnz_max = PetscMax(nnz_max,nnz);
3735: d = 0;
3736: for (j=0; j<nnz; j++) {
3737: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3738: }
3739: d_nnz[i] = d;
3740: o_nnz[i] = nnz - d;
3741: }
3742: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3743: PetscFree2(d_nnz,o_nnz);
3745: if (v) values = (PetscScalar*)v;
3746: else {
3747: PetscCalloc1((nnz_max+1),&values);
3748: }
3750: for (i=0; i<m; i++) {
3751: ii = i + rstart;
3752: nnz = Ii[i+1]- Ii[i];
3753: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3754: }
3755: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3756: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3758: if (!v) {
3759: PetscFree(values);
3760: }
3761: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3762: return(0);
3763: }
3767: /*@
3768: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3769: (the default parallel PETSc format).
3771: Collective on MPI_Comm
3773: Input Parameters:
3774: + B - the matrix
3775: . i - the indices into j for the start of each local row (starts with zero)
3776: . j - the column indices for each local row (starts with zero)
3777: - v - optional values in the matrix
3779: Level: developer
3781: Notes:
3782:    The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3783:    thus you CANNOT change the matrix entries by changing the values of v[] after you have
3784:    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3786:    The i and j indices are 0 based, and the i array holds the offsets into the local j array.
3788:    The format used for the sparse matrix input is equivalent to a
3789:    row-major ordering, i.e. for the following matrix, the input data expected is
3790: as shown:
3792: 1 0 0
3793: 2 0 3 P0
3794: -------
3795: 4 5 6 P1
3797: Process0 [P0]: rows_owned=[0,1]
3798: i = {0,1,3} [size = nrow+1 = 2+1]
3799:    j = {0,0,2} [size = nz = 3]
3800:    v = {1,2,3} [size = nz = 3]
3802:  Process1 [P1]: rows_owned=[2]
3803:    i = {0,3} [size = nrow+1 = 1+1]
3804:    j = {0,1,2} [size = nz = 3]
3805:    v = {4,5,6} [size = nz = 3]
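   The following is an editor's sketch (not part of the original manual page) of how process 0
   could drive this routine for the example above; it assumes B was already created with
   MatCreate(), sized with MatSetSizes(), and given type MATMPIAIJ, and that process 1 makes
   the matching collective call with its own local arrays:

.vb
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar v[] = {1.0,2.0,3.0};

   MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
   On return B is assembled; the arrays were copied, so they may be freed or reused.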
3807: .keywords: matrix, aij, compressed row, sparse, parallel
3809: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3810: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3811: @*/
3812: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3813: {
3817: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3818: return(0);
3819: }
3823: /*@C
3824: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3825: (the default parallel PETSc format). For good matrix assembly performance
3826: the user should preallocate the matrix storage by setting the parameters
3827: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3828: performance can be increased by more than a factor of 50.
3830: Collective on MPI_Comm
3832: Input Parameters:
3833: + A - the matrix
3834: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3835: (same value is used for all local rows)
3836: . d_nnz - array containing the number of nonzeros in the various rows of the
3837: DIAGONAL portion of the local submatrix (possibly different for each row)
3838: or NULL, if d_nz is used to specify the nonzero structure.
3839: The size of this array is equal to the number of local rows, i.e 'm'.
3840: For matrices that will be factored, you must leave room for (and set)
3841: the diagonal entry even if it is zero.
3842: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3843: submatrix (same value is used for all local rows).
3844: - o_nnz - array containing the number of nonzeros in the various rows of the
3845: OFF-DIAGONAL portion of the local submatrix (possibly different for
3846: each row) or NULL, if o_nz is used to specify the nonzero
3847: structure. The size of this array is equal to the number
3848: of local rows, i.e 'm'.
3850: If the *_nnz parameter is given then the *_nz parameter is ignored
3852: The AIJ format (also called the Yale sparse matrix format or
3853: compressed row storage (CSR)), is fully compatible with standard Fortran 77
3854: storage. The stored row and column indices begin with zero.
3855: See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3857: The parallel matrix is partitioned such that the first m0 rows belong to
3858: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3859: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3861: The DIAGONAL portion of the local submatrix of a processor can be defined
3862:    as the submatrix which is obtained by extracting the part corresponding to
3863: the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3864: first row that belongs to the processor, r2 is the last row belonging to
3865:    this processor, and c1-c2 is the range of indices of the local part of a
3866: vector suitable for applying the matrix to. This is an mxn matrix. In the
3867: common case of a square matrix, the row and column ranges are the same and
3868: the DIAGONAL part is also square. The remaining portion of the local
3869:    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3871: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3873: You can call MatGetInfo() to get information on how effective the preallocation was;
3874: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3875: You can also run with the option -info and look for messages with the string
3876: malloc in them to see if additional memory allocation was needed.
3878: Example usage:
3880: Consider the following 8x8 matrix with 34 non-zero values, that is
3881:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3882: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3883: as follows:
3885: .vb
3886: 1 2 0 | 0 3 0 | 0 4
3887: Proc0 0 5 6 | 7 0 0 | 8 0
3888: 9 0 10 | 11 0 0 | 12 0
3889: -------------------------------------
3890: 13 0 14 | 15 16 17 | 0 0
3891: Proc1 0 18 0 | 19 20 21 | 0 0
3892: 0 0 0 | 22 23 0 | 24 0
3893: -------------------------------------
3894: Proc2 25 26 27 | 0 0 28 | 29 0
3895: 30 0 0 | 31 32 33 | 0 34
3896: .ve
3898: This can be represented as a collection of submatrices as:
3900: .vb
3901: A B C
3902: D E F
3903: G H I
3904: .ve
3906: Where the submatrices A,B,C are owned by proc0, D,E,F are
3907: owned by proc1, G,H,I are owned by proc2.
3909: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3910: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3911: The 'M','N' parameters are 8,8, and have the same values on all procs.
3913: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3914: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3915: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3916: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3917:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3918:    matrix, and [DF] as another SeqAIJ matrix.
3920: When d_nz, o_nz parameters are specified, d_nz storage elements are
3921: allocated for every row of the local diagonal submatrix, and o_nz
3922: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3923:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3924:    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3925: In this case, the values of d_nz,o_nz are:
3926: .vb
3927: proc0 : dnz = 2, o_nz = 2
3928: proc1 : dnz = 3, o_nz = 2
3929: proc2 : dnz = 1, o_nz = 4
3930: .ve
3931: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3932: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3933:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3934: 34 values.
3936: When d_nnz, o_nnz parameters are specified, the storage is specified
3937:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3938: In the above case the values for d_nnz,o_nnz are:
3939: .vb
3940: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3941: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3942: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3943: .ve
3944:    Here the space allocated is the sum of all the above values, i.e. 34, and
3945: hence pre-allocation is perfect.
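   As an editor's sketch (an assumption-laden illustration, not part of the original manual page),
   process 0 of the 8x8 example could supply the per-row counts above as follows, assuming A was
   already created, sized with MatSetSizes(A,3,3,8,8), and given type MATMPIAIJ:

.vb
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};

   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   The d_nz and o_nz arguments are ignored here because the arrays are provided; the values are
   then inserted with MatSetValues() and the matrix assembled as usual.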
3947: Level: intermediate
3949: .keywords: matrix, aij, compressed row, sparse, parallel
3951: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3952: MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3953: @*/
3954: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3955: {
3961: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
3962: return(0);
3963: }
3967: /*@
3968: MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3969: CSR format the local rows.
3971: Collective on MPI_Comm
3973: Input Parameters:
3974: + comm - MPI communicator
3975: . m - number of local rows (Cannot be PETSC_DECIDE)
3976: . n - This value should be the same as the local size used in creating the
3977:        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3978:        calculated if N is given). For square matrices n is almost always m.
3979: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3980: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3981: . i - row indices
3982: . j - column indices
3983: - a - matrix values
3985: Output Parameter:
3986: . mat - the matrix
3988: Level: intermediate
3990: Notes:
3991: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3992: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3993: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3995:    The i and j indices are 0 based, and the i array holds the offsets into the local j array.
3997:    The format used for the sparse matrix input is equivalent to a
3998:    row-major ordering, i.e. for the following matrix, the input data expected is
3999: as shown:
4001: 1 0 0
4002: 2 0 3 P0
4003: -------
4004: 4 5 6 P1
4006: Process0 [P0]: rows_owned=[0,1]
4007: i = {0,1,3} [size = nrow+1 = 2+1]
4008:    j = {0,0,2} [size = nz = 3]
4009:    v = {1,2,3} [size = nz = 3]
4011:  Process1 [P1]: rows_owned=[2]
4012:    i = {0,3} [size = nrow+1 = 1+1]
4013:    j = {0,1,2} [size = nz = 3]
4014:    v = {4,5,6} [size = nz = 3]
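   A minimal call sketch for the example above (an editor's illustration; it assumes PETSc is
   initialized on two processes and that process 1 passes its own local arrays in the same,
   collective, call):

.vb
   Mat         A;
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar a[] = {1.0,2.0,3.0};

   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve
   On return A is assembled; since the arrays are copied they may be freed afterwards.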
4016: .keywords: matrix, aij, compressed row, sparse, parallel
4018: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4019: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4020: @*/
4021: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4022: {
4026: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4027: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4028: MatCreate(comm,mat);
4029: MatSetSizes(*mat,m,n,M,N);
4030: /* MatSetBlockSizes(M,bs,cbs); */
4031: MatSetType(*mat,MATMPIAIJ);
4032: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
4033: return(0);
4034: }
4038: /*@C
4039: MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4040: (the default parallel PETSc format). For good matrix assembly performance
4041: the user should preallocate the matrix storage by setting the parameters
4042: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4043: performance can be increased by more than a factor of 50.
4045: Collective on MPI_Comm
4047: Input Parameters:
4048: + comm - MPI communicator
4049: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4050: This value should be the same as the local size used in creating the
4051: y vector for the matrix-vector product y = Ax.
4052: . n - This value should be the same as the local size used in creating the
4053:        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4054:        calculated if N is given). For square matrices n is almost always m.
4055: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4056: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4057: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4058: (same value is used for all local rows)
4059: . d_nnz - array containing the number of nonzeros in the various rows of the
4060: DIAGONAL portion of the local submatrix (possibly different for each row)
4061: or NULL, if d_nz is used to specify the nonzero structure.
4062: The size of this array is equal to the number of local rows, i.e 'm'.
4063: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4064: submatrix (same value is used for all local rows).
4065: - o_nnz - array containing the number of nonzeros in the various rows of the
4066: OFF-DIAGONAL portion of the local submatrix (possibly different for
4067: each row) or NULL, if o_nz is used to specify the nonzero
4068: structure. The size of this array is equal to the number
4069: of local rows, i.e 'm'.
4071: Output Parameter:
4072: . A - the matrix
4074: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4075:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4076: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4078: Notes:
4079: If the *_nnz parameter is given then the *_nz parameter is ignored
4081: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4082: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4083: storage requirements for this matrix.
4085: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4086:    processor then it must be used on all processors that share the object for
4087: that argument.
4089: The user MUST specify either the local or global matrix dimensions
4090: (possibly both).
4092: The parallel matrix is partitioned across processors such that the
4093: first m0 rows belong to process 0, the next m1 rows belong to
4094:    process 1, the next m2 rows belong to process 2, etc., where
4095:    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4096:    values corresponding to an [m x N] submatrix.
4098: The columns are logically partitioned with the n0 columns belonging
4099: to 0th partition, the next n1 columns belonging to the next
4100:    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4102: The DIAGONAL portion of the local submatrix on any given processor
4103: is the submatrix corresponding to the rows and columns m,n
4104:    corresponding to the given processor, i.e. the diagonal matrix on
4105: process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4106: etc. The remaining portion of the local submatrix [m x (N-n)]
4107:    constitutes the OFF-DIAGONAL portion. The example below better
4108: illustrates this concept.
4110: For a square global matrix we define each processor's diagonal portion
4111: to be its local rows and the corresponding columns (a square submatrix);
4112: each processor's off-diagonal portion encompasses the remainder of the
4113: local matrix (a rectangular submatrix).
4115: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4117: When calling this routine with a single process communicator, a matrix of
4118: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4119: type of communicator, use the construction mechanism:
4120: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4122: By default, this format uses inodes (identical nodes) when possible.
4123: We search for consecutive rows with the same nonzero structure, thereby
4124: reusing matrix information to achieve increased efficiency.
4126: Options Database Keys:
4127: + -mat_no_inode - Do not use inodes
4128: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4129: - -mat_aij_oneindex - Internally use indexing starting at 1
4130: rather than 0. Note that when calling MatSetValues(),
4131: the user still MUST index entries starting at 0!
4134: Example usage:
4136: Consider the following 8x8 matrix with 34 non-zero values, that is
4137:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4138: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4139: as follows:
4141: .vb
4142: 1 2 0 | 0 3 0 | 0 4
4143: Proc0 0 5 6 | 7 0 0 | 8 0
4144: 9 0 10 | 11 0 0 | 12 0
4145: -------------------------------------
4146: 13 0 14 | 15 16 17 | 0 0
4147: Proc1 0 18 0 | 19 20 21 | 0 0
4148: 0 0 0 | 22 23 0 | 24 0
4149: -------------------------------------
4150: Proc2 25 26 27 | 0 0 28 | 29 0
4151: 30 0 0 | 31 32 33 | 0 34
4152: .ve
4154: This can be represented as a collection of submatrices as:
4156: .vb
4157: A B C
4158: D E F
4159: G H I
4160: .ve
4162: Where the submatrices A,B,C are owned by proc0, D,E,F are
4163: owned by proc1, G,H,I are owned by proc2.
4165: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4166: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4167: The 'M','N' parameters are 8,8, and have the same values on all procs.
4169: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4170: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4171: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4172: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4173:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4174:    matrix, and [DF] as another SeqAIJ matrix.
4176: When d_nz, o_nz parameters are specified, d_nz storage elements are
4177: allocated for every row of the local diagonal submatrix, and o_nz
4178: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4179:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4180:    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4181: In this case, the values of d_nz,o_nz are:
4182: .vb
4183: proc0 : dnz = 2, o_nz = 2
4184: proc1 : dnz = 3, o_nz = 2
4185: proc2 : dnz = 1, o_nz = 4
4186: .ve
4187: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4188: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4189:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4190: 34 values.
4192: When d_nnz, o_nnz parameters are specified, the storage is specified
4193:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4194: In the above case the values for d_nnz,o_nnz are:
4195: .vb
4196: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4197: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4198: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4199: .ve
4200:    Here the space allocated is the sum of all the above values, i.e. 34, and
4201: hence pre-allocation is perfect.
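   A hedged call sketch for process 0 of the 8x8 example above (an editor's illustration, not
   part of the original manual page; the other two processes make the same collective call with
   their own m, n, d_nnz and o_nnz values):

.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};

   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   The matrix is then filled with MatSetValues() and assembled with MatAssemblyBegin() and
   MatAssemblyEnd().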
4203: Level: intermediate
4205: .keywords: matrix, aij, compressed row, sparse, parallel
4207: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4208: MPIAIJ, MatCreateMPIAIJWithArrays()
4209: @*/
4210: PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4211: {
4213: PetscMPIInt size;
4216: MatCreate(comm,A);
4217: MatSetSizes(*A,m,n,M,N);
4218: MPI_Comm_size(comm,&size);
4219: if (size > 1) {
4220: MatSetType(*A,MATMPIAIJ);
4221: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4222: } else {
4223: MatSetType(*A,MATSEQAIJ);
4224: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4225: }
4226: return(0);
4227: }
4231: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4232: {
4233: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4236: *Ad = a->A;
4237: *Ao = a->B;
4238: *colmap = a->garray;
4239: return(0);
4240: }
4244: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4245: {
4247: PetscInt i;
4248: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4251: if (coloring->ctype == IS_COLORING_GLOBAL) {
4252: ISColoringValue *allcolors,*colors;
4253: ISColoring ocoloring;
4255: /* set coloring for diagonal portion */
4256: MatSetColoring_SeqAIJ(a->A,coloring);
4258: /* set coloring for off-diagonal portion */
4259: ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);
4260: PetscMalloc1((a->B->cmap->n+1),&colors);
4261: for (i=0; i<a->B->cmap->n; i++) {
4262: colors[i] = allcolors[a->garray[i]];
4263: }
4264: PetscFree(allcolors);
4265: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4266: MatSetColoring_SeqAIJ(a->B,ocoloring);
4267: ISColoringDestroy(&ocoloring);
4268: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4269: ISColoringValue *colors;
4270: PetscInt *larray;
4271: ISColoring ocoloring;
4273: /* set coloring for diagonal portion */
4274: PetscMalloc1((a->A->cmap->n+1),&larray);
4275: for (i=0; i<a->A->cmap->n; i++) {
4276: larray[i] = i + A->cmap->rstart;
4277: }
4278: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);
4279: PetscMalloc1((a->A->cmap->n+1),&colors);
4280: for (i=0; i<a->A->cmap->n; i++) {
4281: colors[i] = coloring->colors[larray[i]];
4282: }
4283: PetscFree(larray);
4284: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);
4285: MatSetColoring_SeqAIJ(a->A,ocoloring);
4286: ISColoringDestroy(&ocoloring);
4288: /* set coloring for off-diagonal portion */
4289: PetscMalloc1((a->B->cmap->n+1),&larray);
4290: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);
4291: PetscMalloc1((a->B->cmap->n+1),&colors);
4292: for (i=0; i<a->B->cmap->n; i++) {
4293: colors[i] = coloring->colors[larray[i]];
4294: }
4295: PetscFree(larray);
4296: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4297: MatSetColoring_SeqAIJ(a->B,ocoloring);
4298: ISColoringDestroy(&ocoloring);
4299: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4300: return(0);
4301: }
4305: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4306: {
4307: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4311: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
4312: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
4313: return(0);
4314: }
4318: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4319: {
4321: PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4322: PetscInt *indx;
4325: /* This routine will ONLY return MPIAIJ type matrix */
4326: MatGetSize(inmat,&m,&N);
4327: MatGetBlockSizes(inmat,&bs,&cbs);
4328: if (n == PETSC_DECIDE) {
4329: PetscSplitOwnership(comm,&n,&N);
4330: }
4331: /* Check sum(n) = N */
4332: MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
4333: if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4335: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4336: rstart -= m;
4338: MatPreallocateInitialize(comm,m,n,dnz,onz);
4339: for (i=0; i<m; i++) {
4340: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4341: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4342: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4343: }
4345: MatCreate(comm,outmat);
4346: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4347: MatSetBlockSizes(*outmat,bs,cbs);
4348: MatSetType(*outmat,MATMPIAIJ);
4349: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4350: MatPreallocateFinalize(dnz,onz);
4351: return(0);
4352: }
4356: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4357: {
4359: PetscInt m,N,i,rstart,nnz,Ii;
4360: PetscInt *indx;
4361: PetscScalar *values;
4364: MatGetSize(inmat,&m,&N);
4365: MatGetOwnershipRange(outmat,&rstart,NULL);
4366: for (i=0; i<m; i++) {
4367: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4368: Ii = i + rstart;
4369: MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4370: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4371: }
4372: MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);
4373: MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);
4374: return(0);
4375: }
4379: /*@
4380: MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4381: matrices from each processor
4383: Collective on MPI_Comm
4385: Input Parameters:
4386: +    comm - the communicator the parallel matrix will live on
4387: .    inmat - the input sequential matrix on each process
4388: . n - number of local columns (or PETSC_DECIDE)
4389: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4391: Output Parameter:
4392: . outmat - the parallel matrix generated
4394: Level: advanced
4396: Notes: The number of columns of the matrix in EACH processor MUST be the same.
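   A minimal usage sketch (an editor's illustration; it assumes inmat is an assembled SeqAIJ
   matrix on every process, all with the same number of columns):

.vb
   Mat C;
   MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
   MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve
   The second call reuses the already created parallel matrix after the values (but not the
   nonzero pattern) of inmat have changed.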
4398: @*/
4399: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4400: {
4402: PetscMPIInt size;
4405: MPI_Comm_size(comm,&size);
4406: PetscLogEventBegin(MAT_Merge,inmat,0,0,0);
4407: if (size == 1) {
4408: if (scall == MAT_INITIAL_MATRIX) {
4409: MatDuplicate(inmat,MAT_COPY_VALUES,outmat);
4410: } else {
4411: MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
4412: }
4413: } else {
4414: if (scall == MAT_INITIAL_MATRIX) {
4415: MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);
4416: }
4417: MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);
4418: }
4419: PetscLogEventEnd(MAT_Merge,inmat,0,0,0);
4420: return(0);
4421: }
4425: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4426: {
4427: PetscErrorCode ierr;
4428: PetscMPIInt rank;
4429: PetscInt m,N,i,rstart,nnz;
4430: size_t len;
4431: const PetscInt *indx;
4432: PetscViewer out;
4433: char *name;
4434: Mat B;
4435: const PetscScalar *values;
4438: MatGetLocalSize(A,&m,0);
4439: MatGetSize(A,0,&N);
4440: /* Should this be the type of the diagonal block of A? */
4441: MatCreate(PETSC_COMM_SELF,&B);
4442: MatSetSizes(B,m,N,m,N);
4443: MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);
4444: MatSetType(B,MATSEQAIJ);
4445: MatSeqAIJSetPreallocation(B,0,NULL);
4446: MatGetOwnershipRange(A,&rstart,0);
4447: for (i=0; i<m; i++) {
4448: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4449: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4450: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4451: }
4452: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4453: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4455: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
4456: PetscStrlen(outfile,&len);
4457: PetscMalloc1((len+5),&name);
4458: sprintf(name,"%s.%d",outfile,rank);
4459: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4460: PetscFree(name);
4461: MatView(B,out);
4462: PetscViewerDestroy(&out);
4463: MatDestroy(&B);
4464: return(0);
4465: }
4467: extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4470: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4471: {
4472: PetscErrorCode ierr;
4473: Mat_Merge_SeqsToMPI *merge;
4474: PetscContainer container;
4477: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);
4478: if (container) {
4479: PetscContainerGetPointer(container,(void**)&merge);
4480: PetscFree(merge->id_r);
4481: PetscFree(merge->len_s);
4482: PetscFree(merge->len_r);
4483: PetscFree(merge->bi);
4484: PetscFree(merge->bj);
4485: PetscFree(merge->buf_ri[0]);
4486: PetscFree(merge->buf_ri);
4487: PetscFree(merge->buf_rj[0]);
4488: PetscFree(merge->buf_rj);
4489: PetscFree(merge->coi);
4490: PetscFree(merge->coj);
4491: PetscFree(merge->owners_co);
4492: PetscLayoutDestroy(&merge->rowmap);
4493: PetscFree(merge);
4494: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4495: }
4496: MatDestroy_MPIAIJ(A);
4497: return(0);
4498: }
4500: #include <../src/mat/utils/freespace.h>
4501: #include <petscbt.h>
4505: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4506: {
4507: PetscErrorCode ierr;
4508: MPI_Comm comm;
4509: Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data;
4510: PetscMPIInt size,rank,taga,*len_s;
4511: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4512: PetscInt proc,m;
4513: PetscInt **buf_ri,**buf_rj;
4514: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4515: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4516: MPI_Request *s_waits,*r_waits;
4517: MPI_Status *status;
4518: MatScalar *aa=a->a;
4519: MatScalar **abuf_r,*ba_i;
4520: Mat_Merge_SeqsToMPI *merge;
4521: PetscContainer container;
4524: PetscObjectGetComm((PetscObject)mpimat,&comm);
4525: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4527: MPI_Comm_size(comm,&size);
4528: MPI_Comm_rank(comm,&rank);
4530: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
4531: PetscContainerGetPointer(container,(void**)&merge);
4533: bi = merge->bi;
4534: bj = merge->bj;
4535: buf_ri = merge->buf_ri;
4536: buf_rj = merge->buf_rj;
4538: PetscMalloc1(size,&status);
4539: owners = merge->rowmap->range;
4540: len_s = merge->len_s;
4542: /* send and recv matrix values */
4543: /*-----------------------------*/
4544: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4545: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4547: PetscMalloc1((merge->nsend+1),&s_waits);
4548: for (proc=0,k=0; proc<size; proc++) {
4549: if (!len_s[proc]) continue;
4550: i = owners[proc];
4551: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4552: k++;
4553: }
4555: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4556: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4557: PetscFree(status);
4559: PetscFree(s_waits);
4560: PetscFree(r_waits);
4562: /* insert mat values of mpimat */
4563: /*----------------------------*/
4564: PetscMalloc1(N,&ba_i);
4565: PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);
4567: for (k=0; k<merge->nrecv; k++) {
4568: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4569: nrows = *(buf_ri_k[k]);
4570: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4571:     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4572: }
4574: /* set values of ba */
4575: m = merge->rowmap->n;
4576: for (i=0; i<m; i++) {
4577: arow = owners[rank] + i;
4578: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4579: bnzi = bi[i+1] - bi[i];
4580: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4582: /* add local non-zero vals of this proc's seqmat into ba */
4583: anzi = ai[arow+1] - ai[arow];
4584: aj = a->j + ai[arow];
4585: aa = a->a + ai[arow];
4586: nextaj = 0;
4587: for (j=0; nextaj<anzi; j++) {
4588: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4589: ba_i[j] += aa[nextaj++];
4590: }
4591: }
4593: /* add received vals into ba */
4594: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4595: /* i-th row */
4596: if (i == *nextrow[k]) {
4597: anzi = *(nextai[k]+1) - *nextai[k];
4598: aj = buf_rj[k] + *(nextai[k]);
4599: aa = abuf_r[k] + *(nextai[k]);
4600: nextaj = 0;
4601: for (j=0; nextaj<anzi; j++) {
4602: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4603: ba_i[j] += aa[nextaj++];
4604: }
4605: }
4606: nextrow[k]++; nextai[k]++;
4607: }
4608: }
4609: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4610: }
4611: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4612: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4614: PetscFree(abuf_r[0]);
4615: PetscFree(abuf_r);
4616: PetscFree(ba_i);
4617: PetscFree3(buf_ri_k,nextrow,nextai);
4618: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4619: return(0);
4620: }
4622: extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4626: PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4627: {
4628: PetscErrorCode ierr;
4629: Mat B_mpi;
4630: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4631: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4632: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4633: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4634: PetscInt len,proc,*dnz,*onz,bs,cbs;
4635: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4636: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4637: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4638: MPI_Status *status;
4639: PetscFreeSpaceList free_space=NULL,current_space=NULL;
4640: PetscBT lnkbt;
4641: Mat_Merge_SeqsToMPI *merge;
4642: PetscContainer container;
4645: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4647: /* make sure it is a PETSc comm */
4648: PetscCommDuplicate(comm,&comm,NULL);
4649: MPI_Comm_size(comm,&size);
4650: MPI_Comm_rank(comm,&rank);
4652: PetscNew(&merge);
4653: PetscMalloc1(size,&status);
4655: /* determine row ownership */
4656: /*---------------------------------------------------------*/
4657: PetscLayoutCreate(comm,&merge->rowmap);
4658: PetscLayoutSetLocalSize(merge->rowmap,m);
4659: PetscLayoutSetSize(merge->rowmap,M);
4660: PetscLayoutSetBlockSize(merge->rowmap,1);
4661: PetscLayoutSetUp(merge->rowmap);
4662: PetscMalloc1(size,&len_si);
4663: PetscMalloc1(size,&merge->len_s);
4665: m = merge->rowmap->n;
4666: owners = merge->rowmap->range;
4668: /* determine the number of messages to send, their lengths */
4669: /*---------------------------------------------------------*/
4670: len_s = merge->len_s;
4672: len = 0; /* length of buf_si[] */
4673: merge->nsend = 0;
4674: for (proc=0; proc<size; proc++) {
4675: len_si[proc] = 0;
4676: if (proc == rank) {
4677: len_s[proc] = 0;
4678: } else {
4679: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4680: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4681: }
4682: if (len_s[proc]) {
4683: merge->nsend++;
4684: nrows = 0;
4685: for (i=owners[proc]; i<owners[proc+1]; i++) {
4686: if (ai[i+1] > ai[i]) nrows++;
4687: }
4688: len_si[proc] = 2*(nrows+1);
4689: len += len_si[proc];
4690: }
4691: }
4693: /* determine the number and length of messages to receive for ij-structure */
4694: /*-------------------------------------------------------------------------*/
4695: PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);
4696: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4698: /* post the Irecv of j-structure */
4699: /*-------------------------------*/
4700: PetscCommGetNewTag(comm,&tagj);
4701: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4703: /* post the Isend of j-structure */
4704: /*--------------------------------*/
4705: PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);
4707: for (proc=0, k=0; proc<size; proc++) {
4708: if (!len_s[proc]) continue;
4709: i = owners[proc];
4710: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4711: k++;
4712: }
4714: /* receives and sends of j-structure are complete */
4715: /*------------------------------------------------*/
4716: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4717: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4719: /* send and recv i-structure */
4720: /*---------------------------*/
4721: PetscCommGetNewTag(comm,&tagi);
4722: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4724: PetscMalloc1((len+1),&buf_s);
4725: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4726: for (proc=0,k=0; proc<size; proc++) {
4727: if (!len_s[proc]) continue;
4728: /* form outgoing message for i-structure:
4729: buf_si[0]: nrows to be sent
4730: [1:nrows]: row index (global)
4731: [nrows+1:2*nrows+1]: i-structure index
4732: */
4733: /*-------------------------------------------*/
4734: nrows = len_si[proc]/2 - 1;
4735: buf_si_i = buf_si + nrows+1;
4736: buf_si[0] = nrows;
4737: buf_si_i[0] = 0;
4738: nrows = 0;
4739: for (i=owners[proc]; i<owners[proc+1]; i++) {
4740: anzi = ai[i+1] - ai[i];
4741: if (anzi) {
4742: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4743: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4744: nrows++;
4745: }
4746: }
4747: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4748: k++;
4749: buf_si += len_si[proc];
4750: }
4752: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4753: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4755: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4756: for (i=0; i<merge->nrecv; i++) {
4757: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4758: }
4760: PetscFree(len_si);
4761: PetscFree(len_ri);
4762: PetscFree(rj_waits);
4763: PetscFree2(si_waits,sj_waits);
4764: PetscFree(ri_waits);
4765: PetscFree(buf_s);
4766: PetscFree(status);
4768: /* compute a local seq matrix in each processor */
4769: /*----------------------------------------------*/
4770: /* allocate bi array and free space for accumulating nonzero column info */
4771: PetscMalloc1((m+1),&bi);
4772: bi[0] = 0;
4774: /* create and initialize a linked list */
4775: nlnk = N+1;
4776: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4778: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4779: len = ai[owners[rank+1]] - ai[owners[rank]];
4780: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
4782: current_space = free_space;
4784: /* determine symbolic info for each local row */
4785: PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);
4787: for (k=0; k<merge->nrecv; k++) {
4788: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4789: nrows = *buf_ri_k[k];
4790: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4791:     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4792: }
4794: MatPreallocateInitialize(comm,m,n,dnz,onz);
4795: len = 0;
4796: for (i=0; i<m; i++) {
4797: bnzi = 0;
4798: /* add local non-zero cols of this proc's seqmat into lnk */
4799: arow = owners[rank] + i;
4800: anzi = ai[arow+1] - ai[arow];
4801: aj = a->j + ai[arow];
4802: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4803: bnzi += nlnk;
4804: /* add received col data into lnk */
4805: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4806: if (i == *nextrow[k]) { /* i-th row */
4807: anzi = *(nextai[k]+1) - *nextai[k];
4808: aj = buf_rj[k] + *nextai[k];
4809: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4810: bnzi += nlnk;
4811: nextrow[k]++; nextai[k]++;
4812: }
4813: }
4814: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4816: /* if free space is not available, make more free space */
4817: if (current_space->local_remaining<bnzi) {
4818: PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);
4819: nspacedouble++;
4820: }
4821: /* copy data into free space, then initialize lnk */
4822: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4823: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4825: current_space->array += bnzi;
4826: current_space->local_used += bnzi;
4827: current_space->local_remaining -= bnzi;
4829: bi[i+1] = bi[i] + bnzi;
4830: }
4832: PetscFree3(buf_ri_k,nextrow,nextai);
4834: PetscMalloc1((bi[m]+1),&bj);
4835: PetscFreeSpaceContiguous(&free_space,bj);
4836: PetscLLDestroy(lnk,lnkbt);
4838: /* create symbolic parallel matrix B_mpi */
4839: /*---------------------------------------*/
4840: MatGetBlockSizes(seqmat,&bs,&cbs);
4841: MatCreate(comm,&B_mpi);
4842: if (n==PETSC_DECIDE) {
4843: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4844: } else {
4845: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4846: }
4847: MatSetBlockSizes(B_mpi,bs,cbs);
4848: MatSetType(B_mpi,MATMPIAIJ);
4849: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4850: MatPreallocateFinalize(dnz,onz);
4851: MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
4853: /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4854: B_mpi->assembled = PETSC_FALSE;
4855: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4856: merge->bi = bi;
4857: merge->bj = bj;
4858: merge->buf_ri = buf_ri;
4859: merge->buf_rj = buf_rj;
4860: merge->coi = NULL;
4861: merge->coj = NULL;
4862: merge->owners_co = NULL;
4864: PetscCommDestroy(&comm);
4866: /* attach the supporting struct to B_mpi for reuse */
4867: PetscContainerCreate(PETSC_COMM_SELF,&container);
4868: PetscContainerSetPointer(container,merge);
4869: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4870: PetscContainerDestroy(&container);
4871: *mpimat = B_mpi;
4873: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4874: return(0);
4875: }
4879: /*@C
4880: MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4881: matrices from each processor
4883: Collective on MPI_Comm
4885: Input Parameters:
4886: +   comm - the communicator the parallel matrix will live on
4887: .   seqmat - the input sequential matrix on each process
4888: . m - number of local rows (or PETSC_DECIDE)
4889: . n - number of local columns (or PETSC_DECIDE)
4890: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4892: Output Parameter:
4893: . mpimat - the parallel matrix generated
4895: Level: advanced
4897: Notes:
4898: The dimensions of the sequential matrix in each processor MUST be the same.
4899:      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4900: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
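   A minimal usage sketch (an editor's illustration; it assumes seqmat is an assembled SeqAIJ
   matrix with the same global dimensions on every process):

.vb
   Mat C;
   MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
   MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve
   The second call reuses the symbolic data attached to C when only the numerical values of
   seqmat have changed.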
4901: @*/
4902: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4903: {
4905: PetscMPIInt size;
4908: MPI_Comm_size(comm,&size);
4909: if (size == 1) {
4910: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4911: if (scall == MAT_INITIAL_MATRIX) {
4912: MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);
4913: } else {
4914: MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);
4915: }
4916: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4917: return(0);
4918: }
4919: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4920: if (scall == MAT_INITIAL_MATRIX) {
4921: MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);
4922: }
4923: MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);
4924: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4925: return(0);
4926: }
4930: /*@
4931:     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4932:           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4933: with MatGetSize()
4935: Not Collective
4937: Input Parameters:
4938: + A - the matrix
4939: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4941: Output Parameter:
4942: . A_loc - the local sequential matrix generated
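    A minimal usage sketch (an editor's illustration; A is assumed to be an assembled MPIAIJ matrix):

.vb
    Mat A_loc;
    MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
    MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
    MatDestroy(&A_loc);
.ve
    The MAT_REUSE_MATRIX call refreshes the values of A_loc after A has changed numerically, and
    the caller destroys A_loc when done.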
4944: Level: developer
4946: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4948: @*/
4949: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4950: {
4952: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4953: Mat_SeqAIJ *mat,*a,*b;
4954: PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4955: MatScalar *aa,*ba,*cam;
4956: PetscScalar *ca;
4957: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4958: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4959: PetscBool match;
4962: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4963: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4964: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4965: a = (Mat_SeqAIJ*)(mpimat->A)->data;
4966: b = (Mat_SeqAIJ*)(mpimat->B)->data;
4967: ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4968: aa = a->a; ba = b->a;
4969: if (scall == MAT_INITIAL_MATRIX) {
4970: PetscMalloc1((1+am),&ci);
4971: ci[0] = 0;
4972: for (i=0; i<am; i++) {
4973: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4974: }
4975: PetscMalloc1((1+ci[am]),&cj);
4976: PetscMalloc1((1+ci[am]),&ca);
4977: k = 0;
4978: for (i=0; i<am; i++) {
4979: ncols_o = bi[i+1] - bi[i];
4980: ncols_d = ai[i+1] - ai[i];
4981: /* off-diagonal portion of A */
4982: for (jo=0; jo<ncols_o; jo++) {
4983: col = cmap[*bj];
4984: if (col >= cstart) break;
4985: cj[k] = col; bj++;
4986: ca[k++] = *ba++;
4987: }
4988: /* diagonal portion of A */
4989: for (j=0; j<ncols_d; j++) {
4990: cj[k] = cstart + *aj++;
4991: ca[k++] = *aa++;
4992: }
4993: /* off-diagonal portion of A */
4994: for (j=jo; j<ncols_o; j++) {
4995: cj[k] = cmap[*bj++];
4996: ca[k++] = *ba++;
4997: }
4998: }
4999: /* put together the new matrix */
5000: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
5001: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5002: /* Since these are PETSc arrays, change flags to free them as necessary. */
5003: mat = (Mat_SeqAIJ*)(*A_loc)->data;
5004: mat->free_a = PETSC_TRUE;
5005: mat->free_ij = PETSC_TRUE;
5006: mat->nonew = 0;
5007: } else if (scall == MAT_REUSE_MATRIX) {
5008: mat=(Mat_SeqAIJ*)(*A_loc)->data;
5009: ci = mat->i; cj = mat->j; cam = mat->a;
5010: for (i=0; i<am; i++) {
5011: /* off-diagonal portion of A */
5012: ncols_o = bi[i+1] - bi[i];
5013: for (jo=0; jo<ncols_o; jo++) {
5014: col = cmap[*bj];
5015: if (col >= cstart) break;
5016: *cam++ = *ba++; bj++;
5017: }
5018: /* diagonal portion of A */
5019: ncols_d = ai[i+1] - ai[i];
5020: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5021: /* off-diagonal portion of A */
5022: for (j=jo; j<ncols_o; j++) {
5023: *cam++ = *ba++; bj++;
5024: }
5025: }
5026: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5027: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
5028: return(0);
5029: }
5033: /*@C
5034: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5036: Not Collective
5038: Input Parameters:
5039: + A - the matrix
5040: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5041: - row, col - index sets of rows and columns to extract (or NULL)
5043: Output Parameter:
5044: . A_loc - the local sequential matrix generated
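    A minimal usage sketch (an editor's illustration; A is assumed to be an assembled MPIAIJ
    matrix, and the NULL row and col arguments request all local rows and all nonzero columns):

.vb
    Mat A_loc;
    MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
    MatDestroy(&A_loc);
.ve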
5046: Level: developer
5048: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5050: @*/
5051: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5052: {
5053: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5055: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5056: IS isrowa,iscola;
5057: Mat *aloc;
5058: PetscBool match;
5061: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5062: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5063: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
5064: if (!row) {
5065: start = A->rmap->rstart; end = A->rmap->rend;
5066: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
5067: } else {
5068: isrowa = *row;
5069: }
5070: if (!col) {
5071: start = A->cmap->rstart;
5072: cmap = a->garray;
5073: nzA = a->A->cmap->n;
5074: nzB = a->B->cmap->n;
5075: PetscMalloc1((nzA+nzB), &idx);
5076: ncols = 0;
5077: for (i=0; i<nzB; i++) {
5078: if (cmap[i] < start) idx[ncols++] = cmap[i];
5079: else break;
5080: }
5081: imark = i;
5082: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5083: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5084: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
5085: } else {
5086: iscola = *col;
5087: }
5088: if (scall != MAT_INITIAL_MATRIX) {
5089: PetscMalloc(sizeof(Mat),&aloc);
5090: aloc[0] = *A_loc;
5091: }
5092: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
5093: *A_loc = aloc[0];
5094: PetscFree(aloc);
5095: if (!row) {
5096: ISDestroy(&isrowa);
5097: }
5098: if (!col) {
5099: ISDestroy(&iscola);
5100: }
5101: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5102: return(0);
5103: }
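/*
   A hedged usage sketch (illustration only, not part of the original file): passing NULL for
   row and col selects all local rows and the nonzero columns, as described above.

     Mat A_loc = NULL;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);  // all local rows, nonzero columns
     // ... use A_loc ...
     MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,NULL,NULL,&A_loc);    // refresh the values only
     MatDestroy(&A_loc);
*/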
5107: /*@C
5108: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5110: Collective on Mat
5112: Input Parameters:
5113: + A,B - the matrices in mpiaij format
5114: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5115: - rowb, colb - index sets of rows and columns of B to extract (or NULL)
5117: Output Parameters:
5118: + rowb, colb - the index sets of rows and columns of B that were used (created here if NULL was passed in)
5119: - B_seq - the sequential matrix generated
5121: Level: developer
5123: @*/
5124: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5125: {
5126: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5128: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5129: IS isrowb,iscolb;
5130: Mat *bseq=NULL;
5133: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5134: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5135: }
5136: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5138: if (scall == MAT_INITIAL_MATRIX) {
5139: start = A->cmap->rstart;
5140: cmap = a->garray;
5141: nzA = a->A->cmap->n;
5142: nzB = a->B->cmap->n;
5143: PetscMalloc1((nzA+nzB), &idx);
5144: ncols = 0;
5145: for (i=0; i<nzB; i++) { /* global rows of B below the local row range */
5146: if (cmap[i] < start) idx[ncols++] = cmap[i];
5147: else break;
5148: }
5149: imark = i;
5150: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5151: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above the local row range */
5152: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5153: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5154: } else {
5155: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5156: isrowb = *rowb; iscolb = *colb;
5157: PetscMalloc(sizeof(Mat),&bseq);
5158: bseq[0] = *B_seq;
5159: }
5160: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5161: *B_seq = bseq[0];
5162: PetscFree(bseq);
5163: if (!rowb) {
5164: ISDestroy(&isrowb);
5165: } else {
5166: *rowb = isrowb;
5167: }
5168: if (!colb) {
5169: ISDestroy(&iscolb);
5170: } else {
5171: *colb = iscolb;
5172: }
5173: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5174: return(0);
5175: }
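/*
   Usage sketch (illustrative, not part of the original file): pass empty IS handles and
   MAT_INITIAL_MATRIX on the first call; the returned rowb/colb must be kept and passed back
   for the MAT_REUSE_MATRIX path, which requires them.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);  // gather the needed rows of B
     // ... values of B change, nonzero pattern stays the same ...
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);    // refresh B_seq using the saved index sets
     ISDestroy(&rowb);
     ISDestroy(&colb);
     MatDestroy(&B_seq);
*/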
5179: /*
5180: MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5181: of the OFF-DIAGONAL portion of local A
5183: Collective on Mat
5185: Input Parameters:
5186: + A,B - the matrices in mpiaij format
5187: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5189: Output Parameters:
5190: + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5191: . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5192: . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5193: - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5195: Level: developer
5197: */
5198: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5199: {
5200: VecScatter_MPI_General *gen_to,*gen_from;
5201: PetscErrorCode ierr;
5202: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5203: Mat_SeqAIJ *b_oth;
5204: VecScatter ctx =a->Mvctx;
5205: MPI_Comm comm;
5206: PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5207: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5208: PetscScalar *rvalues,*svalues;
5209: MatScalar *b_otha,*bufa,*bufA;
5210: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5211: MPI_Request *rwaits = NULL,*swaits = NULL;
5212: MPI_Status *sstatus,rstatus;
5213: PetscMPIInt jj;
5214: PetscInt *cols,sbs,rbs;
5215: PetscScalar *vals;
5218: PetscObjectGetComm((PetscObject)A,&comm);
5219: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5220: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5221: }
5222: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5223: MPI_Comm_rank(comm,&rank);
5225: gen_to = (VecScatter_MPI_General*)ctx->todata;
5226: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5227: rvalues = gen_from->values; /* reused here as a buffer for the row lengths being received */
5228: svalues = gen_to->values; /* reused here as a buffer for the row lengths being sent */
5229: nrecvs = gen_from->n;
5230: nsends = gen_to->n;
5232: PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);
5233: srow = gen_to->indices; /* local row index to be sent */
5234: sstarts = gen_to->starts;
5235: sprocs = gen_to->procs;
5236: sstatus = gen_to->sstatus;
5237: sbs = gen_to->bs;
5238: rstarts = gen_from->starts;
5239: rprocs = gen_from->procs;
5240: rbs = gen_from->bs;
5242: if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5243: if (scall == MAT_INITIAL_MATRIX) {
5244: /* i-array */
5245: /*---------*/
5246: /* post receives */
5247: for (i=0; i<nrecvs; i++) {
5248: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5249: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5250: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5251: }
5253: /* pack the outgoing message */
5254: PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);
5256: sstartsj[0] = 0;
5257: rstartsj[0] = 0;
5258: len = 0; /* total length of j or a array to be sent */
5259: k = 0;
5260: for (i=0; i<nsends; i++) {
5261: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5262: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5263: for (j=0; j<nrows; j++) {
5264: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5265: for (l=0; l<sbs; l++) {
5266: MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL); /* rowlength */
5268: rowlen[j*sbs+l] = ncols;
5270: len += ncols;
5271: MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);
5272: }
5273: k++;
5274: }
5275: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5277: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5278: }
5279: /* recvs and sends of i-array are completed */
5280: i = nrecvs;
5281: while (i--) {
5282: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5283: }
5284: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5286: /* allocate buffers for sending j and a arrays */
5287: PetscMalloc1((len+1),&bufj);
5288: PetscMalloc1((len+1),&bufa);
5290: /* create i-array of B_oth */
5291: PetscMalloc1((aBn+2),&b_othi);
5293: b_othi[0] = 0;
5294: len = 0; /* total length of j or a array to be received */
5295: k = 0;
5296: for (i=0; i<nrecvs; i++) {
5297: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5298: nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5299: for (j=0; j<nrows; j++) {
5300: b_othi[k+1] = b_othi[k] + rowlen[j];
5301: len += rowlen[j]; k++;
5302: }
5303: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5304: }
5306: /* allocate space for j and a arrays of B_oth */
5307: PetscMalloc1((b_othi[aBn]+1),&b_othj);
5308: PetscMalloc1((b_othi[aBn]+1),&b_otha);
5310: /* j-array */
5311: /*---------*/
5312: /* post receives of j-array */
5313: for (i=0; i<nrecvs; i++) {
5314: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5315: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5316: }
5318: /* pack the outgoing message j-array */
5319: k = 0;
5320: for (i=0; i<nsends; i++) {
5321: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5322: bufJ = bufj+sstartsj[i];
5323: for (j=0; j<nrows; j++) {
5324: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5325: for (ll=0; ll<sbs; ll++) {
5326: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5327: for (l=0; l<ncols; l++) {
5328: *bufJ++ = cols[l];
5329: }
5330: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5331: }
5332: }
5333: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5334: }
5336: /* recvs and sends of j-array are completed */
5337: i = nrecvs;
5338: while (i--) {
5339: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5340: }
5341: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5342: } else if (scall == MAT_REUSE_MATRIX) {
5343: sstartsj = *startsj_s;
5344: rstartsj = *startsj_r;
5345: bufa = *bufa_ptr;
5346: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5347: b_otha = b_oth->a;
5348: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5350: /* a-array */
5351: /*---------*/
5352: /* post receives of a-array */
5353: for (i=0; i<nrecvs; i++) {
5354: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5355: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5356: }
5358: /* pack the outgoing message a-array */
5359: k = 0;
5360: for (i=0; i<nsends; i++) {
5361: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5362: bufA = bufa+sstartsj[i];
5363: for (j=0; j<nrows; j++) {
5364: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5365: for (ll=0; ll<sbs; ll++) {
5366: MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5367: for (l=0; l<ncols; l++) {
5368: *bufA++ = vals[l];
5369: }
5370: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5371: }
5372: }
5373: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5374: }
5375: /* recvs and sends of a-array are completed */
5376: i = nrecvs;
5377: while (i--) {
5378: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5379: }
5380: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5381: PetscFree2(rwaits,swaits);
5383: if (scall == MAT_INITIAL_MATRIX) {
5384: /* put together the new matrix */
5385: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5387: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5388: /* Since these are PETSc arrays, change flags to free them as necessary. */
5389: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5390: b_oth->free_a = PETSC_TRUE;
5391: b_oth->free_ij = PETSC_TRUE;
5392: b_oth->nonew = 0;
5394: PetscFree(bufj);
5395: if (!startsj_s || !bufa_ptr) {
5396: PetscFree2(sstartsj,rstartsj);
5397: PetscFree(bufa_ptr);
5398: } else {
5399: *startsj_s = sstartsj;
5400: *startsj_r = rstartsj;
5401: *bufa_ptr = bufa;
5402: }
5403: }
5404: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5405: return(0);
5406: }
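/*
   Usage sketch under the interface documented above (illustration only): keeping startsj_s,
   startsj_r and bufa between calls lets the MAT_REUSE_MATRIX path skip the index (i- and
   j-array) communication; the caller frees them when done.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
     // ... values of B change, nonzero pattern stays the same ...
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
     PetscFree2(startsj_s,startsj_r);   // allocated together with PetscMalloc2() above
     PetscFree(bufa);
     MatDestroy(&B_oth);
*/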
5410: /*@C
5411: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5413: Not Collective
5415: Input Parameter:
5416: . A - The matrix in mpiaij format
5418: Output Parameters:
5419: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5420: . colmap - A map from global column index to local index into lvec
5421: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5423: Level: developer
5425: @*/
5426: #if defined(PETSC_USE_CTABLE)
5427: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5428: #else
5429: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5430: #endif
5431: {
5432: Mat_MPIAIJ *a;
5439: a = (Mat_MPIAIJ*) A->data;
5440: if (lvec) *lvec = a->lvec;
5441: if (colmap) *colmap = a->colmap;
5442: if (multScatter) *multScatter = a->Mvctx;
5443: return(0);
5444: }
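/*
   Usage sketch (illustrative only): any output pointer may be NULL if that structure is not
   needed; here only the local vector and the scatter are requested. The returned objects are
   owned by the matrix and must not be destroyed by the caller.

     Vec        lvec  = NULL;
     VecScatter Mvctx = NULL;
     MatGetCommunicationStructs(A,&lvec,NULL,&Mvctx);  // borrow references held inside A
*/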
5446: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5447: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5448: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5452: /*
5453: Computes C = A*B as (B'*A')' since computing A*B directly with a dense A and sparse B is untenable
5455: n p p
5456: ( ) ( ) ( )
5457: m ( A ) * n ( B ) = m ( C )
5458: ( ) ( ) ( )
5460: */
5461: PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5462: {
5464: Mat At,Bt,Ct;
5467: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5468: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5469: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);
5470: MatDestroy(&At);
5471: MatDestroy(&Bt);
5472: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5473: MatDestroy(&Ct);
5474: return(0);
5475: }
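/*
   The approach above rests on the standard transpose identity (stated here for clarity, not
   taken from this file):

     C = A*B   <=>   C^T = B^T * A^T   <=>   C = (B^T * A^T)^T

   so the dense-times-sparse product is formed from a sparse-times-dense product, for which a
   parallel kernel exists, followed by one final transpose.
*/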
5479: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5480: {
5482: PetscInt m=A->rmap->n,n=B->cmap->n;
5483: Mat Cmat;
5486: if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5487: MatCreate(PetscObjectComm((PetscObject)A),&Cmat);
5488: MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5489: MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);
5490: MatSetType(Cmat,MATMPIDENSE);
5491: MatMPIDenseSetPreallocation(Cmat,NULL);
5492: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
5493: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
5495: Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5497: *C = Cmat;
5498: return(0);
5499: }
5501: /* ----------------------------------------------------------------*/
5504: PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5505: {
5509: if (scall == MAT_INITIAL_MATRIX) {
5510: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
5511: MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);
5512: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
5513: }
5514: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
5515: MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);
5516: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
5517: return(0);
5518: }
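/*
   Usage sketch (illustrative; A_dense and B_aij are hypothetical caller-owned matrices of
   type MATMPIDENSE and MATMPIAIJ): this routine is normally reached through the generic
   MatMatMult() interface once it has been composed on the AIJ matrix in MatCreate_MPIAIJ().

     Mat C = NULL;
     MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);  // symbolic + numeric product
     MatMatMult(A_dense,B_aij,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);    // numeric product only
     MatDestroy(&C);
*/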
5520: #if defined(PETSC_HAVE_MUMPS)
5521: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5522: #endif
5523: #if defined(PETSC_HAVE_PASTIX)
5524: PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5525: #endif
5526: #if defined(PETSC_HAVE_SUPERLU_DIST)
5527: PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5528: #endif
5529: #if defined(PETSC_HAVE_CLIQUE)
5530: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5531: #endif
5533: /*MC
5534: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5536: Options Database Keys:
5537: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5539: Level: beginner
5541: .seealso: MatCreateAIJ()
5542: M*/
5546: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5547: {
5548: Mat_MPIAIJ *b;
5550: PetscMPIInt size;
5553: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
5555: PetscNewLog(B,&b);
5556: B->data = (void*)b;
5557: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
5558: B->assembled = PETSC_FALSE;
5559: B->insertmode = NOT_SET_VALUES;
5560: b->size = size;
5562: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
5564: /* build cache for off array entries formed */
5565: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
5567: b->donotstash = PETSC_FALSE;
5568: b->colmap = 0;
5569: b->garray = 0;
5570: b->roworiented = PETSC_TRUE;
5572: /* stuff used for matrix vector multiply */
5573: b->lvec = NULL;
5574: b->Mvctx = NULL;
5576: /* stuff for MatGetRow() */
5577: b->rowindices = 0;
5578: b->rowvalues = 0;
5579: b->getrowactive = PETSC_FALSE;
5581: /* flexible pointer used in CUSP/CUSPARSE classes */
5582: b->spptr = NULL;
5584: #if defined(PETSC_HAVE_MUMPS)
5585: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);
5586: #endif
5587: #if defined(PETSC_HAVE_PASTIX)
5588: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);
5589: #endif
5590: #if defined(PETSC_HAVE_SUPERLU_DIST)
5591: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);
5592: #endif
5593: #if defined(PETSC_HAVE_CLIQUE)
5594: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);
5595: #endif
5596: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);
5597: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);
5598: PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);
5599: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);
5600: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);
5601: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);
5602: PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);
5603: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);
5604: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);
5605: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);
5606: PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);
5607: PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);
5608: PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);
5609: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
5610: return(0);
5611: }
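/*
   A minimal creation sketch for this type (illustrative; M and N are global sizes chosen by
   the caller, and the preallocation counts are rough per-row estimates, not prescribed here):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);   // ~5 nonzeros per row in the diagonal block, ~2 off-diagonal
     // ... MatSetValues() for the locally owned rows ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/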
5615: /*@
5616: MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5617: and "off-diagonal" part of the matrix in CSR format.
5619: Collective on MPI_Comm
5621: Input Parameters:
5622: + comm - MPI communicator
5623: . m - number of local rows (Cannot be PETSC_DECIDE)
5624: . n - number of local columns; this should be the same as the local size used in creating the
5625: x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5626: calculated if N is given). For square matrices n is almost always m.
5627: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5628: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5629: . i - row indices for "diagonal" portion of matrix
5630: . j - column indices for "diagonal" portion of matrix
5631: . a - matrix values for "diagonal" portion of matrix
5632: . oi - row indices for "off-diagonal" portion of matrix
5633: . oj - column indices for "off-diagonal" portion of matrix (global column indices)
5634: - oa - matrix values for "off-diagonal" portion of matrix
5636: Output Parameter:
5637: . mat - the matrix
5639: Level: advanced
5641: Notes:
5642: The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5643: must free the arrays once the matrix has been destroyed and not before.
5645: The i and j indices are 0 based
5647: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5649: This sets local rows and cannot be used to set off-processor values.
5651: Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5652: legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5653: not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5654: the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5655: keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5656: communication if it is known that only local entries will be set.
5658: .keywords: matrix, aij, compressed row, sparse, parallel
5660: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5661: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5662: @*/
5663: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5664: {
5666: Mat_MPIAIJ *maij;
5669: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5670: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5671: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5672: MatCreate(comm,mat);
5673: MatSetSizes(*mat,m,n,M,N);
5674: MatSetType(*mat,MATMPIAIJ);
5675: maij = (Mat_MPIAIJ*) (*mat)->data;
5677: (*mat)->preallocated = PETSC_TRUE;
5679: PetscLayoutSetUp((*mat)->rmap);
5680: PetscLayoutSetUp((*mat)->cmap);
5682: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
5683: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
5685: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
5686: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
5687: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
5688: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
5690: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
5691: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
5692: MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
5693: return(0);
5694: }
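/*
   A hedged sketch of the split-array input (purely illustrative numbers, shown for rank 0 of
   a two-process run; rank 1 would pass its own arrays). Note that the off-diagonal column
   indices oj are global, matching the width cmap->N used for the B block above.

     // 2 local rows, 3 local columns; 3 nonzeros in the diagonal block, 1 in the off-diagonal block
     PetscInt    i[]  = {0,2,3},  j[]  = {0,1,2};   // CSR of the "diagonal" block, local column indices
     PetscScalar a[]  = {1.0,2.0,3.0};
     PetscInt    oi[] = {0,1,1},  oj[] = {4};       // CSR of the "off-diagonal" block, global column indices
     PetscScalar oa[] = {5.0};
     Mat         A;
     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
     // the arrays must remain valid until after MatDestroy(&A)
*/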
5696: /*
5697: Special version for direct calls from Fortran
5698: */
5699: #include <petsc-private/fortranimpl.h>
5701: #if defined(PETSC_HAVE_FORTRAN_CAPS)
5702: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5703: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5704: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5705: #endif
5707: /* Change these macros so can be used in void function */
5708: #undef CHKERRQ
5709: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5710: #undef SETERRQ2
5711: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5712: #undef SETERRQ3
5713: #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5714: #undef SETERRQ
5715: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5719: PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5720: {
5721: Mat mat = *mmat;
5722: PetscInt m = *mm, n = *mn;
5723: InsertMode addv = *maddv;
5724: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5725: PetscScalar value;
5728: MatCheckPreallocated(mat,1);
5729: if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5731: #if defined(PETSC_USE_DEBUG)
5732: else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5733: #endif
5734: {
5735: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5736: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5737: PetscBool roworiented = aij->roworiented;
5739: /* Some Variables required in the macro */
5740: Mat A = aij->A;
5741: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5742: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5743: MatScalar *aa = a->a;
5744: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5745: Mat B = aij->B;
5746: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5747: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5748: MatScalar *ba = b->a;
5750: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5751: PetscInt nonew = a->nonew;
5752: MatScalar *ap1,*ap2;
5755: for (i=0; i<m; i++) {
5756: if (im[i] < 0) continue;
5757: #if defined(PETSC_USE_DEBUG)
5758: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5759: #endif
5760: if (im[i] >= rstart && im[i] < rend) {
5761: row = im[i] - rstart;
5762: lastcol1 = -1;
5763: rp1 = aj + ai[row];
5764: ap1 = aa + ai[row];
5765: rmax1 = aimax[row];
5766: nrow1 = ailen[row];
5767: low1 = 0;
5768: high1 = nrow1;
5769: lastcol2 = -1;
5770: rp2 = bj + bi[row];
5771: ap2 = ba + bi[row];
5772: rmax2 = bimax[row];
5773: nrow2 = bilen[row];
5774: low2 = 0;
5775: high2 = nrow2;
5777: for (j=0; j<n; j++) {
5778: if (roworiented) value = v[i*n+j];
5779: else value = v[i+j*m];
5780: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5781: if (in[j] >= cstart && in[j] < cend) {
5782: col = in[j] - cstart;
5783: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5784: } else if (in[j] < 0) continue;
5785: #if defined(PETSC_USE_DEBUG)
5786: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5787: #endif
5788: else {
5789: if (mat->was_assembled) {
5790: if (!aij->colmap) {
5791: MatCreateColmap_MPIAIJ_Private(mat);
5792: }
5793: #if defined(PETSC_USE_CTABLE)
5794: PetscTableFind(aij->colmap,in[j]+1,&col);
5795: col--;
5796: #else
5797: col = aij->colmap[in[j]] - 1;
5798: #endif
5799: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5800: MatDisAssemble_MPIAIJ(mat);
5801: col = in[j];
5802: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5803: B = aij->B;
5804: b = (Mat_SeqAIJ*)B->data;
5805: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5806: rp2 = bj + bi[row];
5807: ap2 = ba + bi[row];
5808: rmax2 = bimax[row];
5809: nrow2 = bilen[row];
5810: low2 = 0;
5811: high2 = nrow2;
5812: bm = aij->B->rmap->n;
5813: ba = b->a;
5814: }
5815: } else col = in[j];
5816: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5817: }
5818: }
5819: } else if (!aij->donotstash) {
5820: if (roworiented) {
5821: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5822: } else {
5823: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5824: }
5825: }
5826: }
5827: }
5828: PetscFunctionReturnVoid();
5829: }