Actual source code: mpibaij.c
petsc-dev 2014-02-02
2: #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
3: #include <petscblaslapack.h>
4: #include <petscsf.h>
6: extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7: extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8: extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9: extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10: extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11: extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12: extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13: extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
17: PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18: {
19: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
21: PetscInt i,*idxb = 0;
22: PetscScalar *va,*vb;
23: Vec vtmp;
26: MatGetRowMaxAbs(a->A,v,idx);
27: VecGetArray(v,&va);
28: if (idx) {
29: for (i=0; i<A->rmap->n; i++) {
30: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31: }
32: }
34: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
35: if (idx) {PetscMalloc1(A->rmap->n,&idxb);}
36: MatGetRowMaxAbs(a->B,vtmp,idxb);
37: VecGetArray(vtmp,&vb);
39: for (i=0; i<A->rmap->n; i++) {
40: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41: va[i] = vb[i];
42: if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43: }
44: }
46: VecRestoreArray(v,&va);
47: VecRestoreArray(vtmp,&vb);
48: PetscFree(idxb);
49: VecDestroy(&vtmp);
50: return(0);
51: }
55: PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
56: {
57: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
61: MatStoreValues(aij->A);
62: MatStoreValues(aij->B);
63: return(0);
64: }
68: PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
69: {
70: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
74: MatRetrieveValues(aij->A);
75: MatRetrieveValues(aij->B);
76: return(0);
77: }
79: /*
80: Local utility routine that creates a mapping from the global column
81: number to the local number in the off-diagonal part of the local
82:     storage of the matrix. This is done in a non-scalable way since the
83: length of colmap equals the global matrix length.
84: */
87: PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88: {
89: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
90: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
92: PetscInt nbs = B->nbs,i,bs=mat->rmap->bs;
95: #if defined(PETSC_USE_CTABLE)
96: PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);
97: for (i=0; i<nbs; i++) {
98: PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);
99: }
100: #else
101: PetscMalloc1((baij->Nbs+1),&baij->colmap);
102: PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));
103: PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));
104: for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105: #endif
106: return(0);
107: }
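/*
   Illustrative sketch of how the colmap built above is used (made-up numbers).
   Assume bs = 2 and garray = {3, 7, 12} (the global block columns present in B).
   The loop above then stores colmap[3] = 1, colmap[7] = 3, colmap[12] = 5
   (i.e. i*bs+1), and colmap[g] = 0 for any other global block column g, where
   0 means "not present in the off-diagonal part". A caller such as
   MatSetValues_MPIBAIJ() recovers the local point column of a global point
   column gcol with

       lcol = colmap[gcol/bs] - 1;          negative means a new off-diagonal block
       if (lcol >= 0) lcol += gcol % bs;    point offset inside the block

   With the CTABLE branch the same values are stored via PetscTableAdd() and
   retrieved with PetscTableFind(colmap,gcol/bs+1,&lcol).
*/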
109: #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110: { \
111: \
112: brow = row/bs; \
113: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114: rmax = aimax[brow]; nrow = ailen[brow]; \
115: bcol = col/bs; \
116: ridx = row % bs; cidx = col % bs; \
117: low = 0; high = nrow; \
118: while (high-low > 3) { \
119: t = (low+high)/2; \
120: if (rp[t] > bcol) high = t; \
121: else low = t; \
122: } \
123: for (_i=low; _i<high; _i++) { \
124: if (rp[_i] > bcol) break; \
125: if (rp[_i] == bcol) { \
126: bap = ap + bs2*_i + bs*cidx + ridx; \
127: if (addv == ADD_VALUES) *bap += value; \
128: else *bap = value; \
129: goto a_noinsert; \
130: } \
131: } \
132: if (a->nonew == 1) goto a_noinsert; \
133: if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135: N = nrow++ - 1; \
136: /* shift up all the later entries in this row */ \
137: for (ii=N; ii>=_i; ii--) { \
138: rp[ii+1] = rp[ii]; \
139: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
140: } \
141: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); } \
142: rp[_i] = bcol; \
143: ap[bs2*_i + bs*cidx + ridx] = value; \
144: a_noinsert:; \
145: ailen[brow] = nrow; \
146: }
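/*
   Note on the indexing in the two macros above and below (a sketch, with made-up
   numbers). row and col are point indices local to this process; they are split
   into a block index and an offset inside the bs x bs block, which is stored
   column major:

       brow = row/bs;  ridx = row%bs;
       bcol = col/bs;  cidx = col%bs;
       entry address = ap + bs2*_i + bs*cidx + ridx;    for the _i-th block of block row brow

   For example, with bs = 3, row = 7 and col = 4: brow = 2, ridx = 1, bcol = 1,
   cidx = 1, so the value lands at offset 3*1 + 1 = 4 inside the matching
   9-entry block. The while loop is a truncated binary search over the block
   columns of block row brow; the for loop that follows finishes the search
   linearly and inserts a new block if allowed (unless nonew forbids it).
*/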
148: #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149: { \
150: brow = row/bs; \
151: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152: rmax = bimax[brow]; nrow = bilen[brow]; \
153: bcol = col/bs; \
154: ridx = row % bs; cidx = col % bs; \
155: low = 0; high = nrow; \
156: while (high-low > 3) { \
157: t = (low+high)/2; \
158: if (rp[t] > bcol) high = t; \
159: else low = t; \
160: } \
161: for (_i=low; _i<high; _i++) { \
162: if (rp[_i] > bcol) break; \
163: if (rp[_i] == bcol) { \
164: bap = ap + bs2*_i + bs*cidx + ridx; \
165: if (addv == ADD_VALUES) *bap += value; \
166: else *bap = value; \
167: goto b_noinsert; \
168: } \
169: } \
170: if (b->nonew == 1) goto b_noinsert; \
171: if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173: N = nrow++ - 1; \
174: /* shift up all the later entries in this row */ \
175: for (ii=N; ii>=_i; ii--) { \
176: rp[ii+1] = rp[ii]; \
177: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
178: } \
179: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));} \
180: rp[_i] = bcol; \
181: ap[bs2*_i + bs*cidx + ridx] = value; \
182: b_noinsert:; \
183: bilen[brow] = nrow; \
184: }
188: PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189: {
190: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
191: MatScalar value;
192: PetscBool roworiented = baij->roworiented;
194: PetscInt i,j,row,col;
195: PetscInt rstart_orig=mat->rmap->rstart;
196: PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197: PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs;
199: /* Some Variables required in the macro */
200: Mat A = baij->A;
201: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
202: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203: MatScalar *aa =a->a;
205: Mat B = baij->B;
206: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
207: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208: MatScalar *ba =b->a;
210: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
211: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
212: MatScalar *ap,*bap;
216: for (i=0; i<m; i++) {
217: if (im[i] < 0) continue;
218: #if defined(PETSC_USE_DEBUG)
219: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
220: #endif
221: if (im[i] >= rstart_orig && im[i] < rend_orig) {
222: row = im[i] - rstart_orig;
223: for (j=0; j<n; j++) {
224: if (in[j] >= cstart_orig && in[j] < cend_orig) {
225: col = in[j] - cstart_orig;
226: if (roworiented) value = v[i*n+j];
227: else value = v[i+j*m];
228: MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
229: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
230: } else if (in[j] < 0) continue;
231: #if defined(PETSC_USE_DEBUG)
232: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
233: #endif
234: else {
235: if (mat->was_assembled) {
236: if (!baij->colmap) {
237: MatCreateColmap_MPIBAIJ_Private(mat);
238: }
239: #if defined(PETSC_USE_CTABLE)
240: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
241: col = col - 1;
242: #else
243: col = baij->colmap[in[j]/bs] - 1;
244: #endif
245: if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
246: MatDisAssemble_MPIBAIJ(mat);
247: col = in[j];
248: /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
249: B = baij->B;
250: b = (Mat_SeqBAIJ*)(B)->data;
251: bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
252: ba =b->a;
253: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
254: else col += in[j]%bs;
255: } else col = in[j];
256: if (roworiented) value = v[i*n+j];
257: else value = v[i+j*m];
258: MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
260: }
261: }
262: } else {
263: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
264: if (!baij->donotstash) {
265: mat->assembled = PETSC_FALSE;
266: if (roworiented) {
267: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
268: } else {
269: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
270: }
271: }
272: }
273: }
274: return(0);
275: }
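/*
   Minimal calling sketch for the routine above (illustrative only; error
   checking omitted). Values are passed with global point indices; entries
   whose rows live on another process are stashed and communicated during
   assembly:

       Mat         A;                         assumed already created as MATMPIBAIJ with bs = 2
       PetscInt    row = 4, cols[2] = {4,5};
       PetscScalar vals[2] = {1.0,2.0};

       MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/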
279: PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
280: {
281: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
282: const PetscScalar *value;
283: MatScalar *barray = baij->barray;
284: PetscBool roworiented = baij->roworiented;
285: PetscErrorCode ierr;
286: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
287: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
288: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
291: if (!barray) {
292: PetscMalloc1(bs2,&barray);
293: baij->barray = barray;
294: }
296: if (roworiented) stepval = (n-1)*bs;
297: else stepval = (m-1)*bs;
299: for (i=0; i<m; i++) {
300: if (im[i] < 0) continue;
301: #if defined(PETSC_USE_DEBUG)
302: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303: #endif
304: if (im[i] >= rstart && im[i] < rend) {
305: row = im[i] - rstart;
306: for (j=0; j<n; j++) {
307: /* If NumCol = 1 then a copy is not required */
308: if ((roworiented) && (n == 1)) {
309: barray = (MatScalar*)v + i*bs2;
310: } else if ((!roworiented) && (m == 1)) {
311: barray = (MatScalar*)v + j*bs2;
312: } else { /* Here a copy is required */
313: if (roworiented) {
314: value = v + (i*(stepval+bs) + j)*bs;
315: } else {
316: value = v + (j*(stepval+bs) + i)*bs;
317: }
318: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319: for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
320: barray += bs;
321: }
322: barray -= bs2;
323: }
325: if (in[j] >= cstart && in[j] < cend) {
326: col = in[j] - cstart;
327: MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);
328: } else if (in[j] < 0) continue;
329: #if defined(PETSC_USE_DEBUG)
330: else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331: #endif
332: else {
333: if (mat->was_assembled) {
334: if (!baij->colmap) {
335: MatCreateColmap_MPIBAIJ_Private(mat);
336: }
338: #if defined(PETSC_USE_DEBUG)
339: #if defined(PETSC_USE_CTABLE)
340: { PetscInt data;
341: PetscTableFind(baij->colmap,in[j]+1,&data);
342: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343: }
344: #else
345: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346: #endif
347: #endif
348: #if defined(PETSC_USE_CTABLE)
349: PetscTableFind(baij->colmap,in[j]+1,&col);
350: col = (col - 1)/bs;
351: #else
352: col = (baij->colmap[in[j]] - 1)/bs;
353: #endif
354: if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
355: MatDisAssemble_MPIBAIJ(mat);
356: col = in[j];
357: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
358: } else col = in[j];
359: MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);
360: }
361: }
362: } else {
363: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
364: if (!baij->donotstash) {
365: if (roworiented) {
366: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
367: } else {
368: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
369: }
370: }
371: }
372: }
373: return(0);
374: }
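/*
   Layout sketch for the blocked routine above (illustrative numbers). With
   roworiented and m x n blocks of size bs, v is the dense (m*bs) x (n*bs)
   array of the blocks stored row major, and stepval = (n-1)*bs is the jump
   from the end of one row of a block to the start of its next row. For
   example, bs = 2, m = 1, n = 2:

       v = { A00, A01, B00, B01,
             A10, A11, B10, B11 };     A is the block for in[0], B the block for in[1]

   For block j = 0 the copy loop gathers v[0],v[1], skips stepval = 2 entries,
   then gathers v[4],v[5] into barray before calling the sequential kernel.
*/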
376: #define HASH_KEY 0.6180339887
377: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
378: /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379: /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
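/*
   The hash above is the classical multiplicative (golden-ratio) hash: the key
   is multiplied by 0.6180339887, the integer part is discarded, and the
   fractional part is scaled by the table size. A worked example with made-up
   numbers: key = 100 and size = 128 gives tmp = 61.80339887, fractional part
   0.80339887, and hence slot (PetscInt)(128*0.80339887) = 102.
*/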
382: PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
383: {
384: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
385: PetscBool roworiented = baij->roworiented;
387: PetscInt i,j,row,col;
388: PetscInt rstart_orig=mat->rmap->rstart;
389: PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs;
390: PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
391: PetscReal tmp;
392: MatScalar **HD = baij->hd,value;
393: #if defined(PETSC_USE_DEBUG)
394: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
395: #endif
399: for (i=0; i<m; i++) {
400: #if defined(PETSC_USE_DEBUG)
401: if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
402: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
403: #endif
404: row = im[i];
405: if (row >= rstart_orig && row < rend_orig) {
406: for (j=0; j<n; j++) {
407: col = in[j];
408: if (roworiented) value = v[i*n+j];
409: else value = v[i+j*m];
410:       /* Look up into the Hash Table */
411: key = (row/bs)*Nbs+(col/bs)+1;
412: h1 = HASH(size,key,tmp);
415: idx = h1;
416: #if defined(PETSC_USE_DEBUG)
417: insert_ct++;
418: total_ct++;
419: if (HT[idx] != key) {
420: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
421: if (idx == size) {
422: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
423: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
424: }
425: }
426: #else
427: if (HT[idx] != key) {
428: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
429: if (idx == size) {
430: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
431: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
432: }
433: }
434: #endif
435: /* A HASH table entry is found, so insert the values at the correct address */
436: if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
437: else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
438: }
439: } else if (!baij->donotstash) {
440: if (roworiented) {
441: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
442: } else {
443: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
444: }
445: }
446: }
447: #if defined(PETSC_USE_DEBUG)
448: baij->ht_total_ct = total_ct;
449: baij->ht_insert_ct = insert_ct;
450: #endif
451: return(0);
452: }
456: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
457: {
458: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
459: PetscBool roworiented = baij->roworiented;
460: PetscErrorCode ierr;
461: PetscInt i,j,ii,jj,row,col;
462: PetscInt rstart=baij->rstartbs;
463: PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
464: PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
465: PetscReal tmp;
466: MatScalar **HD = baij->hd,*baij_a;
467: const PetscScalar *v_t,*value;
468: #if defined(PETSC_USE_DEBUG)
469: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
470: #endif
473: if (roworiented) stepval = (n-1)*bs;
474: else stepval = (m-1)*bs;
476: for (i=0; i<m; i++) {
477: #if defined(PETSC_USE_DEBUG)
478: if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
479: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
480: #endif
481: row = im[i];
482: v_t = v + i*nbs2;
483: if (row >= rstart && row < rend) {
484: for (j=0; j<n; j++) {
485: col = in[j];
487: /* Look up into the Hash Table */
488: key = row*Nbs+col+1;
489: h1 = HASH(size,key,tmp);
491: idx = h1;
492: #if defined(PETSC_USE_DEBUG)
493: total_ct++;
494: insert_ct++;
495: if (HT[idx] != key) {
496: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
497: if (idx == size) {
498: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
499: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
500: }
501: }
502: #else
503: if (HT[idx] != key) {
504: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
505: if (idx == size) {
506: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
507: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
508: }
509: }
510: #endif
511: baij_a = HD[idx];
512: if (roworiented) {
513: /*value = v + i*(stepval+bs)*bs + j*bs;*/
514: /* value = v + (i*(stepval+bs)+j)*bs; */
515: value = v_t;
516: v_t += bs;
517: if (addv == ADD_VALUES) {
518: for (ii=0; ii<bs; ii++,value+=stepval) {
519: for (jj=ii; jj<bs2; jj+=bs) {
520: baij_a[jj] += *value++;
521: }
522: }
523: } else {
524: for (ii=0; ii<bs; ii++,value+=stepval) {
525: for (jj=ii; jj<bs2; jj+=bs) {
526: baij_a[jj] = *value++;
527: }
528: }
529: }
530: } else {
531: value = v + j*(stepval+bs)*bs + i*bs;
532: if (addv == ADD_VALUES) {
533: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
534: for (jj=0; jj<bs; jj++) {
535: baij_a[jj] += *value++;
536: }
537: }
538: } else {
539: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
540: for (jj=0; jj<bs; jj++) {
541: baij_a[jj] = *value++;
542: }
543: }
544: }
545: }
546: }
547: } else {
548: if (!baij->donotstash) {
549: if (roworiented) {
550: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
551: } else {
552: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
553: }
554: }
555: }
556: }
557: #if defined(PETSC_USE_DEBUG)
558: baij->ht_total_ct = total_ct;
559: baij->ht_insert_ct = insert_ct;
560: #endif
561: return(0);
562: }
566: PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
567: {
568: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
570: PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
571: PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
574: for (i=0; i<m; i++) {
575: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
576: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
577: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
578: row = idxm[i] - bsrstart;
579: for (j=0; j<n; j++) {
580: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
581: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
582: if (idxn[j] >= bscstart && idxn[j] < bscend) {
583: col = idxn[j] - bscstart;
584: MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
585: } else {
586: if (!baij->colmap) {
587: MatCreateColmap_MPIBAIJ_Private(mat);
588: }
589: #if defined(PETSC_USE_CTABLE)
590: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
591: data--;
592: #else
593: data = baij->colmap[idxn[j]/bs]-1;
594: #endif
595: if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
596: else {
597: col = data + idxn[j]%bs;
598: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
599: }
600: }
601: }
602: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
603: }
604: return(0);
605: }
609: PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
610: {
611: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
612: Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
614: PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
615: PetscReal sum = 0.0;
616: MatScalar *v;
619: if (baij->size == 1) {
620: MatNorm(baij->A,type,nrm);
621: } else {
622: if (type == NORM_FROBENIUS) {
623: v = amat->a;
624: nz = amat->nz*bs2;
625: for (i=0; i<nz; i++) {
626: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
627: }
628: v = bmat->a;
629: nz = bmat->nz*bs2;
630: for (i=0; i<nz; i++) {
631: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
632: }
633: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
634: *nrm = PetscSqrtReal(*nrm);
635: } else if (type == NORM_1) { /* max column sum */
636: PetscReal *tmp,*tmp2;
637: PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs;
638: PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);
639: PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
640: v = amat->a; jj = amat->j;
641: for (i=0; i<amat->nz; i++) {
642: for (j=0; j<bs; j++) {
643: col = bs*(cstart + *jj) + j; /* column index */
644: for (row=0; row<bs; row++) {
645: tmp[col] += PetscAbsScalar(*v); v++;
646: }
647: }
648: jj++;
649: }
650: v = bmat->a; jj = bmat->j;
651: for (i=0; i<bmat->nz; i++) {
652: for (j=0; j<bs; j++) {
653: col = bs*garray[*jj] + j;
654: for (row=0; row<bs; row++) {
655: tmp[col] += PetscAbsScalar(*v); v++;
656: }
657: }
658: jj++;
659: }
660: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
661: *nrm = 0.0;
662: for (j=0; j<mat->cmap->N; j++) {
663: if (tmp2[j] > *nrm) *nrm = tmp2[j];
664: }
665: PetscFree2(tmp,tmp2);
666: } else if (type == NORM_INFINITY) { /* max row sum */
667: PetscReal *sums;
668: PetscMalloc1(bs,&sums);
669: sum = 0.0;
670: for (j=0; j<amat->mbs; j++) {
671: for (row=0; row<bs; row++) sums[row] = 0.0;
672: v = amat->a + bs2*amat->i[j];
673: nz = amat->i[j+1]-amat->i[j];
674: for (i=0; i<nz; i++) {
675: for (col=0; col<bs; col++) {
676: for (row=0; row<bs; row++) {
677: sums[row] += PetscAbsScalar(*v); v++;
678: }
679: }
680: }
681: v = bmat->a + bs2*bmat->i[j];
682: nz = bmat->i[j+1]-bmat->i[j];
683: for (i=0; i<nz; i++) {
684: for (col=0; col<bs; col++) {
685: for (row=0; row<bs; row++) {
686: sums[row] += PetscAbsScalar(*v); v++;
687: }
688: }
689: }
690: for (row=0; row<bs; row++) {
691: if (sums[row] > sum) sum = sums[row];
692: }
693: }
694: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));
695: PetscFree(sums);
696: } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
697: }
698: return(0);
699: }
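/*
   For reference, the three cases above compute the usual norms, combining the
   local parts A (diagonal block) and B (off-diagonal block) and reducing over
   the communicator:

       NORM_FROBENIUS:  ||M||_F   = sqrt( sum_ij |m_ij|^2 )     (MPI sum, then sqrt)
       NORM_1:          ||M||_1   = max_j sum_i |m_ij|          (column sums, MPI sum, then max)
       NORM_INFINITY:   ||M||_inf = max_i sum_j |m_ij|          (row sums, MPI max)
*/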
701: /*
702:      Creates and fills the hash table.
703:      This table is created only once.
704:      If new entries need to be added to the matrix
705: then the hash table has to be destroyed and
706: recreated.
707: */
710: PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
711: {
712: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
713: Mat A = baij->A,B=baij->B;
714: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
715: PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
717: PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
718: PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
719: PetscInt *HT,key;
720: MatScalar **HD;
721: PetscReal tmp;
722: #if defined(PETSC_USE_INFO)
723: PetscInt ct=0,max=0;
724: #endif
727: if (baij->ht) return(0);
729: baij->ht_size = (PetscInt)(factor*nz);
730: ht_size = baij->ht_size;
732: /* Allocate Memory for Hash Table */
733: PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);
734: HD = baij->hd;
735: HT = baij->ht;
737: /* Loop Over A */
738: for (i=0; i<a->mbs; i++) {
739: for (j=ai[i]; j<ai[i+1]; j++) {
740: row = i+rstart;
741: col = aj[j]+cstart;
743: key = row*Nbs + col + 1;
744: h1 = HASH(ht_size,key,tmp);
745: for (k=0; k<ht_size; k++) {
746: if (!HT[(h1+k)%ht_size]) {
747: HT[(h1+k)%ht_size] = key;
748: HD[(h1+k)%ht_size] = a->a + j*bs2;
749: break;
750: #if defined(PETSC_USE_INFO)
751: } else {
752: ct++;
753: #endif
754: }
755: }
756: #if defined(PETSC_USE_INFO)
757: if (k> max) max = k;
758: #endif
759: }
760: }
761: /* Loop Over B */
762: for (i=0; i<b->mbs; i++) {
763: for (j=bi[i]; j<bi[i+1]; j++) {
764: row = i+rstart;
765: col = garray[bj[j]];
766: key = row*Nbs + col + 1;
767: h1 = HASH(ht_size,key,tmp);
768: for (k=0; k<ht_size; k++) {
769: if (!HT[(h1+k)%ht_size]) {
770: HT[(h1+k)%ht_size] = key;
771: HD[(h1+k)%ht_size] = b->a + j*bs2;
772: break;
773: #if defined(PETSC_USE_INFO)
774: } else {
775: ct++;
776: #endif
777: }
778: }
779: #if defined(PETSC_USE_INFO)
780: if (k> max) max = k;
781: #endif
782: }
783: }
785: /* Print Summary */
786: #if defined(PETSC_USE_INFO)
787: for (i=0,j=0; i<ht_size; i++) {
788: if (HT[i]) j++;
789: }
790: PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
791: #endif
792: return(0);
793: }
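/*
   A lookup against the table built above is a plain linear probe (sketch):

       key = brow*Nbs + bcol + 1;                        0 is reserved for "empty"
       idx = HASH(ht_size,key,tmp);
       while (HT[idx] != key) idx = (idx+1) % ht_size;   assumes the entry exists
       values = HD[idx];                                 points at the bs2 entries of that block

   MatSetValues_MPIBAIJ_HT() above does the same probe split into two passes
   (h1..size-1, then 0..h1-1) and errors out if the key is absent, since the
   table is built once from the assembled nonzero pattern.
*/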
797: PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
798: {
799: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
801: PetscInt nstash,reallocs;
802: InsertMode addv;
805: if (baij->donotstash || mat->nooffprocentries) return(0);
807: /* make sure all processors are either in INSERTMODE or ADDMODE */
808: MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
809: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
810: mat->insertmode = addv; /* in case this processor had no cache */
812: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
813: MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
814: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
815: PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
816: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
817: PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
818: return(0);
819: }
823: PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
824: {
825: Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
826: Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data;
828: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
829: PetscInt *row,*col;
830: PetscBool r1,r2,r3,other_disassembled;
831: MatScalar *val;
832: InsertMode addv = mat->insertmode;
833: PetscMPIInt n;
836: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
837: if (!baij->donotstash && !mat->nooffprocentries) {
838: while (1) {
839: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
840: if (!flg) break;
842: for (i=0; i<n;) {
843: /* Now identify the consecutive vals belonging to the same row */
844: for (j=i,rstart=row[j]; j<n; j++) {
845: if (row[j] != rstart) break;
846: }
847: if (j < n) ncols = j-i;
848: else ncols = n-i;
849: /* Now assemble all these values with a single function call */
850: MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
851: i = j;
852: }
853: }
854: MatStashScatterEnd_Private(&mat->stash);
855:     /* Now process the block-stash. Since the values are stashed column-oriented,
856:        set the roworiented flags to PETSC_FALSE (so the values are interpreted column-oriented), and after MatSetValues()
857:        restore the original flags */
858: r1 = baij->roworiented;
859: r2 = a->roworiented;
860: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
862: baij->roworiented = PETSC_FALSE;
863: a->roworiented = PETSC_FALSE;
865: (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
866: while (1) {
867: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
868: if (!flg) break;
870: for (i=0; i<n;) {
871: /* Now identify the consecutive vals belonging to the same row */
872: for (j=i,rstart=row[j]; j<n; j++) {
873: if (row[j] != rstart) break;
874: }
875: if (j < n) ncols = j-i;
876: else ncols = n-i;
877: MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
878: i = j;
879: }
880: }
881: MatStashScatterEnd_Private(&mat->bstash);
883: baij->roworiented = r1;
884: a->roworiented = r2;
886: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
887: }
889: MatAssemblyBegin(baij->A,mode);
890: MatAssemblyEnd(baij->A,mode);
892:   /* determine if any processor has disassembled; if so, we must
893:      also disassemble ourselves, in order that we may reassemble. */
894:   /*
895:      if the nonzero structure of the submatrix B cannot change then we know that
896:      no processor disassembled and thus we can skip this step
897: */
898: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
899: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
900: if (mat->was_assembled && !other_disassembled) {
901: MatDisAssemble_MPIBAIJ(mat);
902: }
903: }
905: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
906: MatSetUpMultiply_MPIBAIJ(mat);
907: }
908: MatAssemblyBegin(baij->B,mode);
909: MatAssemblyEnd(baij->B,mode);
911: #if defined(PETSC_USE_INFO)
912: if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
913: PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
915: baij->ht_total_ct = 0;
916: baij->ht_insert_ct = 0;
917: }
918: #endif
919: if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
920: MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);
922: mat->ops->setvalues = MatSetValues_MPIBAIJ_HT;
923: mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
924: }
926: PetscFree2(baij->rowvalues,baij->rowindices);
928: baij->rowvalues = 0;
929: return(0);
930: }
932: #include <petscdraw.h>
935: static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
936: {
937: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
938: PetscErrorCode ierr;
939: PetscMPIInt size = baij->size,rank = baij->rank;
940: PetscInt bs = mat->rmap->bs;
941: PetscBool iascii,isdraw;
942: PetscViewer sviewer;
943: PetscViewerFormat format;
946: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
947: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
948: if (iascii) {
949: PetscViewerGetFormat(viewer,&format);
950: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
951: MatInfo info;
952: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
953: MatGetInfo(mat,MAT_LOCAL,&info);
954: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
955: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
956: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);
957: MatGetInfo(baij->A,MAT_LOCAL,&info);
958: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
959: MatGetInfo(baij->B,MAT_LOCAL,&info);
960: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
961: PetscViewerFlush(viewer);
962: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
963: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
964: VecScatterView(baij->Mvctx,viewer);
965: return(0);
966: } else if (format == PETSC_VIEWER_ASCII_INFO) {
967: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
968: return(0);
969: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
970: return(0);
971: }
972: }
974: if (isdraw) {
975: PetscDraw draw;
976: PetscBool isnull;
977: PetscViewerDrawGetDraw(viewer,0,&draw);
978: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
979: }
981: if (size == 1) {
982: PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);
983: MatView(baij->A,viewer);
984: } else {
985:     /* assemble the entire matrix onto the first processor. */
986: Mat A;
987: Mat_SeqBAIJ *Aloc;
988: PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
989: MatScalar *a;
991:     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
992: /* Perhaps this should be the type of mat? */
993: MatCreate(PetscObjectComm((PetscObject)mat),&A);
994: if (!rank) {
995: MatSetSizes(A,M,N,M,N);
996: } else {
997: MatSetSizes(A,0,0,M,N);
998: }
999: MatSetType(A,MATMPIBAIJ);
1000: MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);
1001: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1002: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
1004: /* copy over the A part */
1005: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1006: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1007: PetscMalloc1(bs,&rvals);
1009: for (i=0; i<mbs; i++) {
1010: rvals[0] = bs*(baij->rstartbs + i);
1011: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1012: for (j=ai[i]; j<ai[i+1]; j++) {
1013: col = (baij->cstartbs+aj[j])*bs;
1014: for (k=0; k<bs; k++) {
1015: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1016: col++; a += bs;
1017: }
1018: }
1019: }
1020: /* copy over the B part */
1021: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1022: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023: for (i=0; i<mbs; i++) {
1024: rvals[0] = bs*(baij->rstartbs + i);
1025: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1026: for (j=ai[i]; j<ai[i+1]; j++) {
1027: col = baij->garray[aj[j]]*bs;
1028: for (k=0; k<bs; k++) {
1029: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1030: col++; a += bs;
1031: }
1032: }
1033: }
1034: PetscFree(rvals);
1035: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1036: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1037: /*
1038:        Everyone has to make the call that draws the matrix since the graphics waits are
1039: synchronized across all processors that share the PetscDraw object
1040: */
1041: PetscViewerGetSingleton(viewer,&sviewer);
1042: if (!rank) {
1043: PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);
1044: /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1045: PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1046: MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);
1047: }
1048: PetscViewerRestoreSingleton(viewer,&sviewer);
1049: MatDestroy(&A);
1050: }
1051: return(0);
1052: }
1056: static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1057: {
1058: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data;
1059: Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data;
1060: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data;
1062: PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1063: PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1064: int fd;
1065: PetscScalar *column_values;
1066: FILE *file;
1067: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1068: PetscInt message_count,flowcontrolcount;
1071: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1072: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
1073: nz = bs2*(A->nz + B->nz);
1074: rlen = mat->rmap->n;
1075: if (!rank) {
1076: header[0] = MAT_FILE_CLASSID;
1077: header[1] = mat->rmap->N;
1078: header[2] = mat->cmap->N;
1080: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1081: PetscViewerBinaryGetDescriptor(viewer,&fd);
1082: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1083: /* get largest number of rows any processor has */
1084: range = mat->rmap->range;
1085: for (i=1; i<size; i++) {
1086: rlen = PetscMax(rlen,range[i+1] - range[i]);
1087: }
1088: } else {
1089: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1090: }
1092: PetscMalloc1((rlen/bs),&crow_lens);
1093: /* compute lengths of each row */
1094: for (i=0; i<a->mbs; i++) {
1095: crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1096: }
1097: /* store the row lengths to the file */
1098: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1099: if (!rank) {
1100: MPI_Status status;
1101: PetscMalloc1(rlen,&row_lens);
1102: rlen = (range[1] - range[0])/bs;
1103: for (i=0; i<rlen; i++) {
1104: for (j=0; j<bs; j++) {
1105: row_lens[i*bs+j] = bs*crow_lens[i];
1106: }
1107: }
1108: PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);
1109: for (i=1; i<size; i++) {
1110: rlen = (range[i+1] - range[i])/bs;
1111: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1112: MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1113: for (k=0; k<rlen; k++) {
1114: for (j=0; j<bs; j++) {
1115: row_lens[k*bs+j] = bs*crow_lens[k];
1116: }
1117: }
1118: PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);
1119: }
1120: PetscViewerFlowControlEndMaster(viewer,&message_count);
1121: PetscFree(row_lens);
1122: } else {
1123: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1124: MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1125: PetscViewerFlowControlEndWorker(viewer,&message_count);
1126: }
1127: PetscFree(crow_lens);
1129:   /* load up the local column indices. Include entries for all rows, not just one for each block row, since process 0 does not have the
1130:      information needed to expand a block row into its individual rows. This does require more communication but still not more than
1131: the communication needed for the nonzero values */
1132:   nzmax = nz; /* space the largest processor needs */
1133: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));
1134: PetscMalloc1(nzmax,&column_indices);
1135: cnt = 0;
1136: for (i=0; i<a->mbs; i++) {
1137: pcnt = cnt;
1138: for (j=B->i[i]; j<B->i[i+1]; j++) {
1139: if ((col = garray[B->j[j]]) > cstart) break;
1140: for (l=0; l<bs; l++) {
1141: column_indices[cnt++] = bs*col+l;
1142: }
1143: }
1144: for (k=A->i[i]; k<A->i[i+1]; k++) {
1145: for (l=0; l<bs; l++) {
1146: column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1147: }
1148: }
1149: for (; j<B->i[i+1]; j++) {
1150: for (l=0; l<bs; l++) {
1151: column_indices[cnt++] = bs*garray[B->j[j]]+l;
1152: }
1153: }
1154: len = cnt - pcnt;
1155: for (k=1; k<bs; k++) {
1156: PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));
1157: cnt += len;
1158: }
1159: }
1160: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1162: /* store the columns to the file */
1163: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1164: if (!rank) {
1165: MPI_Status status;
1166: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1167: for (i=1; i<size; i++) {
1168: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1169: MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1170: MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1171: PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);
1172: }
1173: PetscViewerFlowControlEndMaster(viewer,&message_count);
1174: } else {
1175: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1176: MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1177: MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1178: PetscViewerFlowControlEndWorker(viewer,&message_count);
1179: }
1180: PetscFree(column_indices);
1182: /* load up the numerical values */
1183: PetscMalloc1(nzmax,&column_values);
1184: cnt = 0;
1185: for (i=0; i<a->mbs; i++) {
1186: rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1187: for (j=B->i[i]; j<B->i[i+1]; j++) {
1188: if (garray[B->j[j]] > cstart) break;
1189: for (l=0; l<bs; l++) {
1190: for (ll=0; ll<bs; ll++) {
1191: column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1192: }
1193: }
1194: cnt += bs;
1195: }
1196: for (k=A->i[i]; k<A->i[i+1]; k++) {
1197: for (l=0; l<bs; l++) {
1198: for (ll=0; ll<bs; ll++) {
1199: column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1200: }
1201: }
1202: cnt += bs;
1203: }
1204: for (; j<B->i[i+1]; j++) {
1205: for (l=0; l<bs; l++) {
1206: for (ll=0; ll<bs; ll++) {
1207: column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1208: }
1209: }
1210: cnt += bs;
1211: }
1212: cnt += (bs-1)*rlen;
1213: }
1214: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1216: /* store the column values to the file */
1217: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1218: if (!rank) {
1219: MPI_Status status;
1220: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1221: for (i=1; i<size; i++) {
1222: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1223: MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1224: MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);
1225: PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);
1226: }
1227: PetscViewerFlowControlEndMaster(viewer,&message_count);
1228: } else {
1229: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1230: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1231: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
1232: PetscViewerFlowControlEndWorker(viewer,&message_count);
1233: }
1234: PetscFree(column_values);
1236: PetscViewerBinaryGetInfoPointer(viewer,&file);
1237: if (file) {
1238: fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1239: }
1240: return(0);
1241: }
1245: PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1246: {
1248: PetscBool iascii,isdraw,issocket,isbinary;
1251: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1252: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1253: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1254: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1255: if (iascii || isdraw || issocket) {
1256: MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);
1257: } else if (isbinary) {
1258: MatView_MPIBAIJ_Binary(mat,viewer);
1259: }
1260: return(0);
1261: }
1265: PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1266: {
1267: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1271: #if defined(PETSC_USE_LOG)
1272: PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1273: #endif
1274: MatStashDestroy_Private(&mat->stash);
1275: MatStashDestroy_Private(&mat->bstash);
1276: MatDestroy(&baij->A);
1277: MatDestroy(&baij->B);
1278: #if defined(PETSC_USE_CTABLE)
1279: PetscTableDestroy(&baij->colmap);
1280: #else
1281: PetscFree(baij->colmap);
1282: #endif
1283: PetscFree(baij->garray);
1284: VecDestroy(&baij->lvec);
1285: VecScatterDestroy(&baij->Mvctx);
1286: PetscFree2(baij->rowvalues,baij->rowindices);
1287: PetscFree(baij->barray);
1288: PetscFree2(baij->hd,baij->ht);
1289: PetscFree(baij->rangebs);
1290: PetscFree(mat->data);
1292: PetscObjectChangeTypeName((PetscObject)mat,0);
1293: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1294: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1295: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
1296: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);
1297: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);
1298: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);
1299: PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);
1300: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);
1301: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);
1302: return(0);
1303: }
1307: PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1308: {
1309: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1311: PetscInt nt;
1314: VecGetLocalSize(xx,&nt);
1315: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1316: VecGetLocalSize(yy,&nt);
1317:   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1318: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1319: (*a->A->ops->mult)(a->A,xx,yy);
1320: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1321: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1322: return(0);
1323: }
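/*
   The product above uses the standard row-slab splitting: each process stores
   its rows of the matrix as M_local = [A  B], where A contains the columns it
   owns (the "diagonal block") and B the remaining off-process columns. Then

       y_local = A * x_local + B * x_ghost

   where x_ghost (a->lvec) is gathered from the other processes by the
   VecScatter a->Mvctx; the scatter is begun before the local product so that
   communication overlaps computation.
*/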
1327: PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1328: {
1329: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1333: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1334: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1335: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1336: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1337: return(0);
1338: }
1342: PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1343: {
1344: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1346: PetscBool merged;
1349: VecScatterGetMerged(a->Mvctx,&merged);
1350: /* do nondiagonal part */
1351: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1352: if (!merged) {
1353: /* send it on its way */
1354: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1355: /* do local part */
1356: (*a->A->ops->multtranspose)(a->A,xx,yy);
1357: /* receive remote parts: note this assumes the values are not actually */
1358: /* inserted in yy until the next line */
1359: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1360: } else {
1361: /* do local part */
1362: (*a->A->ops->multtranspose)(a->A,xx,yy);
1363: /* send it on its way */
1364: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1365: /* values actually were received in the Begin() but we need to call this nop */
1366: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1367: }
1368: return(0);
1369: }
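/*
   The transpose product reverses the communication pattern of MatMult():
   B^T * x_local produces contributions to entries of y that are owned by
   other processes (they correspond to the ghost columns of B), so they are
   accumulated in a->lvec and sent home with a reverse scatter using
   ADD_VALUES, while A^T * x_local is added directly into the locally owned
   part of yy. Schematically,

       y = A_local^T * x_local  +  (contributions B^T * x scattered back from the neighbors)
*/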
1373: PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374: {
1375: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1379: /* do nondiagonal part */
1380: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1381: /* send it on its way */
1382: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1383: /* do local part */
1384: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1385: /* receive remote parts: note this assumes the values are not actually */
1386:   /* inserted in zz until the next line, which is true for this implementation*/
1387:   /* but is perhaps not always true. */
1388: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1389: return(0);
1390: }
1392: /*
1393: This only works correctly for square matrices where the subblock A->A is the
1394: diagonal block
1395: */
1398: PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1399: {
1400: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1404: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1405: MatGetDiagonal(a->A,v);
1406: return(0);
1407: }
1411: PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1412: {
1413: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1417: MatScale(a->A,aa);
1418: MatScale(a->B,aa);
1419: return(0);
1420: }
1424: PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1425: {
1426: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1427: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1429: PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1430: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1431: PetscInt *cmap,*idx_p,cstart = mat->cstartbs;
1434: if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1435: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1436: mat->getrowactive = PETSC_TRUE;
1438: if (!mat->rowvalues && (idx || v)) {
1439: /*
1440: allocate enough space to hold information from the longest row.
1441: */
1442: Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1443: PetscInt max = 1,mbs = mat->mbs,tmp;
1444: for (i=0; i<mbs; i++) {
1445: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1446: if (max < tmp) max = tmp;
1447: }
1448: PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);
1449: }
1450: lrow = row - brstart;
1452: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1453: if (!v) {pvA = 0; pvB = 0;}
1454: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1455: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1456: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1457: nztot = nzA + nzB;
1459: cmap = mat->garray;
1460: if (v || idx) {
1461: if (nztot) {
1462: /* Sort by increasing column numbers, assuming A and B already sorted */
1463: PetscInt imark = -1;
1464: if (v) {
1465: *v = v_p = mat->rowvalues;
1466: for (i=0; i<nzB; i++) {
1467: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1468: else break;
1469: }
1470: imark = i;
1471: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1472: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1473: }
1474: if (idx) {
1475: *idx = idx_p = mat->rowindices;
1476: if (imark > -1) {
1477: for (i=0; i<imark; i++) {
1478: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479: }
1480: } else {
1481: for (i=0; i<nzB; i++) {
1482: if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1483: else break;
1484: }
1485: imark = i;
1486: }
1487: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1488: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1489: }
1490: } else {
1491: if (idx) *idx = 0;
1492: if (v) *v = 0;
1493: }
1494: }
1495: *nz = nztot;
1496: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1497: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1498: return(0);
1499: }
1503: PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1504: {
1505: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1508: if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1509: baij->getrowactive = PETSC_FALSE;
1510: return(0);
1511: }
1515: PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1516: {
1517: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1521: MatZeroEntries(l->A);
1522: MatZeroEntries(l->B);
1523: return(0);
1524: }
1528: PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1529: {
1530: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1531: Mat A = a->A,B = a->B;
1533: PetscReal isend[5],irecv[5];
1536: info->block_size = (PetscReal)matin->rmap->bs;
1538: MatGetInfo(A,MAT_LOCAL,info);
1540: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1541: isend[3] = info->memory; isend[4] = info->mallocs;
1543: MatGetInfo(B,MAT_LOCAL,info);
1545: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1546: isend[3] += info->memory; isend[4] += info->mallocs;
1548: if (flag == MAT_LOCAL) {
1549: info->nz_used = isend[0];
1550: info->nz_allocated = isend[1];
1551: info->nz_unneeded = isend[2];
1552: info->memory = isend[3];
1553: info->mallocs = isend[4];
1554: } else if (flag == MAT_GLOBAL_MAX) {
1555: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));
1557: info->nz_used = irecv[0];
1558: info->nz_allocated = irecv[1];
1559: info->nz_unneeded = irecv[2];
1560: info->memory = irecv[3];
1561: info->mallocs = irecv[4];
1562: } else if (flag == MAT_GLOBAL_SUM) {
1563: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));
1565: info->nz_used = irecv[0];
1566: info->nz_allocated = irecv[1];
1567: info->nz_unneeded = irecv[2];
1568: info->memory = irecv[3];
1569: info->mallocs = irecv[4];
1570: } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1571: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1572: info->fill_ratio_needed = 0;
1573: info->factor_mallocs = 0;
1574: return(0);
1575: }
1579: PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1580: {
1581: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1585: switch (op) {
1586: case MAT_NEW_NONZERO_LOCATIONS:
1587: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1588: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1589: case MAT_KEEP_NONZERO_PATTERN:
1590: case MAT_NEW_NONZERO_LOCATION_ERR:
1591: MatSetOption(a->A,op,flg);
1592: MatSetOption(a->B,op,flg);
1593: break;
1594: case MAT_ROW_ORIENTED:
1595: a->roworiented = flg;
1597: MatSetOption(a->A,op,flg);
1598: MatSetOption(a->B,op,flg);
1599: break;
1600: case MAT_NEW_DIAGONALS:
1601: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1602: break;
1603: case MAT_IGNORE_OFF_PROC_ENTRIES:
1604: a->donotstash = flg;
1605: break;
1606: case MAT_USE_HASH_TABLE:
1607: a->ht_flag = flg;
1608: break;
1609: case MAT_SYMMETRIC:
1610: case MAT_STRUCTURALLY_SYMMETRIC:
1611: case MAT_HERMITIAN:
1612: case MAT_SYMMETRY_ETERNAL:
1613: MatSetOption(a->A,op,flg);
1614: break;
1615: default:
1616: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1617: }
1618: return(0);
1619: }
1623: PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1624: {
1625: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1626: Mat_SeqBAIJ *Aloc;
1627: Mat B;
1629: PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1630: PetscInt bs=A->rmap->bs,mbs=baij->mbs;
1631: MatScalar *a;
1634: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1635: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1636: MatCreate(PetscObjectComm((PetscObject)A),&B);
1637: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1638: MatSetType(B,((PetscObject)A)->type_name);
1639: /* Do not know preallocation information, but must set block size */
1640: MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);
1641: } else {
1642: B = *matout;
1643: }
1645: /* copy over the A part */
1646: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1647: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1648: PetscMalloc1(bs,&rvals);
1650: for (i=0; i<mbs; i++) {
1651: rvals[0] = bs*(baij->rstartbs + i);
1652: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1653: for (j=ai[i]; j<ai[i+1]; j++) {
1654: col = (baij->cstartbs+aj[j])*bs;
1655: for (k=0; k<bs; k++) {
1656: MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1658: col++; a += bs;
1659: }
1660: }
1661: }
1662: /* copy over the B part */
1663: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1664: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1665: for (i=0; i<mbs; i++) {
1666: rvals[0] = bs*(baij->rstartbs + i);
1667: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1668: for (j=ai[i]; j<ai[i+1]; j++) {
1669: col = baij->garray[aj[j]]*bs;
1670: for (k=0; k<bs; k++) {
1671: MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1672: col++;
1673: a += bs;
1674: }
1675: }
1676: }
1677: PetscFree(rvals);
1678: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1679: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1681: if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1682: else {
1683: MatHeaderMerge(A,B);
1684: }
1685: return(0);
1686: }
1690: PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1691: {
1692: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1693: Mat a = baij->A,b = baij->B;
1695: PetscInt s1,s2,s3;
1698: MatGetLocalSize(mat,&s2,&s3);
1699: if (rr) {
1700: VecGetLocalSize(rr,&s1);
1701: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1702: /* Overlap communication with computation. */
1703: VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1704: }
1705: if (ll) {
1706: VecGetLocalSize(ll,&s1);
1707: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1708: (*b->ops->diagonalscale)(b,ll,NULL);
1709: }
1710: /* scale the diagonal block */
1711: (*a->ops->diagonalscale)(a,ll,rr);
1713: if (rr) {
1714: /* Do a scatter end and then right scale the off-diagonal block */
1715: VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1716: (*b->ops->diagonalscale)(b,NULL,baij->lvec);
1717: }
1718: return(0);
1719: }
1723: PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1724: {
1725: Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1726: PetscInt *owners = A->rmap->range;
1727: PetscInt n = A->rmap->n;
1728: PetscMPIInt size = l->size;
1729: PetscSF sf;
1730: PetscInt *lrows;
1731: PetscSFNode *rrows;
1732: PetscInt lastidx = -1, r, p = 0, len = 0;
1736: /* Create SF where leaves are input rows and roots are owned rows */
1737: PetscMalloc1(n, &lrows);
1738: for (r = 0; r < n; ++r) lrows[r] = -1;
1739: PetscMalloc1(N, &rrows);
1740: for (r = 0; r < N; ++r) {
1741: const PetscInt idx = rows[r];
1742: PetscBool found = PETSC_FALSE;
1743: /* Trick for efficient searching for sorted rows */
1744: if (lastidx > idx) p = 0;
1745: lastidx = idx;
1746: for (; p < size; ++p) {
1747: if (idx >= owners[p] && idx < owners[p+1]) {
1748: rrows[r].rank = p;
1749: rrows[r].index = rows[r] - owners[p];
1750: found = PETSC_TRUE;
1751: break;
1752: }
1753: }
1754:     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
1755: }
1756: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
1757: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
1758: /* Collect flags for rows to be zeroed */
1759: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1760: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1761: PetscSFDestroy(&sf);
1762: /* Compress and put in row numbers */
1763: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1764: /* fix right hand side if needed */
1765: if (x && b) {
1766: const PetscScalar *xx;
1767: PetscScalar *bb;
1769: VecGetArrayRead(x,&xx);
1770: VecGetArray(b,&bb);
1771: for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1772: VecRestoreArrayRead(x,&xx);
1773: VecRestoreArray(b,&bb);
1774: }
1776: /* actually zap the local rows */
1777: /*
1778: Zero the required rows. If the "diagonal block" of the matrix
1779: is square and the user wishes to set the diagonal we use separate
1781:     code so that MatSetValues() is not called for each diagonal entry, which would allocate
1782:     new memory each time, triggering many mallocs and slowing things down.
1783: */
1784:   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1785: MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);
1786: if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1787: MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);
1788: } else if (diag != 0.0) {
1789: MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);
1790: if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1791: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1792: for (r = 0; r < len; ++r) {
1793: const PetscInt row = lrows[r] + A->rmap->rstart;
1794: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
1795: }
1796: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1797: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1798: } else {
1799: MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);
1800: }
1801: PetscFree(lrows);
1802: return(0);
1803: }
1807: PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1808: {
1809: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1810: PetscErrorCode ierr;
1811:   PetscMPIInt       size = l->size;
1812:   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count,n = A->rmap->n,lastidx = -1;
1813: PetscInt *lrows,*owners = A->rmap->range;
1814: PetscSFNode *rrows;
1815: PetscSF sf;
1816: const PetscScalar *xx;
1817: PetscScalar *bb,*mask;
1818: Vec xmask,lmask;
1819: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data;
1820: PetscInt bs = A->rmap->bs, bs2 = baij->bs2;
1821: PetscScalar *aa;
1822: #if defined(PETSC_DEBUG)
1823: PetscBool found = PETSC_FALSE;
1824: #endif
1827: /* Create SF where leaves are input rows and roots are owned rows */
1828: PetscMalloc1(n, &lrows);
1829: for (r = 0; r < n; ++r) lrows[r] = -1;
1830: PetscMalloc1(N, &rrows);
1831: for (r = 0; r < N; ++r) {
1832: const PetscInt idx = rows[r];
1833: PetscBool found = PETSC_FALSE;
1834: /* Trick for efficient searching for sorted rows */
1835: if (lastidx > idx) p = 0;
1836: lastidx = idx;
1837: for (; p < size; ++p) {
1838: if (idx >= owners[p] && idx < owners[p+1]) {
1839: rrows[r].rank = p;
1840: rrows[r].index = rows[r] - owners[p];
1841: found = PETSC_TRUE;
1842: break;
1843: }
1844: }
1845:     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
1846: }
1847: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
1848: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
1849: /* Collect flags for rows to be zeroed */
1850: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1851: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1852: PetscSFDestroy(&sf);
1853: /* Compress and put in row numbers */
1854: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1855: /* zero diagonal part of matrix */
1856: MatZeroRowsColumns(l->A,len,lrows,diag,x,b);
1857: /* handle off diagonal part of matrix */
1858: MatGetVecs(A,&xmask,NULL);
1859: VecDuplicate(l->lvec,&lmask);
1860: VecGetArray(xmask,&bb);
1861: for (i=0; i<len; i++) bb[lrows[i]] = 1;
1862: VecRestoreArray(xmask,&bb);
1863: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1864: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1865: VecDestroy(&xmask);
1866: if (x) {
1867: VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
1868: VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
1869: VecGetArrayRead(l->lvec,&xx);
1870: VecGetArray(b,&bb);
1871: }
1872: VecGetArray(lmask,&mask);
1873: /* remove zeroed rows of off diagonal matrix */
1874: for (i = 0; i < len; ++i) {
1875: row = lrows[i];
1876: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1877: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1878: for (k = 0; k < count; ++k) {
1879: aa[0] = 0.0;
1880: aa += bs;
1881: }
1882: }
1883: /* loop over all elements of off process part of matrix zeroing removed columns*/
1884: for (i = 0; i < l->B->rmap->N; ++i) {
1885: row = i/bs;
1886: for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1887: for (k = 0; k < bs; ++k) {
1888: col = bs*baij->j[j] + k;
1889: if (PetscAbsScalar(mask[col])) {
1890: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1891: if (b) bb[i] -= aa[0]*xx[col];
1892: aa[0] = 0.0;
1893: }
1894: }
1895: }
1896: }
1897: if (x) {
1898: VecRestoreArray(b,&bb);
1899: VecRestoreArrayRead(l->lvec,&xx);
1900: }
1901: VecRestoreArray(lmask,&mask);
1902: VecDestroy(&lmask);
1903: PetscFree(lrows);
1904: return(0);
1905: }
1909: PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1910: {
1911: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1915: MatSetUnfactored(a->A);
1916: return(0);
1917: }
1919: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1923: PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag)
1924: {
1925: Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1926: Mat a,b,c,d;
1927: PetscBool flg;
1931: a = matA->A; b = matA->B;
1932: c = matB->A; d = matB->B;
1934: MatEqual(a,c,&flg);
1935: if (flg) {
1936: MatEqual(b,d,&flg);
1937: }
1938: MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1939: return(0);
1940: }
1944: PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1945: {
1947: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1948: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data;
1951: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1952: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1953: MatCopy_Basic(A,B,str);
1954: } else {
1955: MatCopy(a->A,b->A,str);
1956: MatCopy(a->B,b->B,str);
1957: }
1958: return(0);
1959: }
1963: PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1964: {
1968: MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1969: return(0);
1970: }
1974: PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1975: {
1977: Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1978: PetscBLASInt bnz,one=1;
1979: Mat_SeqBAIJ *x,*y;
1982: if (str == SAME_NONZERO_PATTERN) {
1983: PetscScalar alpha = a;
1984: x = (Mat_SeqBAIJ*)xx->A->data;
1985: y = (Mat_SeqBAIJ*)yy->A->data;
1986: PetscBLASIntCast(x->nz,&bnz);
1987: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1988: x = (Mat_SeqBAIJ*)xx->B->data;
1989: y = (Mat_SeqBAIJ*)yy->B->data;
1990: PetscBLASIntCast(x->nz,&bnz);
1991: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1992: } else {
1993: MatAXPY_Basic(Y,a,X,str);
1994: }
1995: return(0);
1996: }
2000: PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2001: {
2002: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2006: MatRealPart(a->A);
2007: MatRealPart(a->B);
2008: return(0);
2009: }
2013: PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2014: {
2015: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2019: MatImaginaryPart(a->A);
2020: MatImaginaryPart(a->B);
2021: return(0);
2022: }
2026: PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2027: {
2029: IS iscol_local;
2030: PetscInt csize;
2033: ISGetLocalSize(iscol,&csize);
2034: if (call == MAT_REUSE_MATRIX) {
2035: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
2036: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2037: } else {
2038: ISAllGather(iscol,&iscol_local);
2039: }
2040: MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
2041: if (call == MAT_INITIAL_MATRIX) {
2042: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
2043: ISDestroy(&iscol_local);
2044: }
2045: return(0);
2046: }
2047: extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2050: /*
2051:     Not great since it makes two copies of the submatrix: first a local SeqBAIJ on each
2052:     process, and then the end result obtained by concatenating the local matrices.
2053:     Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2054: */
2055: PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2056: {
2058: PetscMPIInt rank,size;
2059: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2060: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2061: Mat M,Mreuse;
2062: MatScalar *vwork,*aa;
2063: MPI_Comm comm;
2064: IS isrow_new, iscol_new;
2065: PetscBool idflag,allrows, allcols;
2066: Mat_SeqBAIJ *aij;
2069: PetscObjectGetComm((PetscObject)mat,&comm);
2070: MPI_Comm_rank(comm,&rank);
2071: MPI_Comm_size(comm,&size);
2072:   /* The compression and expansion should be avoided. It does not flag
2073:      errors and it might change the indices, hence it is buggy. */
2074: ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);
2075: ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);
2077: /* Check for special case: each processor gets entire matrix columns */
2078: ISIdentity(iscol,&idflag);
2079: ISGetLocalSize(iscol,&ncol);
2080: if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2081: else allcols = PETSC_FALSE;
2083: ISIdentity(isrow,&idflag);
2084: ISGetLocalSize(isrow,&nrow);
2085: if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2086: else allrows = PETSC_FALSE;
2088: if (call == MAT_REUSE_MATRIX) {
2089: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);
2090: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2091: MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);
2092: } else {
2093: MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);
2094: }
2095: ISDestroy(&isrow_new);
2096: ISDestroy(&iscol_new);
2097: /*
2098: m - number of local rows
2099: n - number of columns (same on all processors)
2100: rstart - first row in new global matrix generated
2101: */
2102: MatGetBlockSize(mat,&bs);
2103: MatGetSize(Mreuse,&m,&n);
2104: m = m/bs;
2105: n = n/bs;
2107: if (call == MAT_INITIAL_MATRIX) {
2108: aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2109: ii = aij->i;
2110: jj = aij->j;
2112: /*
2113: Determine the number of non-zeros in the diagonal and off-diagonal
2114: portions of the matrix in order to do correct preallocation
2115: */
2117: /* first get start and end of "diagonal" columns */
2118: if (csize == PETSC_DECIDE) {
2119: ISGetSize(isrow,&mglobal);
2120: if (mglobal == n*bs) { /* square matrix */
2121: nlocal = m;
2122: } else {
2123: nlocal = n/size + ((n % size) > rank);
2124: }
2125: } else {
2126: nlocal = csize/bs;
2127: }
2128: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
2129: rstart = rend - nlocal;
2130: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2132: /* next, compute all the lengths */
2133: PetscMalloc2(m+1,&dlens,m+1,&olens);
2134: for (i=0; i<m; i++) {
2135: jend = ii[i+1] - ii[i];
2136: olen = 0;
2137: dlen = 0;
2138: for (j=0; j<jend; j++) {
2139: if (*jj < rstart || *jj >= rend) olen++;
2140: else dlen++;
2141: jj++;
2142: }
2143: olens[i] = olen;
2144: dlens[i] = dlen;
2145: }
2146: MatCreate(comm,&M);
2147: MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);
2148: MatSetType(M,((PetscObject)mat)->type_name);
2149: MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);
2150: PetscFree2(dlens,olens);
2151: } else {
2152: PetscInt ml,nl;
2154: M = *newmat;
2155: MatGetLocalSize(M,&ml,&nl);
2156: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2157: MatZeroEntries(M);
2158: /*
2159:     The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2160:     rather than the slower MatSetValuesBlocked().
2161: */
2162: M->was_assembled = PETSC_TRUE;
2163: M->assembled = PETSC_FALSE;
2164: }
2165: MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);
2166: MatGetOwnershipRange(M,&rstart,&rend);
2167: aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2168: ii = aij->i;
2169: jj = aij->j;
2170: aa = aij->a;
2171: for (i=0; i<m; i++) {
2172: row = rstart/bs + i;
2173: nz = ii[i+1] - ii[i];
2174: cwork = jj; jj += nz;
2175: vwork = aa; aa += nz*bs*bs;
2176: MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2177: }
2179: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2180: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2181: *newmat = M;
2183: /* save submatrix used in processor for next request */
2184: if (call == MAT_INITIAL_MATRIX) {
2185: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2186: PetscObjectDereference((PetscObject)Mreuse);
2187: }
2188: return(0);
2189: }
2193: PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2194: {
2195: MPI_Comm comm,pcomm;
2196: PetscInt clocal_size,nrows;
2197: const PetscInt *rows;
2198: PetscMPIInt size;
2199: IS crowp,lcolp;
2203: PetscObjectGetComm((PetscObject)A,&comm);
2204: /* make a collective version of 'rowp' */
2205: PetscObjectGetComm((PetscObject)rowp,&pcomm);
2206: if (pcomm==comm) {
2207: crowp = rowp;
2208: } else {
2209: ISGetSize(rowp,&nrows);
2210: ISGetIndices(rowp,&rows);
2211: ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);
2212: ISRestoreIndices(rowp,&rows);
2213: }
2214: ISSetPermutation(crowp);
2215: /* make a local version of 'colp' */
2216: PetscObjectGetComm((PetscObject)colp,&pcomm);
2217: MPI_Comm_size(pcomm,&size);
2218: if (size==1) {
2219: lcolp = colp;
2220: } else {
2221: ISAllGather(colp,&lcolp);
2222: }
2223: ISSetPermutation(lcolp);
2224: /* now we just get the submatrix */
2225:   MatGetLocalSize(A,NULL,&clocal_size);
2226: MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);
2227: /* clean up */
2228: if (pcomm!=comm) {
2229: ISDestroy(&crowp);
2230: }
2231: if (size>1) {
2232: ISDestroy(&lcolp);
2233: }
2234: return(0);
2235: }
2239: PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2240: {
2241: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2242: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
2245: if (nghosts) *nghosts = B->nbs;
2246: if (ghosts) *ghosts = baij->garray;
2247: return(0);
2248: }
2252: PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2253: {
2254: Mat B;
2255: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2256: Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2257: Mat_SeqAIJ *b;
2259: PetscMPIInt size,rank,*recvcounts = 0,*displs = 0;
2260: PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2261: PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2264: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
2265: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
2267: /* ----------------------------------------------------------------
2268: Tell every processor the number of nonzeros per row
2269: */
2270: PetscMalloc1((A->rmap->N/bs),&lens);
2271: for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2272: lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2273: }
2274: sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2275: PetscMalloc1(2*size,&recvcounts);
2276: displs = recvcounts + size;
2277: for (i=0; i<size; i++) {
2278: recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2279: displs[i] = A->rmap->range[i]/bs;
2280: }
2281: #if defined(PETSC_HAVE_MPI_IN_PLACE)
2282: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2283: #else
2284: MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2285: #endif
2286: /* ---------------------------------------------------------------
2287: Create the sequential matrix of the same type as the local block diagonal
2288: */
2289: MatCreate(PETSC_COMM_SELF,&B);
2290: MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);
2291: MatSetType(B,MATSEQAIJ);
2292: MatSeqAIJSetPreallocation(B,0,lens);
2293: b = (Mat_SeqAIJ*)B->data;
2295: /*--------------------------------------------------------------------
2296: Copy my part of matrix column indices over
2297: */
2298: sendcount = ad->nz + bd->nz;
2299: jsendbuf = b->j + b->i[rstarts[rank]/bs];
2300: a_jsendbuf = ad->j;
2301: b_jsendbuf = bd->j;
2302: n = A->rmap->rend/bs - A->rmap->rstart/bs;
2303: cnt = 0;
2304: for (i=0; i<n; i++) {
2306: /* put in lower diagonal portion */
2307: m = bd->i[i+1] - bd->i[i];
2308: while (m > 0) {
2309: /* is it above diagonal (in bd (compressed) numbering) */
2310: if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2311: jsendbuf[cnt++] = garray[*b_jsendbuf++];
2312: m--;
2313: }
2315: /* put in diagonal portion */
2316: for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2317: jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2318: }
2320: /* put in upper diagonal portion */
2321: while (m-- > 0) {
2322: jsendbuf[cnt++] = garray[*b_jsendbuf++];
2323: }
2324: }
2325: if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2327: /*--------------------------------------------------------------------
2328: Gather all column indices to all processors
2329: */
2330: for (i=0; i<size; i++) {
2331: recvcounts[i] = 0;
2332: for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2333: recvcounts[i] += lens[j];
2334: }
2335: }
2336: displs[0] = 0;
2337: for (i=1; i<size; i++) {
2338: displs[i] = displs[i-1] + recvcounts[i-1];
2339: }
2340: #if defined(PETSC_HAVE_MPI_IN_PLACE)
2341: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2342: #else
2343: MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2344: #endif
2345: /*--------------------------------------------------------------------
2346:      Assemble the matrix into usable form (note that the numerical values are not yet set)
2347: */
2348: /* set the b->ilen (length of each row) values */
2349: PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));
2350: /* set the b->i indices */
2351: b->i[0] = 0;
2352: for (i=1; i<=A->rmap->N/bs; i++) {
2353: b->i[i] = b->i[i-1] + lens[i-1];
2354: }
2355: PetscFree(lens);
2356: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2357: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2358: PetscFree(recvcounts);
2360: if (A->symmetric) {
2361: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
2362: } else if (A->hermitian) {
2363: MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
2364: } else if (A->structurally_symmetric) {
2365: MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
2366: }
2367: *newmat = B;
2368: return(0);
2369: }
2373: PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2374: {
2375: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
2377: Vec bb1 = 0;
2380: if (flag == SOR_APPLY_UPPER) {
2381: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2382: return(0);
2383: }
2385: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2386: VecDuplicate(bb,&bb1);
2387: }
2389: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2390: if (flag & SOR_ZERO_INITIAL_GUESS) {
2391: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2392: its--;
2393: }
2395: while (its--) {
2396: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2397: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2399: /* update rhs: bb1 = bb - B*x */
2400: VecScale(mat->lvec,-1.0);
2401: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2403: /* local sweep */
2404: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
2405: }
2406: } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2407: if (flag & SOR_ZERO_INITIAL_GUESS) {
2408: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2409: its--;
2410: }
2411: while (its--) {
2412: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2413: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2415: /* update rhs: bb1 = bb - B*x */
2416: VecScale(mat->lvec,-1.0);
2417: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2419: /* local sweep */
2420: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
2421: }
2422: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2423: if (flag & SOR_ZERO_INITIAL_GUESS) {
2424: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2425: its--;
2426: }
2427: while (its--) {
2428: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2429: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2431: /* update rhs: bb1 = bb - B*x */
2432: VecScale(mat->lvec,-1.0);
2433: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2435: /* local sweep */
2436: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
2437: }
2438:   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR type is not supported");
2440: VecDestroy(&bb1);
2441: return(0);
2442: }
2446: PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2447: {
2449: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data;
2450: PetscInt N,i,*garray = aij->garray;
2451: PetscInt ib,jb,bs = A->rmap->bs;
2452: Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2453: MatScalar *a_val = a_aij->a;
2454: Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2455: MatScalar *b_val = b_aij->a;
2456: PetscReal *work;
2459: MatGetSize(A,NULL,&N);
2460: PetscCalloc1(N,&work);
2461: if (type == NORM_2) {
2462: for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2463: for (jb=0; jb<bs; jb++) {
2464: for (ib=0; ib<bs; ib++) {
2465: work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2466: a_val++;
2467: }
2468: }
2469: }
2470: for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2471: for (jb=0; jb<bs; jb++) {
2472: for (ib=0; ib<bs; ib++) {
2473: work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2474: b_val++;
2475: }
2476: }
2477: }
2478: } else if (type == NORM_1) {
2479: for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2480: for (jb=0; jb<bs; jb++) {
2481: for (ib=0; ib<bs; ib++) {
2482: work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2483: a_val++;
2484: }
2485: }
2486: }
2487: for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2488: for (jb=0; jb<bs; jb++) {
2489: for (ib=0; ib<bs; ib++) {
2490: work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2491: b_val++;
2492: }
2493: }
2494: }
2495: } else if (type == NORM_INFINITY) {
2496: for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2497: for (jb=0; jb<bs; jb++) {
2498: for (ib=0; ib<bs; ib++) {
2499:           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2500: work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2501: a_val++;
2502: }
2503: }
2504: }
2505: for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2506: for (jb=0; jb<bs; jb++) {
2507: for (ib=0; ib<bs; ib++) {
2508:           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2509: work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2510: b_val++;
2511: }
2512: }
2513: }
2514: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2515: if (type == NORM_INFINITY) {
2516: MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
2517: } else {
2518: MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
2519: }
2520: PetscFree(work);
2521: if (type == NORM_2) {
2522: for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2523: }
2524: return(0);
2525: }
2529: PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2530: {
2531: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
2535: MatInvertBlockDiagonal(a->A,values);
2536: return(0);
2537: }
2540: /* -------------------------------------------------------------------*/
2541: static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2542: MatGetRow_MPIBAIJ,
2543: MatRestoreRow_MPIBAIJ,
2544: MatMult_MPIBAIJ,
2545: /* 4*/ MatMultAdd_MPIBAIJ,
2546: MatMultTranspose_MPIBAIJ,
2547: MatMultTransposeAdd_MPIBAIJ,
2548: 0,
2549: 0,
2550: 0,
2551: /*10*/ 0,
2552: 0,
2553: 0,
2554: MatSOR_MPIBAIJ,
2555: MatTranspose_MPIBAIJ,
2556: /*15*/ MatGetInfo_MPIBAIJ,
2557: MatEqual_MPIBAIJ,
2558: MatGetDiagonal_MPIBAIJ,
2559: MatDiagonalScale_MPIBAIJ,
2560: MatNorm_MPIBAIJ,
2561: /*20*/ MatAssemblyBegin_MPIBAIJ,
2562: MatAssemblyEnd_MPIBAIJ,
2563: MatSetOption_MPIBAIJ,
2564: MatZeroEntries_MPIBAIJ,
2565: /*24*/ MatZeroRows_MPIBAIJ,
2566: 0,
2567: 0,
2568: 0,
2569: 0,
2570: /*29*/ MatSetUp_MPIBAIJ,
2571: 0,
2572: 0,
2573: 0,
2574: 0,
2575: /*34*/ MatDuplicate_MPIBAIJ,
2576: 0,
2577: 0,
2578: 0,
2579: 0,
2580: /*39*/ MatAXPY_MPIBAIJ,
2581: MatGetSubMatrices_MPIBAIJ,
2582: MatIncreaseOverlap_MPIBAIJ,
2583: MatGetValues_MPIBAIJ,
2584: MatCopy_MPIBAIJ,
2585: /*44*/ 0,
2586: MatScale_MPIBAIJ,
2587: 0,
2588: 0,
2589: MatZeroRowsColumns_MPIBAIJ,
2590: /*49*/ 0,
2591: 0,
2592: 0,
2593: 0,
2594: 0,
2595: /*54*/ MatFDColoringCreate_MPIXAIJ,
2596: 0,
2597: MatSetUnfactored_MPIBAIJ,
2598: MatPermute_MPIBAIJ,
2599: MatSetValuesBlocked_MPIBAIJ,
2600: /*59*/ MatGetSubMatrix_MPIBAIJ,
2601: MatDestroy_MPIBAIJ,
2602: MatView_MPIBAIJ,
2603: 0,
2604: 0,
2605: /*64*/ 0,
2606: 0,
2607: 0,
2608: 0,
2609: 0,
2610: /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2611: 0,
2612: 0,
2613: 0,
2614: 0,
2615: /*74*/ 0,
2616: MatFDColoringApply_BAIJ,
2617: 0,
2618: 0,
2619: 0,
2620: /*79*/ 0,
2621: 0,
2622: 0,
2623: 0,
2624: MatLoad_MPIBAIJ,
2625: /*84*/ 0,
2626: 0,
2627: 0,
2628: 0,
2629: 0,
2630: /*89*/ 0,
2631: 0,
2632: 0,
2633: 0,
2634: 0,
2635: /*94*/ 0,
2636: 0,
2637: 0,
2638: 0,
2639: 0,
2640: /*99*/ 0,
2641: 0,
2642: 0,
2643: 0,
2644: 0,
2645: /*104*/0,
2646: MatRealPart_MPIBAIJ,
2647: MatImaginaryPart_MPIBAIJ,
2648: 0,
2649: 0,
2650: /*109*/0,
2651: 0,
2652: 0,
2653: 0,
2654: 0,
2655: /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2656: 0,
2657: MatGetGhosts_MPIBAIJ,
2658: 0,
2659: 0,
2660: /*119*/0,
2661: 0,
2662: 0,
2663: 0,
2664: MatGetMultiProcBlock_MPIBAIJ,
2665: /*124*/0,
2666: MatGetColumnNorms_MPIBAIJ,
2667: MatInvertBlockDiagonal_MPIBAIJ,
2668: 0,
2669: 0,
2670: /*129*/ 0,
2671: 0,
2672: 0,
2673: 0,
2674: 0,
2675: /*134*/ 0,
2676: 0,
2677: 0,
2678: 0,
2679: 0,
2680: /*139*/ 0,
2681: 0,
2682: 0,
2683: MatFDColoringSetUp_MPIXAIJ
2684: };
2688: PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2689: {
2691: *a = ((Mat_MPIBAIJ*)A->data)->A;
2692: return(0);
2693: }
2695: PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2699: PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2700: {
2701: PetscInt m,rstart,cstart,cend;
2702: PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2703: const PetscInt *JJ =0;
2704: PetscScalar *values=0;
2705: PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2709: PetscLayoutSetBlockSize(B->rmap,bs);
2710: PetscLayoutSetBlockSize(B->cmap,bs);
2711: PetscLayoutSetUp(B->rmap);
2712: PetscLayoutSetUp(B->cmap);
2713: PetscLayoutGetBlockSize(B->rmap,&bs);
2714: m = B->rmap->n/bs;
2715: rstart = B->rmap->rstart/bs;
2716: cstart = B->cmap->rstart/bs;
2717: cend = B->cmap->rend/bs;
2719: if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2720: PetscMalloc2(m,&d_nnz,m,&o_nnz);
2721: for (i=0; i<m; i++) {
2722: nz = ii[i+1] - ii[i];
2723: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2724: nz_max = PetscMax(nz_max,nz);
2725: JJ = jj + ii[i];
2726: for (j=0; j<nz; j++) {
2727: if (*JJ >= cstart) break;
2728: JJ++;
2729: }
2730: d = 0;
2731: for (; j<nz; j++) {
2732: if (*JJ++ >= cend) break;
2733: d++;
2734: }
2735: d_nnz[i] = d;
2736: o_nnz[i] = nz - d;
2737: }
2738: MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
2739: PetscFree2(d_nnz,o_nnz);
2741: values = (PetscScalar*)V;
2742: if (!values) {
2743: PetscMalloc1(bs*bs*nz_max,&values);
2744: PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
2745: }
2746: for (i=0; i<m; i++) {
2747: PetscInt row = i + rstart;
2748: PetscInt ncols = ii[i+1] - ii[i];
2749: const PetscInt *icols = jj + ii[i];
2750: if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2751: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2752: MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
2753: } else { /* block ordering does not match so we can only insert one block at a time. */
2754: PetscInt j;
2755: for (j=0; j<ncols; j++) {
2756: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2757: MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);
2758: }
2759: }
2760: }
2762: if (!V) { PetscFree(values); }
2763: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2764: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2765: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2766: return(0);
2767: }
2771: /*@C
2772: MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2773:    (block compressed row).
2775: Collective on MPI_Comm
2777: Input Parameters:
2778: + A - the matrix
2779: . bs - the block size
2780: . i - the indices into j for the start of each local row (starts with zero)
2781: . j - the column indices for each local row (starts with zero) these must be sorted for each row
2782: - v - optional values in the matrix
2784: Level: developer
2786: Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
2787: may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2788:    over rows within a block and the last index is over columns within a block. Fortran programs will likely set
2789:    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2790:    block and the second index is over columns within a block.
2792: .keywords: matrix, aij, compressed row, sparse, parallel
2794: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2795: @*/
2796: PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2797: {
2804: PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2805: return(0);
2806: }
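/*
   Usage sketch (illustrative only; this example is not part of the original source and the
   communicator, sizes and array contents below are made-up assumptions):

      Mat         A;
      PetscInt    bs   = 2;
      PetscInt    ii[] = {0,2,3};                         two local block rows with 2 and 1 blocks
      PetscInt    jj[] = {0,1,1};                         global block column of each block, sorted per row
      PetscScalar vv[] = {1,2,3,4, 5,6,7,8, 9,10,11,12};  bs*bs values per block

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIBAIJ);
      MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,vv);       preallocates, inserts the values and assembles

   With the default MAT_ROW_ORIENTED=PETSC_TRUE each group of bs*bs entries of vv is one block stored
   by rows, i.e. the v[nnz][bs][bs] layout described in the Notes of the manual page above.
*/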
2810: PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2811: {
2812: Mat_MPIBAIJ *b;
2814: PetscInt i;
2817: PetscLayoutSetBlockSize(B->rmap,bs);
2818: PetscLayoutSetBlockSize(B->cmap,bs);
2819: PetscLayoutSetUp(B->rmap);
2820: PetscLayoutSetUp(B->cmap);
2821: PetscLayoutGetBlockSize(B->rmap,&bs);
2823: if (d_nnz) {
2824: for (i=0; i<B->rmap->n/bs; i++) {
2825:       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2826: }
2827: }
2828: if (o_nnz) {
2829: for (i=0; i<B->rmap->n/bs; i++) {
2830:       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2831: }
2832: }
2834: b = (Mat_MPIBAIJ*)B->data;
2835: b->bs2 = bs*bs;
2836: b->mbs = B->rmap->n/bs;
2837: b->nbs = B->cmap->n/bs;
2838: b->Mbs = B->rmap->N/bs;
2839: b->Nbs = B->cmap->N/bs;
2841: for (i=0; i<=b->size; i++) {
2842: b->rangebs[i] = B->rmap->range[i]/bs;
2843: }
2844: b->rstartbs = B->rmap->rstart/bs;
2845: b->rendbs = B->rmap->rend/bs;
2846: b->cstartbs = B->cmap->rstart/bs;
2847: b->cendbs = B->cmap->rend/bs;
2849: if (!B->preallocated) {
2850: MatCreate(PETSC_COMM_SELF,&b->A);
2851: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
2852: MatSetType(b->A,MATSEQBAIJ);
2853: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
2854: MatCreate(PETSC_COMM_SELF,&b->B);
2855: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
2856: MatSetType(b->B,MATSEQBAIJ);
2857: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
2858: MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);
2859: }
2861: MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
2862: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
2863: B->preallocated = PETSC_TRUE;
2864: return(0);
2865: }
2867: extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2868: extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2872: PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2873: {
2874: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data;
2876: Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2877: PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2878: const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2881: PetscMalloc1((M+1),&ii);
2882: ii[0] = 0;
2883: for (i=0; i<M; i++) {
2884: if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2885: if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2886: ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2887:     /* remove one from the count if this row of the matrix contains a diagonal entry */
2888: for (j=id[i]; j<id[i+1]; j++) {
2889: if (jd[j] == i) {ii[i+1]--;break;}
2890: }
2891: }
2892: PetscMalloc1(ii[M],&jj);
2893: cnt = 0;
2894: for (i=0; i<M; i++) {
2895: for (j=io[i]; j<io[i+1]; j++) {
2896: if (garray[jo[j]] > rstart) break;
2897: jj[cnt++] = garray[jo[j]];
2898: }
2899: for (k=id[i]; k<id[i+1]; k++) {
2900: if (jd[k] != i) {
2901: jj[cnt++] = rstart + jd[k];
2902: }
2903: }
2904: for (; j<io[i+1]; j++) {
2905: jj[cnt++] = garray[jo[j]];
2906: }
2907: }
2908: MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);
2909: return(0);
2910: }
2912: #include <../src/mat/impls/aij/mpi/mpiaij.h>
2914: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2918: PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2919: {
2921: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2922: Mat B;
2923: Mat_MPIAIJ *b;
2926: if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2928: MatCreate(PetscObjectComm((PetscObject)A),&B);
2929: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
2930: MatSetType(B,MATMPIAIJ);
2931: MatSeqAIJSetPreallocation(B,0,NULL);
2932: MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);
2933: b = (Mat_MPIAIJ*) B->data;
2935: MatDestroy(&b->A);
2936: MatDestroy(&b->B);
2937: MatDisAssemble_MPIBAIJ(A);
2938: MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);
2939: MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);
2940: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2941: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2942: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2943: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2944: if (reuse == MAT_REUSE_MATRIX) {
2945: MatHeaderReplace(A,B);
2946: } else {
2947: *newmat = B;
2948: }
2949: return(0);
2950: }
2952: #if defined(PETSC_HAVE_MUMPS)
2953: PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2954: #endif
2956: /*MC
2957: MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2959: Options Database Keys:
2960: + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2961: . -mat_block_size <bs> - set the blocksize used to store the matrix
2962: - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> sets the hash table factor
2964: Level: beginner
2966: .seealso: MatCreateMPIBAIJ
2967: M*/
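/*
   Runtime sketch (illustrative; the executable name, sizes and process count are assumptions):
   the options database keys listed above take effect when the type is chosen at run time, e.g.

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,128,128);
      MatSetFromOptions(A);                               picks up -mat_type mpibaij
      MatSetUp(A);

   run, for example, as:  mpiexec -n 4 ./ex1 -mat_type mpibaij -mat_block_size 2
*/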
2969: PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2973: PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2974: {
2975: Mat_MPIBAIJ *b;
2977: PetscBool flg;
2980: PetscNewLog(B,&b);
2981: B->data = (void*)b;
2983: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2984: B->assembled = PETSC_FALSE;
2986: B->insertmode = NOT_SET_VALUES;
2987: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
2988: MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);
2990: /* build local table of row and column ownerships */
2991: PetscMalloc1((b->size+1),&b->rangebs);
2993: /* build cache for off array entries formed */
2994: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
2996: b->donotstash = PETSC_FALSE;
2997: b->colmap = NULL;
2998: b->garray = NULL;
2999: b->roworiented = PETSC_TRUE;
3001: /* stuff used in block assembly */
3002: b->barray = 0;
3004: /* stuff used for matrix vector multiply */
3005: b->lvec = 0;
3006: b->Mvctx = 0;
3008: /* stuff for MatGetRow() */
3009: b->rowindices = 0;
3010: b->rowvalues = 0;
3011: b->getrowactive = PETSC_FALSE;
3013: /* hash table stuff */
3014: b->ht = 0;
3015: b->hd = 0;
3016: b->ht_size = 0;
3017: b->ht_flag = PETSC_FALSE;
3018: b->ht_fact = 0;
3019: b->ht_total_ct = 0;
3020: b->ht_insert_ct = 0;
3022: /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3023: b->ijonly = PETSC_FALSE;
3025: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");
3026: PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);
3027: if (flg) {
3028: PetscReal fact = 1.39;
3029: MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
3030: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);
3031: if (fact <= 1.0) fact = 1.39;
3032: MatMPIBAIJSetHashTableFactor(B,fact);
3033: PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
3034: }
3035: PetscOptionsEnd();
3037: #if defined(PETSC_HAVE_MUMPS)
3038: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);
3039: #endif
3040: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);
3041: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);
3042: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);
3043: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);
3044: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);
3045: PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);
3046: PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);
3047: PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);
3048: PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);
3049: PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);
3050: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);
3051: PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);
3052: return(0);
3053: }
3055: /*MC
3056: MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3058: This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3059: and MATMPIBAIJ otherwise.
3061: Options Database Keys:
3062: . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3064: Level: beginner
3066: .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3067: M*/
3071: /*@C
3072: MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3073: (block compressed row). For good matrix assembly performance
3074: the user should preallocate the matrix storage by setting the parameters
3075: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3076: performance can be increased by more than a factor of 50.
3078: Collective on Mat
3080: Input Parameters:
3081: + A - the matrix
3082: . bs - size of block
3083: . d_nz - number of block nonzeros per block row in diagonal portion of local
3084: submatrix (same for all local rows)
3085: . d_nnz - array containing the number of block nonzeros in the various block rows
3086:            of the diagonal portion of the local submatrix (possibly different for each block
3087: row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
3088: set it even if it is zero.
3089: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
3090: submatrix (same for all local rows).
3091: -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3092: off-diagonal portion of the local submatrix (possibly different for
3093: each block row) or NULL.
3095: If the *_nnz parameter is given then the *_nz parameter is ignored
3097: Options Database Keys:
3098: + -mat_block_size - size of the blocks to use
3099: - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> sets the hash table factor
3101: Notes:
3102: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3103:    then it must be used on all processors that share the object for that argument.
3105: Storage Information:
3106: For a square global matrix we define each processor's diagonal portion
3107: to be its local rows and the corresponding columns (a square submatrix);
3108: each processor's off-diagonal portion encompasses the remainder of the
3109: local matrix (a rectangular submatrix).
3111: The user can specify preallocated storage for the diagonal part of
3112: the local submatrix with either d_nz or d_nnz (not both). Set
3113: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3114: memory allocation. Likewise, specify preallocated storage for the
3115: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3117: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3118: the figure below we depict these three local rows and all columns (0-11).
3120: .vb
3121: 0 1 2 3 4 5 6 7 8 9 10 11
3122: --------------------------
3123: row 3 |o o o d d d o o o o o o
3124: row 4 |o o o d d d o o o o o o
3125: row 5 |o o o d d d o o o o o o
3126: --------------------------
3127: .ve
3129: Thus, any entries in the d locations are stored in the d (diagonal)
3130: submatrix, and any entries in the o locations are stored in the
3131: o (off-diagonal) submatrix. Note that the d and the o submatrices are
3132: stored simply in the MATSEQBAIJ format for compressed row storage.
3134: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3135: and o_nz should indicate the number of block nonzeros per row in the o matrix.
3136: In general, for PDE problems in which most nonzeros are near the diagonal,
3137: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3138: or you will get TERRIBLE performance; see the users' manual chapter on
3139: matrices.
3141: You can call MatGetInfo() to get information on how effective the preallocation was;
3142:    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3143: You can also run with the option -info and look for messages with the string
3144: malloc in them to see if additional memory allocation was needed.
3146: Level: intermediate
3148: .keywords: matrix, block, aij, compressed row, sparse, parallel
3150: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3151: @*/
3152: PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3153: {
3160: PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
3161: return(0);
3162: }
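/*
   Usage sketch (illustrative only; the block size, local sizes and per-row estimates below are
   made-up assumptions):

      Mat      A;
      PetscInt bs = 3, mbs = 10, i;                       this process owns 10 block rows
      PetscInt d_nnz[10], o_nnz[10];

      for (i=0; i<mbs; i++) { d_nnz[i] = 5; o_nnz[i] = 2; }   per-block-row block counts

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,mbs*bs,mbs*bs,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIBAIJ);
      MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);

   after which the matrix is filled with MatSetValuesBlocked() or MatSetValues() and assembled with
   MatAssemblyBegin()/MatAssemblyEnd().
*/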
3166: /*@C
3167: MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3168: (block compressed row). For good matrix assembly performance
3169: the user should preallocate the matrix storage by setting the parameters
3170: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3171: performance can be increased by more than a factor of 50.
3173: Collective on MPI_Comm
3175: Input Parameters:
3176: + comm - MPI communicator
3177: .  bs   - size of block
3178: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3179: This value should be the same as the local size used in creating the
3180: y vector for the matrix-vector product y = Ax.
3181: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3182: This value should be the same as the local size used in creating the
3183: x vector for the matrix-vector product y = Ax.
3184: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3185: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3186: . d_nz - number of nonzero blocks per block row in diagonal portion of local
3187: submatrix (same for all local rows)
3188: . d_nnz - array containing the number of nonzero blocks in the various block rows
3189:            of the diagonal portion of the local submatrix (possibly different for each block
3190: row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
3191: and set it even if it is zero.
3192: . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3193: submatrix (same for all local rows).
3194: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3195: off-diagonal portion of the local submatrix (possibly different for
3196: each block row) or NULL.
3198: Output Parameter:
3199: . A - the matrix
3201: Options Database Keys:
3202: + -mat_block_size - size of the blocks to use
3203: - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> sets the hash table factor
3205: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3206:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3207:    [MatXXXXSetPreallocation() is, for example, MatMPIBAIJSetPreallocation()]
3209: Notes:
3210: If the *_nnz parameter is given then the *_nz parameter is ignored
3212:    A nonzero block is any block that has 1 or more nonzeros in it
3214: The user MUST specify either the local or global matrix dimensions
3215: (possibly both).
3217: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3218:    then it must be used on all processors that share the object for that argument.
3220: Storage Information:
3221: For a square global matrix we define each processor's diagonal portion
3222: to be its local rows and the corresponding columns (a square submatrix);
3223: each processor's off-diagonal portion encompasses the remainder of the
3224: local matrix (a rectangular submatrix).
3226: The user can specify preallocated storage for the diagonal part of
3227: the local submatrix with either d_nz or d_nnz (not both). Set
3228: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3229: memory allocation. Likewise, specify preallocated storage for the
3230: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3232: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3233: the figure below we depict these three local rows and all columns (0-11).
3235: .vb
3236: 0 1 2 3 4 5 6 7 8 9 10 11
3237: --------------------------
3238: row 3 |o o o d d d o o o o o o
3239: row 4 |o o o d d d o o o o o o
3240: row 5 |o o o d d d o o o o o o
3241: --------------------------
3242: .ve
3244: Thus, any entries in the d locations are stored in the d (diagonal)
3245: submatrix, and any entries in the o locations are stored in the
3246: o (off-diagonal) submatrix. Note that the d and the o submatrices are
3247: stored simply in the MATSEQBAIJ format for compressed row storage.
3249: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3250: and o_nz should indicate the number of block nonzeros per row in the o matrix.
3251: In general, for PDE problems in which most nonzeros are near the diagonal,
3252: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3253: or you will get TERRIBLE performance; see the users' manual chapter on
3254: matrices.
3256: Level: intermediate
3258: .keywords: matrix, block, aij, compressed row, sparse, parallel
3260: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3261: @*/
3262: PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3263: {
3265: PetscMPIInt size;
3268: MatCreate(comm,A);
3269: MatSetSizes(*A,m,n,M,N);
3270: MPI_Comm_size(comm,&size);
3271: if (size > 1) {
3272: MatSetType(*A,MATMPIBAIJ);
3273: MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
3274: } else {
3275: MatSetType(*A,MATSEQBAIJ);
3276: MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
3277: }
3278: return(0);
3279: }
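/*
   Usage sketch (illustrative; the communicator, block size, global size and nonzero estimates
   below are made-up assumptions):

      Mat A;
      MatCreateBAIJ(PETSC_COMM_WORLD,2,                   block size 2
                    PETSC_DECIDE,PETSC_DECIDE,100,100,    PETSc chooses the local row/column split
                    5,NULL,2,NULL,&A);                    5 diagonal and 2 off-diagonal blocks per block row

   followed by MatSetValuesBlocked(), MatAssemblyBegin()/MatAssemblyEnd() and finally MatDestroy(&A).
*/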
3283: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3284: {
3285: Mat mat;
3286: Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3288: PetscInt len=0;
3291: *newmat = 0;
3292: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
3293: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3294: MatSetType(mat,((PetscObject)matin)->type_name);
3295: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3297: mat->factortype = matin->factortype;
3298: mat->preallocated = PETSC_TRUE;
3299: mat->assembled = PETSC_TRUE;
3300: mat->insertmode = NOT_SET_VALUES;
3302: a = (Mat_MPIBAIJ*)mat->data;
3303: mat->rmap->bs = matin->rmap->bs;
3304: a->bs2 = oldmat->bs2;
3305: a->mbs = oldmat->mbs;
3306: a->nbs = oldmat->nbs;
3307: a->Mbs = oldmat->Mbs;
3308: a->Nbs = oldmat->Nbs;
3310: PetscLayoutReference(matin->rmap,&mat->rmap);
3311: PetscLayoutReference(matin->cmap,&mat->cmap);
3313: a->size = oldmat->size;
3314: a->rank = oldmat->rank;
3315: a->donotstash = oldmat->donotstash;
3316: a->roworiented = oldmat->roworiented;
3317: a->rowindices = 0;
3318: a->rowvalues = 0;
3319: a->getrowactive = PETSC_FALSE;
3320: a->barray = 0;
3321: a->rstartbs = oldmat->rstartbs;
3322: a->rendbs = oldmat->rendbs;
3323: a->cstartbs = oldmat->cstartbs;
3324: a->cendbs = oldmat->cendbs;
3326: /* hash table stuff */
3327: a->ht = 0;
3328: a->hd = 0;
3329: a->ht_size = 0;
3330: a->ht_flag = oldmat->ht_flag;
3331: a->ht_fact = oldmat->ht_fact;
3332: a->ht_total_ct = 0;
3333: a->ht_insert_ct = 0;
3335: PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));
3336: if (oldmat->colmap) {
3337: #if defined(PETSC_USE_CTABLE)
3338: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3339: #else
3340: PetscMalloc1((a->Nbs),&a->colmap);
3341: PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));
3342: PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));
3343: #endif
3344: } else a->colmap = 0;
3346: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3347: PetscMalloc1(len,&a->garray);
3348: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
3349: PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));
3350: } else a->garray = 0;
3352: MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);
3353: VecDuplicate(oldmat->lvec,&a->lvec);
3354: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
3355: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3356: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
3358: MatDuplicate(oldmat->A,cpvalues,&a->A);
3359: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
3360: MatDuplicate(oldmat->B,cpvalues,&a->B);
3361: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
3362: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3363: *newmat = mat;
3364: return(0);
3365: }
3369: PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3370: {
3372: int fd;
3373: PetscInt i,nz,j,rstart,rend;
3374: PetscScalar *vals,*buf;
3375: MPI_Comm comm;
3376: MPI_Status status;
3377: PetscMPIInt rank,size,maxnz;
3378: PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3379: PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3380: PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3381: PetscMPIInt tag = ((PetscObject)viewer)->tag;
3382: PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3383: PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3386: PetscObjectGetComm((PetscObject)viewer,&comm);
3387: PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");
3388: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3389: PetscOptionsEnd();
3391: MPI_Comm_size(comm,&size);
3392: MPI_Comm_rank(comm,&rank);
3393: if (!rank) {
3394: PetscViewerBinaryGetDescriptor(viewer,&fd);
3395: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
3396:     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3397: }
3399: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3401: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3402: M = header[1]; N = header[2];
3404: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3405: if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3406: if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3408: /* If global sizes are set, check if they are consistent with that given in the file */
3409: if (sizesset) {
3410: MatGetSize(newmat,&grows,&gcols);
3411: }
3412: if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3413: if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3415: if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3417: /*
3418: This code adds extra rows to make sure the number of rows is
3419: divisible by the blocksize
3420: */
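  /*
     Worked example with illustrative values: for M = 10 and bs = 4, Mbs = 10/4 = 2
     and extra_rows = 4 - 10 + 4*2 = 2; since extra_rows != bs, Mbs is bumped to 3
     and two padded rows are appended (their diagonal entries are set to 1.0 below).
  */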
3421: Mbs = M/bs;
3422: extra_rows = bs - M + bs*Mbs;
3423: if (extra_rows == bs) extra_rows = 0;
3424: else Mbs++;
3425: if (extra_rows && !rank) {
3426: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3427: }
3429: /* determine ownership of all rows */
3430: if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3431: mbs = Mbs/size + ((Mbs % size) > rank);
3432: m = mbs*bs;
3433: } else { /* User set */
3434: m = newmat->rmap->n;
3435: mbs = m/bs;
3436: }
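  /*
     Worked example with illustrative values: for Mbs = 10 block rows on size = 4
     processes, mbs = 10/4 + ((10 % 4) > rank) gives 3,3,2,2 block rows on ranks
     0,1,2,3, i.e. the remainder block rows go to the lowest-numbered ranks.
  */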
3437: PetscMalloc2(size+1,&rowners,size+1,&browners);
3438: MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3440: /* process 0 needs enough room for process with most rows */
3441: if (!rank) {
3442: mmax = rowners[1];
3443: for (i=2; i<=size; i++) {
3444: mmax = PetscMax(mmax,rowners[i]);
3445: }
3446: mmax*=bs;
3447: } else mmax = -1; /* unused, but compiler warns anyway */
3449: rowners[0] = 0;
3450: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3451: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3452: rstart = rowners[rank];
3453: rend = rowners[rank+1];
3455: /* distribute row lengths to all processors */
3456: PetscMalloc1(m,&locrowlens);
3457: if (!rank) {
3458: mend = m;
3459: if (size == 1) mend = mend - extra_rows;
3460: PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);
3461: for (j=mend; j<m; j++) locrowlens[j] = 1;
3462: PetscMalloc1(mmax,&rowlengths);
3463: PetscCalloc1(size,&procsnz);
3464: for (j=0; j<m; j++) {
3465: procsnz[0] += locrowlens[j];
3466: }
3467: for (i=1; i<size; i++) {
3468: mend = browners[i+1] - browners[i];
3469: if (i == size-1) mend = mend - extra_rows;
3470: PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);
3471: for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3472: /* calculate the number of nonzeros on each processor */
3473: for (j=0; j<browners[i+1]-browners[i]; j++) {
3474: procsnz[i] += rowlengths[j];
3475: }
3476: MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);
3477: }
3478: PetscFree(rowlengths);
3479: } else {
3480: MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);
3481: }
3483: if (!rank) {
3484: /* determine max buffer needed and allocate it */
3485: maxnz = procsnz[0];
3486: for (i=1; i<size; i++) {
3487: maxnz = PetscMax(maxnz,procsnz[i]);
3488: }
3489: PetscMalloc1(maxnz,&cols);
3491: /* read in my part of the matrix column indices */
3492: nz = procsnz[0];
3493: PetscMalloc1((nz+1),&ibuf);
3494: mycols = ibuf;
3495: if (size == 1) nz -= extra_rows;
3496: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3497: if (size == 1) {
3498: for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3499: }
3501: /* read in the column indices for the other procs (except the last one) and ship them off */
3502: for (i=1; i<size-1; i++) {
3503: nz = procsnz[i];
3504: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3505: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
3506: }
3507: /* read in the column indices for the last proc */
3508: if (size != 1) {
3509: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
3510: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3511: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3512: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
3513: }
3514: PetscFree(cols);
3515: } else {
3516: /* determine buffer space needed for message */
3517: nz = 0;
3518: for (i=0; i<m; i++) {
3519: nz += locrowlens[i];
3520: }
3521: PetscMalloc1((nz+1),&ibuf);
3522: mycols = ibuf;
3523: /* receive message of column indices */
3524: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
3525: MPI_Get_count(&status,MPIU_INT,&maxnz);
3526: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3527: }
3529: /* loop over local rows, determining number of off diagonal entries */
3530: PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);
3531: PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);
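  /* mask[] flags block columns already seen in the current block row; block columns
     in [rstart,rend) belong to the diagonal block and are recorded in masked1[],
     all others in masked2[] (the off-diagonal block) */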
3532: rowcount = 0; nzcount = 0;
3533: for (i=0; i<mbs; i++) {
3534: dcount = 0;
3535: odcount = 0;
3536: for (j=0; j<bs; j++) {
3537: kmax = locrowlens[rowcount];
3538: for (k=0; k<kmax; k++) {
3539: tmp = mycols[nzcount++]/bs;
3540: if (!mask[tmp]) {
3541: mask[tmp] = 1;
3542: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3543: else masked1[dcount++] = tmp;
3544: }
3545: }
3546: rowcount++;
3547: }
3549: dlens[i] = dcount;
3550: odlens[i] = odcount;
3552: /* zero out the mask elements we set */
3553: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3554: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3555: }
3558: if (!sizesset) {
3559: MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);
3560: }
3561: MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);
3563: if (!rank) {
3564: PetscMalloc1((maxnz+1),&buf);
3565: /* read in my part of the matrix numerical values */
3566: nz = procsnz[0];
3567: vals = buf;
3568: mycols = ibuf;
3569: if (size == 1) nz -= extra_rows;
3570: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3571: if (size == 1) {
3572: for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3573: }
3575: /* insert into matrix */
3576: jj = rstart*bs;
3577: for (i=0; i<m; i++) {
3578: MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
3579: mycols += locrowlens[i];
3580: vals += locrowlens[i];
3581: jj++;
3582: }
3583: /* read in the other processors' values (except the last one) and ship them out */
3584: for (i=1; i<size-1; i++) {
3585: nz = procsnz[i];
3586: vals = buf;
3587: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3588: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
3589: }
3590: /* the last proc */
3591: if (size != 1) {
3592: nz = procsnz[i] - extra_rows;
3593: vals = buf;
3594: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3595: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3596: MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);
3597: }
3598: PetscFree(procsnz);
3599: } else {
3600: /* receive numeric values */
3601: PetscMalloc1((nz+1),&buf);
3603: /* receive message of values */
3604: vals = buf;
3605: mycols = ibuf;
3606: MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);
3608: /* insert into matrix */
3609: jj = rstart*bs;
3610: for (i=0; i<m; i++) {
3611: MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
3612: mycols += locrowlens[i];
3613: vals += locrowlens[i];
3614: jj++;
3615: }
3616: }
3617: PetscFree(locrowlens);
3618: PetscFree(buf);
3619: PetscFree(ibuf);
3620: PetscFree2(rowners,browners);
3621: PetscFree2(dlens,odlens);
3622: PetscFree3(mask,masked1,masked2);
3623: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3624: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3625: return(0);
3626: }
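/*
   A minimal usage sketch for loading an MPIBAIJ matrix from a PETSc binary file;
   the file name "matrix.dat" is illustrative and error checking is omitted, as in
   the listing above.  The block size can be chosen at load time with
   -matload_block_size <bs>.

      Mat         A;
      PetscViewer viewer;

      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetType(A,MATMPIBAIJ);
      MatLoad(A,viewer);
      PetscViewerDestroy(&viewer);
      MatDestroy(&A);
*/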
3630: /*@
3631: MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
3633: Input Parameters:
3634: +  mat - the matrix
3635: -  fact - the factor used to compute the hash table size
3637: Not Collective, each process can use a different factor
3639: Level: advanced
3641: Notes:
3642: This can also be set by the command line option: -mat_use_hash_table <fact>
3644: .keywords: matrix, hashtable, factor, HT
3646: .seealso: MatSetOption()
3647: @*/
3648: PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3649: {
3653: PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));
3654: return(0);
3655: }
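/*
   A minimal usage sketch; the sizes, block size, and factor value are illustrative.
   The factor only takes effect when the hash-table assembly path is enabled, here
   via MatSetOption() with MAT_USE_HASH_TABLE.

      Mat A;

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
      MatSetType(A,MATMPIBAIJ);
      MatMPIBAIJSetPreallocation(A,2,5,NULL,5,NULL);
      MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
      MatMPIBAIJSetHashTableFactor(A,1.6);
*/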
3659: PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3660: {
3661: Mat_MPIBAIJ *baij;
3664: baij = (Mat_MPIBAIJ*)mat->data;
3665: baij->ht_fact = fact;
3666: return(0);
3667: }
3671: PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3672: {
3673: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3676: *Ad = a->A;
3677: *Ao = a->B;
3678: *colmap = a->garray;
3679: return(0);
3680: }
3682: /*
3683: Special version for direct calls from Fortran (to eliminate two function call overheads)
3684: */
3685: #if defined(PETSC_HAVE_FORTRAN_CAPS)
3686: #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3687: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3688: #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3689: #endif
3693: /*@C
3694: MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3696: Collective on Mat
3698: Input Parameters:
3699: + mat - the matrix
3700: . min - number of input rows
3701: . im - input rows
3702: . nin - number of input columns
3703: . in - input columns
3704: . v - numerical values input
3705: - addvin - INSERT_VALUES or ADD_VALUES
3707: Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), an unfortunate duplication of code.
3709: Level: advanced
3711: .seealso: MatSetValuesBlocked()
3712: @*/
3713: PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3714: {
3715: /* convert input arguments to C version */
3716: Mat mat = *matin;
3717: PetscInt m = *min, n = *nin;
3718: InsertMode addv = *addvin;
3720: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
3721: const MatScalar *value;
3722: MatScalar *barray = baij->barray;
3723: PetscBool roworiented = baij->roworiented;
3724: PetscErrorCode ierr;
3725: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
3726: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3727: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3730: /* tasks normally handled by MatSetValuesBlocked() */
3731: if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3732: #if defined(PETSC_USE_DEBUG)
3733: else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3734: if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3735: #endif
3736: if (mat->assembled) {
3737: mat->was_assembled = PETSC_TRUE;
3738: mat->assembled = PETSC_FALSE;
3739: }
3740: PetscLogEventBegin(MAT_SetValues,mat,0,0,0);
3743: if (!barray) {
3744: PetscMalloc1(bs2,&barray);
3745: baij->barray = barray;
3746: }
3748: if (roworiented) stepval = (n-1)*bs;
3749: else stepval = (m-1)*bs;
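  /* stepval is the number of scalar entries to skip in v, after copying one bs-long
     row (row-oriented) or column (column-oriented) of a block, to reach the next
     row/column of the same block; v is treated as a dense (m*bs) x (n*bs) array */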
3751: for (i=0; i<m; i++) {
3752: if (im[i] < 0) continue;
3753: #if defined(PETSC_USE_DEBUG)
3754: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3755: #endif
3756: if (im[i] >= rstart && im[i] < rend) {
3757: row = im[i] - rstart;
3758: for (j=0; j<n; j++) {
3759: /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
3760: if ((roworiented) && (n == 1)) {
3761: barray = (MatScalar*)v + i*bs2;
3762: } else if ((!roworiented) && (m == 1)) {
3763: barray = (MatScalar*)v + j*bs2;
3764: } else { /* Here a copy is required */
3765: if (roworiented) {
3766: value = v + i*(stepval+bs)*bs + j*bs;
3767: } else {
3768: value = v + j*(stepval+bs)*bs + i*bs;
3769: }
3770: for (ii=0; ii<bs; ii++,value+=stepval) {
3771: for (jj=0; jj<bs; jj++) {
3772: *barray++ = *value++;
3773: }
3774: }
3775: barray -=bs2;
3776: }
3778: if (in[j] >= cstart && in[j] < cend) {
3779: col = in[j] - cstart;
3780: MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);
3781: } else if (in[j] < 0) continue;
3782: #if defined(PETSC_USE_DEBUG)
3783: else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3784: #endif
3785: else {
3786: if (mat->was_assembled) {
3787: if (!baij->colmap) {
3788: MatCreateColmap_MPIBAIJ_Private(mat);
3789: }
3791: #if defined(PETSC_USE_DEBUG)
3792: #if defined(PETSC_USE_CTABLE)
3793: { PetscInt data;
3794: PetscTableFind(baij->colmap,in[j]+1,&data);
3795: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3796: }
3797: #else
3798: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3799: #endif
3800: #endif
3801: #if defined(PETSC_USE_CTABLE)
3802: PetscTableFind(baij->colmap,in[j]+1,&col);
3803: col = (col - 1)/bs;
3804: #else
3805: col = (baij->colmap[in[j]] - 1)/bs;
3806: #endif
3807: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3808: MatDisAssemble_MPIBAIJ(mat);
3809: col = in[j];
3810: }
3811: } else col = in[j];
3812: MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);
3813: }
3814: }
3815: } else {
3816: if (!baij->donotstash) {
3817: if (roworiented) {
3818: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3819: } else {
3820: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3821: }
3822: }
3823: }
3824: }
3826: /* task normally handled by MatSetValuesBlocked() */
3827: PetscLogEventEnd(MAT_SetValues,mat,0,0,0);
3828: return(0);
3829: }
3833: /*@
3834: MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in standard
3835: CSR format.
3837: Collective on MPI_Comm
3839: Input Parameters:
3840: + comm - MPI communicator
3841: . bs - the block size, only a block size of 1 is supported
3842: . m - number of local rows (Cannot be PETSC_DECIDE)
3843: . n - number of local columns. This value should be the same as the local size used in creating the
3844: x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3845: calculated if N is given). For square matrices n is almost always m.
3846: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3847: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3848: . i - row indices
3849: . j - column indices
3850: - a - matrix values
3852: Output Parameter:
3853: . mat - the matrix
3855: Level: intermediate
3857: Notes:
3858: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3859: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3860: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3862: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3863: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3864: block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
3865: with column-major ordering within blocks.
3867: The i and j indices are 0-based, and the i array gives the offsets into the local j array.
3869: .keywords: matrix, baij, block, compressed row, sparse, parallel
3871: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3872: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3873: @*/
3874: PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3875: {
3879: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3880: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3881: MatCreate(comm,mat);
3882: MatSetSizes(*mat,m,n,M,N);
3883: MatSetType(*mat,MATMPIBAIJ);
3884: MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);
3885: MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);
3886: MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);
3887: return(0);
3888: }
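/*
   A minimal usage sketch with bs = 1 (the only supported block size, see above):
   each process contributes two rows of a global identity matrix.  The names m,
   rstart, ii, jj, aa, and C are illustrative; error checking is omitted.

      PetscInt    m = 2, rstart, ii[3], jj[2];
      PetscScalar aa[2] = {1.0, 1.0};
      Mat         C;

      MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);
      rstart -= m;
      ii[0] = 0; ii[1] = 1; ii[2] = 2;
      jj[0] = rstart; jj[1] = rstart + 1;
      MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,m,m,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&C);
      MatDestroy(&C);
*/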