Actual source code: mpibaij.c
petsc-master 2020-08-25
1: #include <../src/mat/impls/baij/mpi/mpibaij.h>
3: #include <petsc/private/hashseti.h>
4: #include <petscblaslapack.h>
5: #include <petscsf.h>
7: #if defined(PETSC_HAVE_HYPRE)
8: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9: #endif
11: PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12: {
13: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
15: PetscInt i,*idxb = NULL;
16: PetscScalar *va,*vb;
17: Vec vtmp;
20: MatGetRowMaxAbs(a->A,v,idx);
21: VecGetArray(v,&va);
22: if (idx) {
23: for (i=0; i<A->rmap->n; i++) {
24: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25: }
26: }
28: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
29: if (idx) {PetscMalloc1(A->rmap->n,&idxb);}
30: MatGetRowMaxAbs(a->B,vtmp,idxb);
31: VecGetArray(vtmp,&vb);
33: for (i=0; i<A->rmap->n; i++) {
34: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35: va[i] = vb[i];
36: if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37: }
38: }
40: VecRestoreArray(v,&va);
41: VecRestoreArray(vtmp,&vb);
42: PetscFree(idxb);
43: VecDestroy(&vtmp);
44: return(0);
45: }
47: PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
48: {
49: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
53: MatStoreValues(aij->A);
54: MatStoreValues(aij->B);
55: return(0);
56: }
58: PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
59: {
60: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
64: MatRetrieveValues(aij->A);
65: MatRetrieveValues(aij->B);
66: return(0);
67: }
69: /*
70: Local utility routine that creates a mapping from the global column
71: number to the local number in the off-diagonal part of the local
72: storage of the matrix. This is done in a non-scalable way since the
73: length of colmap equals the global matrix length.
74: */
75: PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
76: {
77: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
78: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
80: PetscInt nbs = B->nbs,i,bs=mat->rmap->bs;
83: #if defined(PETSC_USE_CTABLE)
84: PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);
85: for (i=0; i<nbs; i++) {
86: PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);
87: }
88: #else
89: PetscCalloc1(baij->Nbs+1,&baij->colmap);
90: PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));
91: for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
92: #endif
93: return(0);
94: }
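/*
   Illustrative sketch (not part of the PETSc source): how the colmap built above
   is consulted. Entries are stored shifted by +1 so that a stored 0 means "this
   global block column has no local counterpart". The helper name
   ColmapLookupSketch is hypothetical; the real lookups are inlined in
   MatSetValues_MPIBAIJ() below.
*/
static PetscInt ColmapLookupSketch(Mat_MPIBAIJ *baij,PetscInt bs,PetscInt gcol)
{
  PetscInt lcol;
#if defined(PETSC_USE_CTABLE)
  PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);
  lcol--;                                /* undo the +1 shift; -1 means absent */
#else
  lcol = baij->colmap[gcol/bs] - 1;      /* undo the +1 shift; -1 means absent */
#endif
  if (lcol >= 0) lcol += gcol % bs;      /* add the offset within the bs x bs block */
  return lcol;
}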
96: #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \
97: { \
98: brow = row/bs; \
99: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
100: rmax = aimax[brow]; nrow = ailen[brow]; \
101: bcol = col/bs; \
102: ridx = row % bs; cidx = col % bs; \
103: low = 0; high = nrow; \
104: while (high-low > 3) { \
105: t = (low+high)/2; \
106: if (rp[t] > bcol) high = t; \
107: else low = t; \
108: } \
109: for (_i=low; _i<high; _i++) { \
110: if (rp[_i] > bcol) break; \
111: if (rp[_i] == bcol) { \
112: bap = ap + bs2*_i + bs*cidx + ridx; \
113: if (addv == ADD_VALUES) *bap += value; \
114: else *bap = value; \
115: goto a_noinsert; \
116: } \
117: } \
118: if (a->nonew == 1) goto a_noinsert; \
119: if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
120: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
121: N = nrow++ - 1; \
122: /* shift up all the later entries in this row */ \
123: PetscArraymove(rp+_i+1,rp+_i,N-_i+1);\
124: PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)); \
125: PetscArrayzero(ap+bs2*_i,bs2); \
126: rp[_i] = bcol; \
127: ap[bs2*_i + bs*cidx + ridx] = value; \
128: a_noinsert:; \
129: ailen[brow] = nrow; \
130: }
132: #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \
133: { \
134: brow = row/bs; \
135: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
136: rmax = bimax[brow]; nrow = bilen[brow]; \
137: bcol = col/bs; \
138: ridx = row % bs; cidx = col % bs; \
139: low = 0; high = nrow; \
140: while (high-low > 3) { \
141: t = (low+high)/2; \
142: if (rp[t] > bcol) high = t; \
143: else low = t; \
144: } \
145: for (_i=low; _i<high; _i++) { \
146: if (rp[_i] > bcol) break; \
147: if (rp[_i] == bcol) { \
148: bap = ap + bs2*_i + bs*cidx + ridx; \
149: if (addv == ADD_VALUES) *bap += value; \
150: else *bap = value; \
151: goto b_noinsert; \
152: } \
153: } \
154: if (b->nonew == 1) goto b_noinsert; \
155: if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
156: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
157: N = nrow++ - 1; \
158: /* shift up all the later entries in this row */ \
159: PetscArraymove(rp+_i+1,rp+_i,N-_i+1);\
160: PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));\
161: PetscArrayzero(ap+bs2*_i,bs2); \
162: rp[_i] = bcol; \
163: ap[bs2*_i + bs*cidx + ridx] = value; \
164: b_noinsert:; \
165: bilen[brow] = nrow; \
166: }
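/*
   Illustrative sketch (not part of the PETSc source) of the search strategy shared
   by the two macros above: a binary search narrows the window to at most three
   entries, a short linear scan finishes the job, and on a miss the later entries
   are shifted up so the row stays sorted. The helper name FindOrInsertSketch is
   hypothetical and omits the reallocation handled by MatSeqXAIJReallocateAIJ().
*/
static PetscInt FindOrInsertSketch(PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 3) {                  /* bisect until the window is tiny */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else low = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > bcol) break;              /* i is the insertion point */
    if (rp[i] == bcol) return i;          /* block column already present */
  }
  for (t=nrow; t>i; t--) rp[t] = rp[t-1]; /* shift up all the later entries */
  rp[i] = bcol;
  return i;
}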
168: PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
169: {
170: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
171: MatScalar value;
172: PetscBool roworiented = baij->roworiented;
174: PetscInt i,j,row,col;
175: PetscInt rstart_orig=mat->rmap->rstart;
176: PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
177: PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs;
179: /* Some Variables required in the macro */
180: Mat A = baij->A;
181: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
182: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
183: MatScalar *aa =a->a;
185: Mat B = baij->B;
186: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
187: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
188: MatScalar *ba =b->a;
190: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
191: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
192: MatScalar *ap,*bap;
195: for (i=0; i<m; i++) {
196: if (im[i] < 0) continue;
197: if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
198: if (im[i] >= rstart_orig && im[i] < rend_orig) {
199: row = im[i] - rstart_orig;
200: for (j=0; j<n; j++) {
201: if (in[j] >= cstart_orig && in[j] < cend_orig) {
202: col = in[j] - cstart_orig;
203: if (roworiented) value = v[i*n+j];
204: else value = v[i+j*m];
205: MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
206: } else if (in[j] < 0) continue;
207: else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
208: else {
209: if (mat->was_assembled) {
210: if (!baij->colmap) {
211: MatCreateColmap_MPIBAIJ_Private(mat);
212: }
213: #if defined(PETSC_USE_CTABLE)
214: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
215: col = col - 1;
216: #else
217: col = baij->colmap[in[j]/bs] - 1;
218: #endif
219: if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
220: MatDisAssemble_MPIBAIJ(mat);
221: col = in[j];
222: /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
223: B = baij->B;
224: b = (Mat_SeqBAIJ*)(B)->data;
225: bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
226: ba =b->a;
227: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
228: else col += in[j]%bs;
229: } else col = in[j];
230: if (roworiented) value = v[i*n+j];
231: else value = v[i+j*m];
232: MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
233: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
234: }
235: }
236: } else {
237: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
238: if (!baij->donotstash) {
239: mat->assembled = PETSC_FALSE;
240: if (roworiented) {
241: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
242: } else {
243: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
244: }
245: }
246: }
247: }
248: return(0);
249: }
251: PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
252: {
253: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
254: PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
255: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
256: PetscErrorCode ierr;
257: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
258: PetscBool roworiented=a->roworiented;
259: const PetscScalar *value = v;
260: MatScalar *ap,*aa = a->a,*bap;
263: rp = aj + ai[row];
264: ap = aa + bs2*ai[row];
265: rmax = imax[row];
266: nrow = ailen[row];
267: value = v;
268: low = 0;
269: high = nrow;
270: while (high-low > 7) {
271: t = (low+high)/2;
272: if (rp[t] > col) high = t;
273: else low = t;
274: }
275: for (i=low; i<high; i++) {
276: if (rp[i] > col) break;
277: if (rp[i] == col) {
278: bap = ap + bs2*i;
279: if (roworiented) {
280: if (is == ADD_VALUES) {
281: for (ii=0; ii<bs; ii++) {
282: for (jj=ii; jj<bs2; jj+=bs) {
283: bap[jj] += *value++;
284: }
285: }
286: } else {
287: for (ii=0; ii<bs; ii++) {
288: for (jj=ii; jj<bs2; jj+=bs) {
289: bap[jj] = *value++;
290: }
291: }
292: }
293: } else {
294: if (is == ADD_VALUES) {
295: for (ii=0; ii<bs; ii++,value+=bs) {
296: for (jj=0; jj<bs; jj++) {
297: bap[jj] += value[jj];
298: }
299: bap += bs;
300: }
301: } else {
302: for (ii=0; ii<bs; ii++,value+=bs) {
303: for (jj=0; jj<bs; jj++) {
304: bap[jj] = value[jj];
305: }
306: bap += bs;
307: }
308: }
309: }
310: goto noinsert2;
311: }
312: }
313: if (nonew == 1) goto noinsert2;
314: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
315: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
316: N = nrow++ - 1; high++;
317: /* shift up all the later entries in this row */
318: PetscArraymove(rp+i+1,rp+i,N-i+1);
319: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
320: rp[i] = col;
321: bap = ap + bs2*i;
322: if (roworiented) {
323: for (ii=0; ii<bs; ii++) {
324: for (jj=ii; jj<bs2; jj+=bs) {
325: bap[jj] = *value++;
326: }
327: }
328: } else {
329: for (ii=0; ii<bs; ii++) {
330: for (jj=0; jj<bs; jj++) {
331: *bap++ = *value++;
332: }
333: }
334: }
335: noinsert2:;
336: ailen[row] = nrow;
337: return(0);
338: }
340: /*
341: This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed,
342: by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
343: */
344: PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
345: {
346: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
347: const PetscScalar *value;
348: MatScalar *barray = baij->barray;
349: PetscBool roworiented = baij->roworiented;
350: PetscErrorCode ierr;
351: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
352: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
353: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
356: if (!barray) {
357: PetscMalloc1(bs2,&barray);
358: baij->barray = barray;
359: }
361: if (roworiented) stepval = (n-1)*bs;
362: else stepval = (m-1)*bs;
364: for (i=0; i<m; i++) {
365: if (im[i] < 0) continue;
366: if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
367: if (im[i] >= rstart && im[i] < rend) {
368: row = im[i] - rstart;
369: for (j=0; j<n; j++) {
370: /* If only one block column (row-oriented) or one block row (column-oriented) is passed, a copy is not required */
371: if ((roworiented) && (n == 1)) {
372: barray = (MatScalar*)v + i*bs2;
373: } else if ((!roworiented) && (m == 1)) {
374: barray = (MatScalar*)v + j*bs2;
375: } else { /* Here a copy is required */
376: if (roworiented) {
377: value = v + (i*(stepval+bs) + j)*bs;
378: } else {
379: value = v + (j*(stepval+bs) + i)*bs;
380: }
381: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
382: for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
383: barray += bs;
384: }
385: barray -= bs2;
386: }
388: if (in[j] >= cstart && in[j] < cend) {
389: col = in[j] - cstart;
390: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);
391: } else if (in[j] < 0) continue;
392: else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
393: else {
394: if (mat->was_assembled) {
395: if (!baij->colmap) {
396: MatCreateColmap_MPIBAIJ_Private(mat);
397: }
399: #if defined(PETSC_USE_DEBUG)
400: #if defined(PETSC_USE_CTABLE)
401: { PetscInt data;
402: PetscTableFind(baij->colmap,in[j]+1,&data);
403: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
404: }
405: #else
406: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
407: #endif
408: #endif
409: #if defined(PETSC_USE_CTABLE)
410: PetscTableFind(baij->colmap,in[j]+1,&col);
411: col = (col - 1)/bs;
412: #else
413: col = (baij->colmap[in[j]] - 1)/bs;
414: #endif
415: if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
416: MatDisAssemble_MPIBAIJ(mat);
417: col = in[j];
418: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
419: } else col = in[j];
420: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);
421: }
422: }
423: } else {
424: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
425: if (!baij->donotstash) {
426: if (roworiented) {
427: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
428: } else {
429: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
430: }
431: }
432: }
433: }
434: return(0);
435: }
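/*
   Illustrative note (not part of the PETSc source): for row-oriented input the
   m*bs by n*bs values in v[] are stored by rows, so entry (ii,jj) of block (i,j)
   lives at

       v[(i*bs + ii)*(n*bs) + j*bs + jj].

   With stepval = (n-1)*bs this is exactly value = v + (i*(stepval+bs)+j)*bs,
   advanced by bs+stepval per block row, which is the indexing used in the copy
   above. For example, with bs=2, m=1, n=2, the block j=1 consists of v[2],v[3]
   (first row) and v[6],v[7] (second row).
*/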
437: #define HASH_KEY 0.6180339887
438: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
439: /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
440: /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
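/*
   Illustrative note (not part of the PETSc source): HASH() is a multiplicative
   hash using the golden-ratio constant. It scales the key by 0.6180339887,
   keeps the fractional part, and maps it to a slot in [0,size). For example,
   with size = 100 and key = 7:

       7 * 0.6180339887 = 4.3262...  ->  fractional part 0.3262...
       h1 = (PetscInt)(100 * 0.3262...) = 32

   Collisions are resolved by the linear-probing loops in the routines below.
*/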
441: PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
442: {
443: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
444: PetscBool roworiented = baij->roworiented;
446: PetscInt i,j,row,col;
447: PetscInt rstart_orig=mat->rmap->rstart;
448: PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs;
449: PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
450: PetscReal tmp;
451: MatScalar **HD = baij->hd,value;
452: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
455: for (i=0; i<m; i++) {
456: if (PetscDefined(USE_DEBUG)) {
457: if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
458: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
459: }
460: row = im[i];
461: if (row >= rstart_orig && row < rend_orig) {
462: for (j=0; j<n; j++) {
463: col = in[j];
464: if (roworiented) value = v[i*n+j];
465: else value = v[i+j*m];
466: /* Look up into the Hash Table */
467: key = (row/bs)*Nbs+(col/bs)+1;
468: h1 = HASH(size,key,tmp);
471: idx = h1;
472: if (PetscDefined(USE_DEBUG)) {
473: insert_ct++;
474: total_ct++;
475: if (HT[idx] != key) {
476: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
477: if (idx == size) {
478: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
479: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
480: }
481: }
482: } else if (HT[idx] != key) {
483: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
484: if (idx == size) {
485: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
486: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
487: }
488: }
489: /* A HASH table entry is found, so insert the values at the correct address */
490: if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
491: else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
492: }
493: } else if (!baij->donotstash) {
494: if (roworiented) {
495: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
496: } else {
497: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
498: }
499: }
500: }
501: if (PetscDefined(USE_DEBUG)) {
502: baij->ht_total_ct += total_ct;
503: baij->ht_insert_ct += insert_ct;
504: }
505: return(0);
506: }
508: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
509: {
510: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
511: PetscBool roworiented = baij->roworiented;
512: PetscErrorCode ierr;
513: PetscInt i,j,ii,jj,row,col;
514: PetscInt rstart=baij->rstartbs;
515: PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
516: PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
517: PetscReal tmp;
518: MatScalar **HD = baij->hd,*baij_a;
519: const PetscScalar *v_t,*value;
520: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
523: if (roworiented) stepval = (n-1)*bs;
524: else stepval = (m-1)*bs;
526: for (i=0; i<m; i++) {
527: if (PetscDefined(USE_DEBUG)) {
528: if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
529: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
530: }
531: row = im[i];
532: v_t = v + i*nbs2;
533: if (row >= rstart && row < rend) {
534: for (j=0; j<n; j++) {
535: col = in[j];
537: /* Look up into the Hash Table */
538: key = row*Nbs+col+1;
539: h1 = HASH(size,key,tmp);
541: idx = h1;
542: if (PetscDefined(USE_DEBUG)) {
543: total_ct++;
544: insert_ct++;
545: if (HT[idx] != key) {
546: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
547: if (idx == size) {
548: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
549: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
550: }
551: }
552: } else if (HT[idx] != key) {
553: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
554: if (idx == size) {
555: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
556: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
557: }
558: }
559: baij_a = HD[idx];
560: if (roworiented) {
561: /*value = v + i*(stepval+bs)*bs + j*bs;*/
562: /* value = v + (i*(stepval+bs)+j)*bs; */
563: value = v_t;
564: v_t += bs;
565: if (addv == ADD_VALUES) {
566: for (ii=0; ii<bs; ii++,value+=stepval) {
567: for (jj=ii; jj<bs2; jj+=bs) {
568: baij_a[jj] += *value++;
569: }
570: }
571: } else {
572: for (ii=0; ii<bs; ii++,value+=stepval) {
573: for (jj=ii; jj<bs2; jj+=bs) {
574: baij_a[jj] = *value++;
575: }
576: }
577: }
578: } else {
579: value = v + j*(stepval+bs)*bs + i*bs;
580: if (addv == ADD_VALUES) {
581: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
582: for (jj=0; jj<bs; jj++) {
583: baij_a[jj] += *value++;
584: }
585: }
586: } else {
587: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
588: for (jj=0; jj<bs; jj++) {
589: baij_a[jj] = *value++;
590: }
591: }
592: }
593: }
594: }
595: } else {
596: if (!baij->donotstash) {
597: if (roworiented) {
598: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
599: } else {
600: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
601: }
602: }
603: }
604: }
605: if (PetscDefined(USE_DEBUG)) {
606: baij->ht_total_ct += total_ct;
607: baij->ht_insert_ct += insert_ct;
608: }
609: return(0);
610: }
612: PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
613: {
614: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
616: PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
617: PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
620: for (i=0; i<m; i++) {
621: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
622: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
623: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
624: row = idxm[i] - bsrstart;
625: for (j=0; j<n; j++) {
626: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
627: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
628: if (idxn[j] >= bscstart && idxn[j] < bscend) {
629: col = idxn[j] - bscstart;
630: MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
631: } else {
632: if (!baij->colmap) {
633: MatCreateColmap_MPIBAIJ_Private(mat);
634: }
635: #if defined(PETSC_USE_CTABLE)
636: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
637: data--;
638: #else
639: data = baij->colmap[idxn[j]/bs]-1;
640: #endif
641: if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
642: else {
643: col = data + idxn[j]%bs;
644: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
645: }
646: }
647: }
648: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
649: }
650: return(0);
651: }
653: PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
654: {
655: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
656: Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
658: PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
659: PetscReal sum = 0.0;
660: MatScalar *v;
663: if (baij->size == 1) {
664: MatNorm(baij->A,type,nrm);
665: } else {
666: if (type == NORM_FROBENIUS) {
667: v = amat->a;
668: nz = amat->nz*bs2;
669: for (i=0; i<nz; i++) {
670: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
671: }
672: v = bmat->a;
673: nz = bmat->nz*bs2;
674: for (i=0; i<nz; i++) {
675: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
676: }
677: MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
678: *nrm = PetscSqrtReal(*nrm);
679: } else if (type == NORM_1) { /* max column sum */
680: PetscReal *tmp,*tmp2;
681: PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs;
682: PetscCalloc1(mat->cmap->N,&tmp);
683: PetscMalloc1(mat->cmap->N,&tmp2);
684: v = amat->a; jj = amat->j;
685: for (i=0; i<amat->nz; i++) {
686: for (j=0; j<bs; j++) {
687: col = bs*(cstart + *jj) + j; /* column index */
688: for (row=0; row<bs; row++) {
689: tmp[col] += PetscAbsScalar(*v); v++;
690: }
691: }
692: jj++;
693: }
694: v = bmat->a; jj = bmat->j;
695: for (i=0; i<bmat->nz; i++) {
696: for (j=0; j<bs; j++) {
697: col = bs*garray[*jj] + j;
698: for (row=0; row<bs; row++) {
699: tmp[col] += PetscAbsScalar(*v); v++;
700: }
701: }
702: jj++;
703: }
704: MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
705: *nrm = 0.0;
706: for (j=0; j<mat->cmap->N; j++) {
707: if (tmp2[j] > *nrm) *nrm = tmp2[j];
708: }
709: PetscFree(tmp);
710: PetscFree(tmp2);
711: } else if (type == NORM_INFINITY) { /* max row sum */
712: PetscReal *sums;
713: PetscMalloc1(bs,&sums);
714: sum = 0.0;
715: for (j=0; j<amat->mbs; j++) {
716: for (row=0; row<bs; row++) sums[row] = 0.0;
717: v = amat->a + bs2*amat->i[j];
718: nz = amat->i[j+1]-amat->i[j];
719: for (i=0; i<nz; i++) {
720: for (col=0; col<bs; col++) {
721: for (row=0; row<bs; row++) {
722: sums[row] += PetscAbsScalar(*v); v++;
723: }
724: }
725: }
726: v = bmat->a + bs2*bmat->i[j];
727: nz = bmat->i[j+1]-bmat->i[j];
728: for (i=0; i<nz; i++) {
729: for (col=0; col<bs; col++) {
730: for (row=0; row<bs; row++) {
731: sums[row] += PetscAbsScalar(*v); v++;
732: }
733: }
734: }
735: for (row=0; row<bs; row++) {
736: if (sums[row] > sum) sum = sums[row];
737: }
738: }
739: MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));
740: PetscFree(sums);
741: } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
742: }
743: return(0);
744: }
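/*
   Illustrative note (not part of the PETSc source): the Frobenius branch above
   accumulates sum_ij |a_ij|^2 over the local diagonal (A) and off-diagonal (B)
   blocks, combines the partial sums across ranks with an MPI_SUM reduction, and
   only then takes the square root, so every rank obtains
   ||M||_F = sqrt(sum of all local sums). The 1-norm and infinity-norm branches
   follow the same local-sum / global-reduce pattern, using MPI_SUM for the
   column sums and MPI_MAX for the row-sum maximum.
*/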
746: /*
747: Creates and populates the hash table.
748: This table is created only once.
749: If new entries need to be added to the matrix
750: then the hash table has to be destroyed and
751: recreated.
752: */
753: PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
754: {
755: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
756: Mat A = baij->A,B=baij->B;
757: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
758: PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
760: PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
761: PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
762: PetscInt *HT,key;
763: MatScalar **HD;
764: PetscReal tmp;
765: #if defined(PETSC_USE_INFO)
766: PetscInt ct=0,max=0;
767: #endif
770: if (baij->ht) return(0);
772: baij->ht_size = (PetscInt)(factor*nz);
773: ht_size = baij->ht_size;
775: /* Allocate Memory for Hash Table */
776: PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);
777: HD = baij->hd;
778: HT = baij->ht;
780: /* Loop Over A */
781: for (i=0; i<a->mbs; i++) {
782: for (j=ai[i]; j<ai[i+1]; j++) {
783: row = i+rstart;
784: col = aj[j]+cstart;
786: key = row*Nbs + col + 1;
787: h1 = HASH(ht_size,key,tmp);
788: for (k=0; k<ht_size; k++) {
789: if (!HT[(h1+k)%ht_size]) {
790: HT[(h1+k)%ht_size] = key;
791: HD[(h1+k)%ht_size] = a->a + j*bs2;
792: break;
793: #if defined(PETSC_USE_INFO)
794: } else {
795: ct++;
796: #endif
797: }
798: }
799: #if defined(PETSC_USE_INFO)
800: if (k> max) max = k;
801: #endif
802: }
803: }
804: /* Loop Over B */
805: for (i=0; i<b->mbs; i++) {
806: for (j=bi[i]; j<bi[i+1]; j++) {
807: row = i+rstart;
808: col = garray[bj[j]];
809: key = row*Nbs + col + 1;
810: h1 = HASH(ht_size,key,tmp);
811: for (k=0; k<ht_size; k++) {
812: if (!HT[(h1+k)%ht_size]) {
813: HT[(h1+k)%ht_size] = key;
814: HD[(h1+k)%ht_size] = b->a + j*bs2;
815: break;
816: #if defined(PETSC_USE_INFO)
817: } else {
818: ct++;
819: #endif
820: }
821: }
822: #if defined(PETSC_USE_INFO)
823: if (k> max) max = k;
824: #endif
825: }
826: }
828: /* Print Summary */
829: #if defined(PETSC_USE_INFO)
830: for (i=0,j=0; i<ht_size; i++) {
831: if (HT[i]) j++;
832: }
833: PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
834: #endif
835: return(0);
836: }
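/*
   Illustrative sketch (not part of the PETSc source) of the lookup that
   MatSetValues_MPIBAIJ_HT() performs against the table built above: probe
   linearly from the hashed slot to the end of the table, then wrap around to
   the front. The helper name HashLookupSketch is hypothetical.
*/
static PetscInt HashLookupSketch(const PetscInt *HT,PetscInt size,PetscInt h1,PetscInt key)
{
  PetscInt idx;

  for (idx=h1; idx<size; idx++) if (HT[idx] == key) return idx;
  for (idx=0; idx<h1; idx++)    if (HT[idx] == key) return idx;
  return -1; /* key not present: the callers raise an error in this case */
}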
838: PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
839: {
840: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
842: PetscInt nstash,reallocs;
845: if (baij->donotstash || mat->nooffprocentries) return(0);
847: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
848: MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
849: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
850: PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
851: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
852: PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
853: return(0);
854: }
856: PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
857: {
858: Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
859: Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data;
861: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
862: PetscInt *row,*col;
863: PetscBool r1,r2,r3,other_disassembled;
864: MatScalar *val;
865: PetscMPIInt n;
868: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
869: if (!baij->donotstash && !mat->nooffprocentries) {
870: while (1) {
871: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
872: if (!flg) break;
874: for (i=0; i<n;) {
875: /* Now identify the consecutive vals belonging to the same row */
876: for (j=i,rstart=row[j]; j<n; j++) {
877: if (row[j] != rstart) break;
878: }
879: if (j < n) ncols = j-i;
880: else ncols = n-i;
881: /* Now assemble all these values with a single function call */
882: MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
883: i = j;
884: }
885: }
886: MatStashScatterEnd_Private(&mat->stash);
887: /* Now process the block-stash. Since the values are stashed column-oriented,
888: temporarily clear the roworiented flags (i.e., switch to column-oriented), and after MatSetValues()
889: restore the original flags */
890: r1 = baij->roworiented;
891: r2 = a->roworiented;
892: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
894: baij->roworiented = PETSC_FALSE;
895: a->roworiented = PETSC_FALSE;
897: (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
898: while (1) {
899: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
900: if (!flg) break;
902: for (i=0; i<n;) {
903: /* Now identify the consecutive vals belonging to the same row */
904: for (j=i,rstart=row[j]; j<n; j++) {
905: if (row[j] != rstart) break;
906: }
907: if (j < n) ncols = j-i;
908: else ncols = n-i;
909: MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);
910: i = j;
911: }
912: }
913: MatStashScatterEnd_Private(&mat->bstash);
915: baij->roworiented = r1;
916: a->roworiented = r2;
918: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
919: }
921: MatAssemblyBegin(baij->A,mode);
922: MatAssemblyEnd(baij->A,mode);
924: /* determine if any processor has disassembled; if so we must
925: also disassemble ourselves, in order that we may reassemble. */
926: /*
927: if the nonzero structure of the submatrix B cannot change then we know that
928: no processor disassembled, so we can skip this step
929: */
930: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
931: MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
932: if (mat->was_assembled && !other_disassembled) {
933: MatDisAssemble_MPIBAIJ(mat);
934: }
935: }
937: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
938: MatSetUpMultiply_MPIBAIJ(mat);
939: }
940: MatAssemblyBegin(baij->B,mode);
941: MatAssemblyEnd(baij->B,mode);
943: #if defined(PETSC_USE_INFO)
944: if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
945: PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
947: baij->ht_total_ct = 0;
948: baij->ht_insert_ct = 0;
949: }
950: #endif
951: if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
952: MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);
954: mat->ops->setvalues = MatSetValues_MPIBAIJ_HT;
955: mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
956: }
958: PetscFree2(baij->rowvalues,baij->rowindices);
960: baij->rowvalues = NULL;
962: /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
963: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
964: PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
965: MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
966: }
967: return(0);
968: }
970: extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
971: #include <petscdraw.h>
972: static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
973: {
974: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
975: PetscErrorCode ierr;
976: PetscMPIInt rank = baij->rank;
977: PetscInt bs = mat->rmap->bs;
978: PetscBool iascii,isdraw;
979: PetscViewer sviewer;
980: PetscViewerFormat format;
983: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
984: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
985: if (iascii) {
986: PetscViewerGetFormat(viewer,&format);
987: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
988: MatInfo info;
989: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
990: MatGetInfo(mat,MAT_LOCAL,&info);
991: PetscViewerASCIIPushSynchronized(viewer);
992: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
993: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);
994: MatGetInfo(baij->A,MAT_LOCAL,&info);
995: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
996: MatGetInfo(baij->B,MAT_LOCAL,&info);
997: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
998: PetscViewerFlush(viewer);
999: PetscViewerASCIIPopSynchronized(viewer);
1000: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1001: VecScatterView(baij->Mvctx,viewer);
1002: return(0);
1003: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1004: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1005: return(0);
1006: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1007: return(0);
1008: }
1009: }
1011: if (isdraw) {
1012: PetscDraw draw;
1013: PetscBool isnull;
1014: PetscViewerDrawGetDraw(viewer,0,&draw);
1015: PetscDrawIsNull(draw,&isnull);
1016: if (isnull) return(0);
1017: }
1019: {
1020: /* assemble the entire matrix onto first processor. */
1021: Mat A;
1022: Mat_SeqBAIJ *Aloc;
1023: PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1024: MatScalar *a;
1025: const char *matname;
1027: /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1028: /* Perhaps this should be the type of mat? */
1029: MatCreate(PetscObjectComm((PetscObject)mat),&A);
1030: if (!rank) {
1031: MatSetSizes(A,M,N,M,N);
1032: } else {
1033: MatSetSizes(A,0,0,M,N);
1034: }
1035: MatSetType(A,MATMPIBAIJ);
1036: MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);
1037: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1038: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
1040: /* copy over the A part */
1041: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1042: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1043: PetscMalloc1(bs,&rvals);
1045: for (i=0; i<mbs; i++) {
1046: rvals[0] = bs*(baij->rstartbs + i);
1047: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1048: for (j=ai[i]; j<ai[i+1]; j++) {
1049: col = (baij->cstartbs+aj[j])*bs;
1050: for (k=0; k<bs; k++) {
1051: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1052: col++; a += bs;
1053: }
1054: }
1055: }
1056: /* copy over the B part */
1057: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1058: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1059: for (i=0; i<mbs; i++) {
1060: rvals[0] = bs*(baij->rstartbs + i);
1061: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1062: for (j=ai[i]; j<ai[i+1]; j++) {
1063: col = baij->garray[aj[j]]*bs;
1064: for (k=0; k<bs; k++) {
1065: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1066: col++; a += bs;
1067: }
1068: }
1069: }
1070: PetscFree(rvals);
1071: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1072: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1073: /*
1074: Every process has to participate in drawing the matrix since the graphics waits are
1075: synchronized across all processes that share the PetscDraw object
1076: */
1077: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
1078: PetscObjectGetName((PetscObject)mat,&matname);
1079: if (!rank) {
1080: PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);
1081: MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);
1082: }
1083: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
1084: PetscViewerFlush(viewer);
1085: MatDestroy(&A);
1086: }
1087: return(0);
1088: }
1090: /* Used for both MPIBAIJ and MPISBAIJ matrices */
1091: PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1092: {
1093: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
1094: Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data;
1095: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data;
1096: const PetscInt *garray = aij->garray;
1097: PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1098: PetscInt *rowlens,*colidxs;
1099: PetscScalar *matvals;
1103: PetscViewerSetUp(viewer);
1105: M = mat->rmap->N;
1106: N = mat->cmap->N;
1107: m = mat->rmap->n;
1108: rs = mat->rmap->rstart;
1109: cs = mat->cmap->rstart;
1110: bs = mat->rmap->bs;
1111: nz = bs*bs*(A->nz + B->nz);
1113: /* write matrix header */
1114: header[0] = MAT_FILE_CLASSID;
1115: header[1] = M; header[2] = N; header[3] = nz;
1116: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1117: PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);
1119: /* fill in and store row lengths */
1120: PetscMalloc1(m,&rowlens);
1121: for (cnt=0, i=0; i<A->mbs; i++)
1122: for (j=0; j<bs; j++)
1123: rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1124: PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);
1125: PetscFree(rowlens);
1127: /* fill in and store column indices */
1128: PetscMalloc1(nz,&colidxs);
1129: for (cnt=0, i=0; i<A->mbs; i++) {
1130: for (k=0; k<bs; k++) {
1131: for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1132: if (garray[B->j[jb]] > cs/bs) break;
1133: for (l=0; l<bs; l++)
1134: colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1135: }
1136: for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1137: for (l=0; l<bs; l++)
1138: colidxs[cnt++] = bs*A->j[ja] + l + cs;
1139: for (; jb<B->i[i+1]; jb++)
1140: for (l=0; l<bs; l++)
1141: colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1142: }
1143: }
1144: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1145: PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);
1146: PetscFree(colidxs);
1148: /* fill in and store nonzero values */
1149: PetscMalloc1(nz,&matvals);
1150: for (cnt=0, i=0; i<A->mbs; i++) {
1151: for (k=0; k<bs; k++) {
1152: for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1153: if (garray[B->j[jb]] > cs/bs) break;
1154: for (l=0; l<bs; l++)
1155: matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1156: }
1157: for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1158: for (l=0; l<bs; l++)
1159: matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1160: for (; jb<B->i[i+1]; jb++)
1161: for (l=0; l<bs; l++)
1162: matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1163: }
1164: }
1165: PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);
1166: PetscFree(matvals);
1168: /* write block size option to the viewer's .info file */
1169: MatView_Binary_BlockSizes(mat,viewer);
1170: return(0);
1171: }
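/*
   Illustrative note (not part of the PETSc source): the routine above emits the
   standard PETSc binary matrix format, i.e.

       PetscInt    header[4] = {MAT_FILE_CLASSID, M, N, nnz}
       PetscInt    rowlens[M]     nonzeros per global row
       PetscInt    colidxs[nnz]   column indices, each row in ascending order
       PetscScalar matvals[nnz]   values in the same order as colidxs

   where nnz is the global count obtained by the MPI_Reduce before the header is
   written. The interleaving of the A (diagonal) and B (off-diagonal) loops keeps
   the column indices of each row globally sorted.
*/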
1173: PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1174: {
1176: PetscBool iascii,isdraw,issocket,isbinary;
1179: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1180: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1181: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1182: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1183: if (iascii || isdraw || issocket) {
1184: MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);
1185: } else if (isbinary) {
1186: MatView_MPIBAIJ_Binary(mat,viewer);
1187: }
1188: return(0);
1189: }
1191: PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1192: {
1193: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1197: #if defined(PETSC_USE_LOG)
1198: PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1199: #endif
1200: MatStashDestroy_Private(&mat->stash);
1201: MatStashDestroy_Private(&mat->bstash);
1202: MatDestroy(&baij->A);
1203: MatDestroy(&baij->B);
1204: #if defined(PETSC_USE_CTABLE)
1205: PetscTableDestroy(&baij->colmap);
1206: #else
1207: PetscFree(baij->colmap);
1208: #endif
1209: PetscFree(baij->garray);
1210: VecDestroy(&baij->lvec);
1211: VecScatterDestroy(&baij->Mvctx);
1212: PetscFree2(baij->rowvalues,baij->rowindices);
1213: PetscFree(baij->barray);
1214: PetscFree2(baij->hd,baij->ht);
1215: PetscFree(baij->rangebs);
1216: PetscFree(mat->data);
1218: PetscObjectChangeTypeName((PetscObject)mat,NULL);
1219: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1220: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1221: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);
1222: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);
1223: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);
1224: PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);
1225: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);
1226: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);
1227: #if defined(PETSC_HAVE_HYPRE)
1228: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);
1229: #endif
1230: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);
1231: return(0);
1232: }
1234: PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1235: {
1236: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1238: PetscInt nt;
1241: VecGetLocalSize(xx,&nt);
1242: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1243: VecGetLocalSize(yy,&nt);
1244: if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1245: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1246: (*a->A->ops->mult)(a->A,xx,yy);
1247: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1248: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1249: return(0);
1250: }
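/*
   Illustrative note (not part of the PETSc source): MatMult above computes
   y = A_d*x_local + B_o*x_ghost, where A_d is the diagonal block and B_o the
   off-diagonal block. The scatter that gathers the needed ghost values into
   a->lvec is begun before the local product and completed only afterwards, so
   the communication overlaps with the local computation.
*/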
1252: PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1253: {
1254: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1258: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1259: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1260: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1261: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1262: return(0);
1263: }
1265: PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1266: {
1267: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1271: /* do nondiagonal part */
1272: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1273: /* do local part */
1274: (*a->A->ops->multtranspose)(a->A,xx,yy);
1275: /* add partial results together */
1276: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1277: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1278: return(0);
1279: }
1281: PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1282: {
1283: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1287: /* do nondiagonal part */
1288: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1289: /* do local part */
1290: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1291: /* add partial results together */
1292: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1293: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1294: return(0);
1295: }
1297: /*
1298: This only works correctly for square matrices where the subblock A->A is the
1299: diagonal block
1300: */
1301: PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1302: {
1303: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1307: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1308: MatGetDiagonal(a->A,v);
1309: return(0);
1310: }
1312: PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1313: {
1314: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1318: MatScale(a->A,aa);
1319: MatScale(a->B,aa);
1320: return(0);
1321: }
1323: PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1324: {
1325: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1326: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1328: PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1329: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1330: PetscInt *cmap,*idx_p,cstart = mat->cstartbs;
1333: if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1334: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1335: mat->getrowactive = PETSC_TRUE;
1337: if (!mat->rowvalues && (idx || v)) {
1338: /*
1339: allocate enough space to hold information from the longest row.
1340: */
1341: Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1342: PetscInt max = 1,mbs = mat->mbs,tmp;
1343: for (i=0; i<mbs; i++) {
1344: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1345: if (max < tmp) max = tmp;
1346: }
1347: PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);
1348: }
1349: lrow = row - brstart;
1351: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1352: if (!v) {pvA = NULL; pvB = NULL;}
1353: if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1354: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1355: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1356: nztot = nzA + nzB;
1358: cmap = mat->garray;
1359: if (v || idx) {
1360: if (nztot) {
1361: /* Sort by increasing column numbers, assuming A and B already sorted */
1362: PetscInt imark = -1;
1363: if (v) {
1364: *v = v_p = mat->rowvalues;
1365: for (i=0; i<nzB; i++) {
1366: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1367: else break;
1368: }
1369: imark = i;
1370: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1371: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1372: }
1373: if (idx) {
1374: *idx = idx_p = mat->rowindices;
1375: if (imark > -1) {
1376: for (i=0; i<imark; i++) {
1377: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1378: }
1379: } else {
1380: for (i=0; i<nzB; i++) {
1381: if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1382: else break;
1383: }
1384: imark = i;
1385: }
1386: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1387: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1388: }
1389: } else {
1390: if (idx) *idx = NULL;
1391: if (v) *v = NULL;
1392: }
1393: }
1394: *nz = nztot;
1395: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1396: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1397: return(0);
1398: }
1400: PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1401: {
1402: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1405: if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1406: baij->getrowactive = PETSC_FALSE;
1407: return(0);
1408: }
1410: PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1411: {
1412: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1416: MatZeroEntries(l->A);
1417: MatZeroEntries(l->B);
1418: return(0);
1419: }
1421: PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1422: {
1423: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1424: Mat A = a->A,B = a->B;
1426: PetscLogDouble isend[5],irecv[5];
1429: info->block_size = (PetscReal)matin->rmap->bs;
1431: MatGetInfo(A,MAT_LOCAL,info);
1433: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1434: isend[3] = info->memory; isend[4] = info->mallocs;
1436: MatGetInfo(B,MAT_LOCAL,info);
1438: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1439: isend[3] += info->memory; isend[4] += info->mallocs;
1441: if (flag == MAT_LOCAL) {
1442: info->nz_used = isend[0];
1443: info->nz_allocated = isend[1];
1444: info->nz_unneeded = isend[2];
1445: info->memory = isend[3];
1446: info->mallocs = isend[4];
1447: } else if (flag == MAT_GLOBAL_MAX) {
1448: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));
1450: info->nz_used = irecv[0];
1451: info->nz_allocated = irecv[1];
1452: info->nz_unneeded = irecv[2];
1453: info->memory = irecv[3];
1454: info->mallocs = irecv[4];
1455: } else if (flag == MAT_GLOBAL_SUM) {
1456: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));
1458: info->nz_used = irecv[0];
1459: info->nz_allocated = irecv[1];
1460: info->nz_unneeded = irecv[2];
1461: info->memory = irecv[3];
1462: info->mallocs = irecv[4];
1463: } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1464: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1465: info->fill_ratio_needed = 0;
1466: info->factor_mallocs = 0;
1467: return(0);
1468: }
1470: PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1471: {
1472: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1476: switch (op) {
1477: case MAT_NEW_NONZERO_LOCATIONS:
1478: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1479: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1480: case MAT_KEEP_NONZERO_PATTERN:
1481: case MAT_NEW_NONZERO_LOCATION_ERR:
1482: MatCheckPreallocated(A,1);
1483: MatSetOption(a->A,op,flg);
1484: MatSetOption(a->B,op,flg);
1485: break;
1486: case MAT_ROW_ORIENTED:
1487: MatCheckPreallocated(A,1);
1488: a->roworiented = flg;
1490: MatSetOption(a->A,op,flg);
1491: MatSetOption(a->B,op,flg);
1492: break;
1493: case MAT_NEW_DIAGONALS:
1494: case MAT_SORTED_FULL:
1495: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1496: break;
1497: case MAT_IGNORE_OFF_PROC_ENTRIES:
1498: a->donotstash = flg;
1499: break;
1500: case MAT_USE_HASH_TABLE:
1501: a->ht_flag = flg;
1502: a->ht_fact = 1.39;
1503: break;
1504: case MAT_SYMMETRIC:
1505: case MAT_STRUCTURALLY_SYMMETRIC:
1506: case MAT_HERMITIAN:
1507: case MAT_SUBMAT_SINGLEIS:
1508: case MAT_SYMMETRY_ETERNAL:
1509: MatCheckPreallocated(A,1);
1510: MatSetOption(a->A,op,flg);
1511: break;
1512: default:
1513: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1514: }
1515: return(0);
1516: }
1518: PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1519: {
1520: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1521: Mat_SeqBAIJ *Aloc;
1522: Mat B;
1524: PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1525: PetscInt bs=A->rmap->bs,mbs=baij->mbs;
1526: MatScalar *a;
1529: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1530: MatCreate(PetscObjectComm((PetscObject)A),&B);
1531: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1532: MatSetType(B,((PetscObject)A)->type_name);
1533: /* Do not know preallocation information, but must set block size */
1534: MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);
1535: } else {
1536: B = *matout;
1537: }
1539: /* copy over the A part */
1540: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1541: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542: PetscMalloc1(bs,&rvals);
1544: for (i=0; i<mbs; i++) {
1545: rvals[0] = bs*(baij->rstartbs + i);
1546: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1547: for (j=ai[i]; j<ai[i+1]; j++) {
1548: col = (baij->cstartbs+aj[j])*bs;
1549: for (k=0; k<bs; k++) {
1550: MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1552: col++; a += bs;
1553: }
1554: }
1555: }
1556: /* copy over the B part */
1557: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1558: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1559: for (i=0; i<mbs; i++) {
1560: rvals[0] = bs*(baij->rstartbs + i);
1561: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1562: for (j=ai[i]; j<ai[i+1]; j++) {
1563: col = baij->garray[aj[j]]*bs;
1564: for (k=0; k<bs; k++) {
1565: MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1566: col++;
1567: a += bs;
1568: }
1569: }
1570: }
1571: PetscFree(rvals);
1572: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1573: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1575: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1576: else {
1577: MatHeaderMerge(A,&B);
1578: }
1579: return(0);
1580: }
1582: PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1583: {
1584: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1585: Mat a = baij->A,b = baij->B;
1587: PetscInt s1,s2,s3;
1590: MatGetLocalSize(mat,&s2,&s3);
1591: if (rr) {
1592: VecGetLocalSize(rr,&s1);
1593: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1594: /* Overlap communication with computation. */
1595: VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1596: }
1597: if (ll) {
1598: VecGetLocalSize(ll,&s1);
1599: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1600: (*b->ops->diagonalscale)(b,ll,NULL);
1601: }
1602: /* scale the diagonal block */
1603: (*a->ops->diagonalscale)(a,ll,rr);
1605: if (rr) {
1606: /* Do a scatter end and then right scale the off-diagonal block */
1607: VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1608: (*b->ops->diagonalscale)(b,NULL,baij->lvec);
1609: }
1610: return(0);
1611: }
1613: PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1614: {
1615: Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1616: PetscInt *lrows;
1617: PetscInt r, len;
1618: PetscBool cong;
1622: /* get locally owned rows */
1623: MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);
1624: /* fix right hand side if needed */
1625: if (x && b) {
1626: const PetscScalar *xx;
1627: PetscScalar *bb;
1629: VecGetArrayRead(x,&xx);
1630: VecGetArray(b,&bb);
1631: for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1632: VecRestoreArrayRead(x,&xx);
1633: VecRestoreArray(b,&bb);
1634: }
1636: /* actually zap the local rows */
1637: /*
1638: Zero the required rows. If the "diagonal block" of the matrix
1639: is square and the user wishes to set the diagonal, we use separate
1640: code so that MatSetValues() is not called for each diagonal entry, allocating
1641: new memory and thus triggering lots of mallocs and slowing things down.
1643: */
1644: /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1645: MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);
1646: MatHasCongruentLayouts(A,&cong);
1647: if ((diag != 0.0) && cong) {
1648: MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);
1649: } else if (diag != 0.0) {
1650: MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);
1651: if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1652: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1653: for (r = 0; r < len; ++r) {
1654: const PetscInt row = lrows[r] + A->rmap->rstart;
1655: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
1656: }
1657: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1658: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1659: } else {
1660: MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);
1661: }
1662: PetscFree(lrows);
1664: /* only change matrix nonzero state if pattern was allowed to be changed */
1665: if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1666: PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1667: MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
1668: }
1669: return(0);
1670: }
1672: PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1673: {
1674: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1675: PetscErrorCode ierr;
1676: PetscMPIInt n = A->rmap->n,p = 0;
1677: PetscInt i,j,k,r,len = 0,row,col,count;
1678: PetscInt *lrows,*owners = A->rmap->range;
1679: PetscSFNode *rrows;
1680: PetscSF sf;
1681: const PetscScalar *xx;
1682: PetscScalar *bb,*mask;
1683: Vec xmask,lmask;
1684: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data;
1685: PetscInt bs = A->rmap->bs, bs2 = baij->bs2;
1686: PetscScalar *aa;
1689: /* Create SF where leaves are input rows and roots are owned rows */
1690: PetscMalloc1(n, &lrows);
1691: for (r = 0; r < n; ++r) lrows[r] = -1;
1692: PetscMalloc1(N, &rrows);
1693: for (r = 0; r < N; ++r) {
1694: const PetscInt idx = rows[r];
1695: if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1696: if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1697: PetscLayoutFindOwner(A->rmap,idx,&p);
1698: }
1699: rrows[r].rank = p;
1700: rrows[r].index = rows[r] - owners[p];
1701: }
1702: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
1703: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
1704: /* Collect flags for rows to be zeroed */
1705: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1706: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1707: PetscSFDestroy(&sf);
1708: /* Compress and put in row numbers */
1709: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1710: /* zero diagonal part of matrix */
1711: MatZeroRowsColumns(l->A,len,lrows,diag,x,b);
1712: /* handle off diagonal part of matrix */
1713: MatCreateVecs(A,&xmask,NULL);
1714: VecDuplicate(l->lvec,&lmask);
1715: VecGetArray(xmask,&bb);
1716: for (i=0; i<len; i++) bb[lrows[i]] = 1;
1717: VecRestoreArray(xmask,&bb);
1718: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1719: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1720: VecDestroy(&xmask);
1721: if (x) {
1722: VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
1723: VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
1724: VecGetArrayRead(l->lvec,&xx);
1725: VecGetArray(b,&bb);
1726: }
1727: VecGetArray(lmask,&mask);
1728: /* remove zeroed rows of off diagonal matrix */
1729: for (i = 0; i < len; ++i) {
1730: row = lrows[i];
1731: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1732: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1733: for (k = 0; k < count; ++k) {
1734: aa[0] = 0.0;
1735: aa += bs;
1736: }
1737: }
1738: /* loop over all elements of off process part of matrix zeroing removed columns*/
1739: for (i = 0; i < l->B->rmap->N; ++i) {
1740: row = i/bs;
1741: for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1742: for (k = 0; k < bs; ++k) {
1743: col = bs*baij->j[j] + k;
1744: if (PetscAbsScalar(mask[col])) {
1745: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1746: if (x) bb[i] -= aa[0]*xx[col];
1747: aa[0] = 0.0;
1748: }
1749: }
1750: }
1751: }
1752: if (x) {
1753: VecRestoreArray(b,&bb);
1754: VecRestoreArrayRead(l->lvec,&xx);
1755: }
1756: VecRestoreArray(lmask,&mask);
1757: VecDestroy(&lmask);
1758: PetscFree(lrows);
1760: /* only change matrix nonzero state if pattern was allowed to be changed */
1761: if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1762: PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1763: MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
1764: }
1765: return(0);
1766: }
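/*
   Companion sketch for the symmetric variant above: zeroing rows and columns
   keeps a symmetric matrix symmetric, and the known values in x are folded
   into b for the remaining equations.  A, x, b and the row list are
   illustrative names.

     PetscInt rows[] = {0, 7, 42};
     MatZeroRowsColumns(A,3,rows,1.0,x,b);   // b is corrected using the x values of the zeroed columns
*/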
1768: PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1769: {
1770: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1774: MatSetUnfactored(a->A);
1775: return(0);
1776: }
1778: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1780: PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag)
1781: {
1782: Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1783: Mat a,b,c,d;
1784: PetscBool flg;
1788: a = matA->A; b = matA->B;
1789: c = matB->A; d = matB->B;
1791: MatEqual(a,c,&flg);
1792: if (flg) {
1793: MatEqual(b,d,&flg);
1794: }
1795: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1796: return(0);
1797: }
1799: PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1800: {
1802: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1803: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data;
1806: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1807: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1808: MatCopy_Basic(A,B,str);
1809: } else {
1810: MatCopy(a->A,b->A,str);
1811: MatCopy(a->B,b->B,str);
1812: }
1813: PetscObjectStateIncrease((PetscObject)B);
1814: return(0);
1815: }
1817: PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1818: {
1822: MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);
1823: return(0);
1824: }
1826: PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1827: {
1829: PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs;
1830: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
1831: Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;
1834: MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);
1835: return(0);
1836: }
1838: PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1839: {
1841: Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1842: PetscBLASInt bnz,one=1;
1843: Mat_SeqBAIJ *x,*y;
1844: PetscInt bs2 = Y->rmap->bs*Y->rmap->bs;
1847: if (str == SAME_NONZERO_PATTERN) {
1848: PetscScalar alpha = a;
1849: x = (Mat_SeqBAIJ*)xx->A->data;
1850: y = (Mat_SeqBAIJ*)yy->A->data;
1851: PetscBLASIntCast(x->nz*bs2,&bnz);
1852: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1853: x = (Mat_SeqBAIJ*)xx->B->data;
1854: y = (Mat_SeqBAIJ*)yy->B->data;
1855: PetscBLASIntCast(x->nz*bs2,&bnz);
1856: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1857: PetscObjectStateIncrease((PetscObject)Y);
1858: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1859: MatAXPY_Basic(Y,a,X,str);
1860: } else {
1861: Mat B;
1862: PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1863: PetscMalloc1(yy->A->rmap->N,&nnz_d);
1864: PetscMalloc1(yy->B->rmap->N,&nnz_o);
1865: MatCreate(PetscObjectComm((PetscObject)Y),&B);
1866: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
1867: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
1868: MatSetBlockSizesFromMats(B,Y,Y);
1869: MatSetType(B,MATMPIBAIJ);
1870: MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);
1871: MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
1872: MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);
1873: /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1874: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
1875: MatHeaderReplace(Y,&B);
1876: PetscFree(nnz_d);
1877: PetscFree(nnz_o);
1878: }
1879: return(0);
1880: }
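/*
   The three code paths above as seen from the caller; Y and X are
   illustrative assembled MPIBAIJ matrices with identical layouts.

     MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);       // fast path: one BLAS axpy over each local part
     MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);     // X's nonzeros are a subset of Y's
     MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);  // general path: Y is repreallocated and replaced
*/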
1882: PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1883: {
1884: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1888: MatRealPart(a->A);
1889: MatRealPart(a->B);
1890: return(0);
1891: }
1893: PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1894: {
1895: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1899: MatImaginaryPart(a->A);
1900: MatImaginaryPart(a->B);
1901: return(0);
1902: }
1904: PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1905: {
1907: IS iscol_local;
1908: PetscInt csize;
1911: ISGetLocalSize(iscol,&csize);
1912: if (call == MAT_REUSE_MATRIX) {
1913: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
1914: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1915: } else {
1916: ISAllGather(iscol,&iscol_local);
1917: }
1918: MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
1919: if (call == MAT_INITIAL_MATRIX) {
1920: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
1921: ISDestroy(&iscol_local);
1922: }
1923: return(0);
1924: }
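/*
   Caller-side sketch of the routine above through the public
   MatCreateSubMatrix() interface, including the reuse pattern that relies on
   the "ISAllGather" object composed on the submatrix; mat, isrow and iscol
   are illustrative.

     Mat sub;
     MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);  // composes "ISAllGather" on sub
     // ... change numerical values of mat, keeping the same layout ...
     MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);    // reuses the gathered column IS
     MatDestroy(&sub);
*/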
1926: /*
1927: Not great since it makes two copies of the submatrix: first a SeqBAIJ
1928: on each process, and then the final result obtained by concatenating the local matrices.
1929: Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1930: This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1931: */
1932: PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1933: {
1935: PetscMPIInt rank,size;
1936: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1937: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1938: Mat M,Mreuse;
1939: MatScalar *vwork,*aa;
1940: MPI_Comm comm;
1941: IS isrow_new, iscol_new;
1942: Mat_SeqBAIJ *aij;
1945: PetscObjectGetComm((PetscObject)mat,&comm);
1946: MPI_Comm_rank(comm,&rank);
1947: MPI_Comm_size(comm,&size);
1948: /* The compression and expansion should be avoided. It does not report
1949: errors and may change the indices, hence it is bug-prone */
1950: ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);
1951: ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);
1953: if (call == MAT_REUSE_MATRIX) {
1954: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);
1955: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1956: MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);
1957: } else {
1958: MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);
1959: }
1960: ISDestroy(&isrow_new);
1961: ISDestroy(&iscol_new);
1962: /*
1963: m - number of local rows
1964: n - number of columns (same on all processors)
1965: rstart - first row in new global matrix generated
1966: */
1967: MatGetBlockSize(mat,&bs);
1968: MatGetSize(Mreuse,&m,&n);
1969: m = m/bs;
1970: n = n/bs;
1972: if (call == MAT_INITIAL_MATRIX) {
1973: aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1974: ii = aij->i;
1975: jj = aij->j;
1977: /*
1978: Determine the number of non-zeros in the diagonal and off-diagonal
1979: portions of the matrix in order to do correct preallocation
1980: */
1982: /* first get start and end of "diagonal" columns */
1983: if (csize == PETSC_DECIDE) {
1984: ISGetSize(isrow,&mglobal);
1985: if (mglobal == n*bs) { /* square matrix */
1986: nlocal = m;
1987: } else {
1988: nlocal = n/size + ((n % size) > rank);
1989: }
1990: } else {
1991: nlocal = csize/bs;
1992: }
1993: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
1994: rstart = rend - nlocal;
1995: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1997: /* next, compute all the lengths */
1998: PetscMalloc2(m+1,&dlens,m+1,&olens);
1999: for (i=0; i<m; i++) {
2000: jend = ii[i+1] - ii[i];
2001: olen = 0;
2002: dlen = 0;
2003: for (j=0; j<jend; j++) {
2004: if (*jj < rstart || *jj >= rend) olen++;
2005: else dlen++;
2006: jj++;
2007: }
2008: olens[i] = olen;
2009: dlens[i] = dlen;
2010: }
2011: MatCreate(comm,&M);
2012: MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);
2013: MatSetType(M,((PetscObject)mat)->type_name);
2014: MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);
2015: MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);
2016: PetscFree2(dlens,olens);
2017: } else {
2018: PetscInt ml,nl;
2020: M = *newmat;
2021: MatGetLocalSize(M,&ml,&nl);
2022: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2023: MatZeroEntries(M);
2024: /*
2025: The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2026: rather than the slower MatSetValues().
2027: */
2028: M->was_assembled = PETSC_TRUE;
2029: M->assembled = PETSC_FALSE;
2030: }
2031: MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);
2032: MatGetOwnershipRange(M,&rstart,&rend);
2033: aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2034: ii = aij->i;
2035: jj = aij->j;
2036: aa = aij->a;
2037: for (i=0; i<m; i++) {
2038: row = rstart/bs + i;
2039: nz = ii[i+1] - ii[i];
2040: cwork = jj; jj += nz;
2041: vwork = aa; aa += nz*bs*bs;
2042: MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2043: }
2045: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2046: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2047: *newmat = M;
2049: /* save submatrix used in processor for next request */
2050: if (call == MAT_INITIAL_MATRIX) {
2051: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2052: PetscObjectDereference((PetscObject)Mreuse);
2053: }
2054: return(0);
2055: }
2057: PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2058: {
2059: MPI_Comm comm,pcomm;
2060: PetscInt clocal_size,nrows;
2061: const PetscInt *rows;
2062: PetscMPIInt size;
2063: IS crowp,lcolp;
2067: PetscObjectGetComm((PetscObject)A,&comm);
2068: /* make a collective version of 'rowp' */
2069: PetscObjectGetComm((PetscObject)rowp,&pcomm);
2070: if (pcomm==comm) {
2071: crowp = rowp;
2072: } else {
2073: ISGetSize(rowp,&nrows);
2074: ISGetIndices(rowp,&rows);
2075: ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);
2076: ISRestoreIndices(rowp,&rows);
2077: }
2078: ISSetPermutation(crowp);
2079: /* make a local version of 'colp' */
2080: PetscObjectGetComm((PetscObject)colp,&pcomm);
2081: MPI_Comm_size(pcomm,&size);
2082: if (size==1) {
2083: lcolp = colp;
2084: } else {
2085: ISAllGather(colp,&lcolp);
2086: }
2087: ISSetPermutation(lcolp);
2088: /* now we just get the submatrix */
2089: MatGetLocalSize(A,NULL,&clocal_size);
2090: MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);
2091: /* clean up */
2092: if (pcomm!=comm) {
2093: ISDestroy(&crowp);
2094: }
2095: if (size>1) {
2096: ISDestroy(&lcolp);
2097: }
2098: return(0);
2099: }
2101: PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2102: {
2103: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2104: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
2107: if (nghosts) *nghosts = B->nbs;
2108: if (ghosts) *ghosts = baij->garray;
2109: return(0);
2110: }
2112: PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2113: {
2114: Mat B;
2115: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2116: Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2117: Mat_SeqAIJ *b;
2119: PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL;
2120: PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2121: PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2124: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
2125: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
2127: /* ----------------------------------------------------------------
2128: Tell every processor the number of nonzeros per row
2129: */
2130: PetscMalloc1(A->rmap->N/bs,&lens);
2131: for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2132: lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2133: }
2134: PetscMalloc1(2*size,&recvcounts);
2135: displs = recvcounts + size;
2136: for (i=0; i<size; i++) {
2137: recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2138: displs[i] = A->rmap->range[i]/bs;
2139: }
2140: #if defined(PETSC_HAVE_MPI_IN_PLACE)
2141: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2142: #else
2143: sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2144: MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2145: #endif
2146: /* ---------------------------------------------------------------
2147: Create the sequential matrix of the same type as the local block diagonal
2148: */
2149: MatCreate(PETSC_COMM_SELF,&B);
2150: MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);
2151: MatSetType(B,MATSEQAIJ);
2152: MatSeqAIJSetPreallocation(B,0,lens);
2153: b = (Mat_SeqAIJ*)B->data;
2155: /*--------------------------------------------------------------------
2156: Copy my part of matrix column indices over
2157: */
2158: sendcount = ad->nz + bd->nz;
2159: jsendbuf = b->j + b->i[rstarts[rank]/bs];
2160: a_jsendbuf = ad->j;
2161: b_jsendbuf = bd->j;
2162: n = A->rmap->rend/bs - A->rmap->rstart/bs;
2163: cnt = 0;
2164: for (i=0; i<n; i++) {
2166: /* put in lower diagonal portion */
2167: m = bd->i[i+1] - bd->i[i];
2168: while (m > 0) {
2169: /* is it above diagonal (in bd (compressed) numbering) */
2170: if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2171: jsendbuf[cnt++] = garray[*b_jsendbuf++];
2172: m--;
2173: }
2175: /* put in diagonal portion */
2176: for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2177: jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2178: }
2180: /* put in upper diagonal portion */
2181: while (m-- > 0) {
2182: jsendbuf[cnt++] = garray[*b_jsendbuf++];
2183: }
2184: }
2185: if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2187: /*--------------------------------------------------------------------
2188: Gather all column indices to all processors
2189: */
2190: for (i=0; i<size; i++) {
2191: recvcounts[i] = 0;
2192: for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2193: recvcounts[i] += lens[j];
2194: }
2195: }
2196: displs[0] = 0;
2197: for (i=1; i<size; i++) {
2198: displs[i] = displs[i-1] + recvcounts[i-1];
2199: }
2200: #if defined(PETSC_HAVE_MPI_IN_PLACE)
2201: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2202: #else
2203: MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2204: #endif
2205: /*--------------------------------------------------------------------
2206: Assemble the matrix into usable form (note that the numerical values are not yet set)
2207: */
2208: /* set the b->ilen (length of each row) values */
2209: PetscArraycpy(b->ilen,lens,A->rmap->N/bs);
2210: /* set the b->i indices */
2211: b->i[0] = 0;
2212: for (i=1; i<=A->rmap->N/bs; i++) {
2213: b->i[i] = b->i[i-1] + lens[i-1];
2214: }
2215: PetscFree(lens);
2216: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2217: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2218: PetscFree(recvcounts);
2220: if (A->symmetric) {
2221: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
2222: } else if (A->hermitian) {
2223: MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
2224: } else if (A->structurally_symmetric) {
2225: MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
2226: }
2227: *newmat = B;
2228: return(0);
2229: }
2231: PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2232: {
2233: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
2235: Vec bb1 = NULL;
2238: if (flag == SOR_APPLY_UPPER) {
2239: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2240: return(0);
2241: }
2243: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2244: VecDuplicate(bb,&bb1);
2245: }
2247: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2248: if (flag & SOR_ZERO_INITIAL_GUESS) {
2249: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2250: its--;
2251: }
2253: while (its--) {
2254: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2255: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2257: /* update rhs: bb1 = bb - B*x */
2258: VecScale(mat->lvec,-1.0);
2259: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2261: /* local sweep */
2262: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
2263: }
2264: } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2265: if (flag & SOR_ZERO_INITIAL_GUESS) {
2266: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2267: its--;
2268: }
2269: while (its--) {
2270: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2271: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2273: /* update rhs: bb1 = bb - B*x */
2274: VecScale(mat->lvec,-1.0);
2275: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2277: /* local sweep */
2278: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
2279: }
2280: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2281: if (flag & SOR_ZERO_INITIAL_GUESS) {
2282: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2283: its--;
2284: }
2285: while (its--) {
2286: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2287: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2289: /* update rhs: bb1 = bb - B*x */
2290: VecScale(mat->lvec,-1.0);
2291: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2293: /* local sweep */
2294: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
2295: }
2296: } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2298: VecDestroy(&bb1);
2299: return(0);
2300: }
2302: PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2303: {
2305: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data;
2306: PetscInt N,i,*garray = aij->garray;
2307: PetscInt ib,jb,bs = A->rmap->bs;
2308: Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2309: MatScalar *a_val = a_aij->a;
2310: Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2311: MatScalar *b_val = b_aij->a;
2312: PetscReal *work;
2315: MatGetSize(A,NULL,&N);
2316: PetscCalloc1(N,&work);
2317: if (type == NORM_2) {
2318: for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2319: for (jb=0; jb<bs; jb++) {
2320: for (ib=0; ib<bs; ib++) {
2321: work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2322: a_val++;
2323: }
2324: }
2325: }
2326: for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2327: for (jb=0; jb<bs; jb++) {
2328: for (ib=0; ib<bs; ib++) {
2329: work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2330: b_val++;
2331: }
2332: }
2333: }
2334: } else if (type == NORM_1) {
2335: for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2336: for (jb=0; jb<bs; jb++) {
2337: for (ib=0; ib<bs; ib++) {
2338: work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2339: a_val++;
2340: }
2341: }
2342: }
2343: for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2344: for (jb=0; jb<bs; jb++) {
2345: for (ib=0; ib<bs; ib++) {
2346: work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2347: b_val++;
2348: }
2349: }
2350: }
2351: } else if (type == NORM_INFINITY) {
2352: for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2353: for (jb=0; jb<bs; jb++) {
2354: for (ib=0; ib<bs; ib++) {
2355: PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2356: work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2357: a_val++;
2358: }
2359: }
2360: }
2361: for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2362: for (jb=0; jb<bs; jb++) {
2363: for (ib=0; ib<bs; ib++) {
2364: PetscInt col = garray[b_aij->j[i]] * bs + jb;
2365: work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2366: b_val++;
2367: }
2368: }
2369: }
2370: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2371: if (type == NORM_INFINITY) {
2372: MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
2373: } else {
2374: MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
2375: }
2376: PetscFree(work);
2377: if (type == NORM_2) {
2378: for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2379: }
2380: return(0);
2381: }
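/*
   Caller-side sketch (A is an illustrative assembled matrix with N global
   columns; norms must have room for all N column norms on every process):

     PetscReal *norms;
     PetscInt   N;
     MatGetSize(A,NULL,&N);
     PetscMalloc1(N,&norms);
     MatGetColumnNorms(A,NORM_2,norms);   // NORM_1 and NORM_INFINITY are also handled above
     PetscFree(norms);
*/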
2383: PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2384: {
2385: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
2389: MatInvertBlockDiagonal(a->A,values);
2390: A->factorerrortype = a->A->factorerrortype;
2391: A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2392: A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row;
2393: return(0);
2394: }
2396: PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2397: {
2399: Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data;
2400: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data;
2403: if (!Y->preallocated) {
2404: MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);
2405: } else if (!aij->nz) {
2406: PetscInt nonew = aij->nonew;
2407: MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);
2408: aij->nonew = nonew;
2409: }
2410: MatShift_Basic(Y,a);
2411: return(0);
2412: }
2414: PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d)
2415: {
2416: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2420: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2421: MatMissingDiagonal(a->A,missing,d);
2422: if (d) {
2423: PetscInt rstart;
2424: MatGetOwnershipRange(A,&rstart,NULL);
2425: *d += rstart/A->rmap->bs;
2427: }
2428: return(0);
2429: }
2431: PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2432: {
2434: *a = ((Mat_MPIBAIJ*)A->data)->A;
2435: return(0);
2436: }
2438: /* -------------------------------------------------------------------*/
2439: static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2440: MatGetRow_MPIBAIJ,
2441: MatRestoreRow_MPIBAIJ,
2442: MatMult_MPIBAIJ,
2443: /* 4*/ MatMultAdd_MPIBAIJ,
2444: MatMultTranspose_MPIBAIJ,
2445: MatMultTransposeAdd_MPIBAIJ,
2446: NULL,
2447: NULL,
2448: NULL,
2449: /*10*/ NULL,
2450: NULL,
2451: NULL,
2452: MatSOR_MPIBAIJ,
2453: MatTranspose_MPIBAIJ,
2454: /*15*/ MatGetInfo_MPIBAIJ,
2455: MatEqual_MPIBAIJ,
2456: MatGetDiagonal_MPIBAIJ,
2457: MatDiagonalScale_MPIBAIJ,
2458: MatNorm_MPIBAIJ,
2459: /*20*/ MatAssemblyBegin_MPIBAIJ,
2460: MatAssemblyEnd_MPIBAIJ,
2461: MatSetOption_MPIBAIJ,
2462: MatZeroEntries_MPIBAIJ,
2463: /*24*/ MatZeroRows_MPIBAIJ,
2464: NULL,
2465: NULL,
2466: NULL,
2467: NULL,
2468: /*29*/ MatSetUp_MPIBAIJ,
2469: NULL,
2470: NULL,
2471: MatGetDiagonalBlock_MPIBAIJ,
2472: NULL,
2473: /*34*/ MatDuplicate_MPIBAIJ,
2474: NULL,
2475: NULL,
2476: NULL,
2477: NULL,
2478: /*39*/ MatAXPY_MPIBAIJ,
2479: MatCreateSubMatrices_MPIBAIJ,
2480: MatIncreaseOverlap_MPIBAIJ,
2481: MatGetValues_MPIBAIJ,
2482: MatCopy_MPIBAIJ,
2483: /*44*/ NULL,
2484: MatScale_MPIBAIJ,
2485: MatShift_MPIBAIJ,
2486: NULL,
2487: MatZeroRowsColumns_MPIBAIJ,
2488: /*49*/ NULL,
2489: NULL,
2490: NULL,
2491: NULL,
2492: NULL,
2493: /*54*/ MatFDColoringCreate_MPIXAIJ,
2494: NULL,
2495: MatSetUnfactored_MPIBAIJ,
2496: MatPermute_MPIBAIJ,
2497: MatSetValuesBlocked_MPIBAIJ,
2498: /*59*/ MatCreateSubMatrix_MPIBAIJ,
2499: MatDestroy_MPIBAIJ,
2500: MatView_MPIBAIJ,
2501: NULL,
2502: NULL,
2503: /*64*/ NULL,
2504: NULL,
2505: NULL,
2506: NULL,
2507: NULL,
2508: /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2509: NULL,
2510: NULL,
2511: NULL,
2512: NULL,
2513: /*74*/ NULL,
2514: MatFDColoringApply_BAIJ,
2515: NULL,
2516: NULL,
2517: NULL,
2518: /*79*/ NULL,
2519: NULL,
2520: NULL,
2521: NULL,
2522: MatLoad_MPIBAIJ,
2523: /*84*/ NULL,
2524: NULL,
2525: NULL,
2526: NULL,
2527: NULL,
2528: /*89*/ NULL,
2529: NULL,
2530: NULL,
2531: NULL,
2532: NULL,
2533: /*94*/ NULL,
2534: NULL,
2535: NULL,
2536: NULL,
2537: NULL,
2538: /*99*/ NULL,
2539: NULL,
2540: NULL,
2541: NULL,
2542: NULL,
2543: /*104*/NULL,
2544: MatRealPart_MPIBAIJ,
2545: MatImaginaryPart_MPIBAIJ,
2546: NULL,
2547: NULL,
2548: /*109*/NULL,
2549: NULL,
2550: NULL,
2551: NULL,
2552: MatMissingDiagonal_MPIBAIJ,
2553: /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2554: NULL,
2555: MatGetGhosts_MPIBAIJ,
2556: NULL,
2557: NULL,
2558: /*119*/NULL,
2559: NULL,
2560: NULL,
2561: NULL,
2562: MatGetMultiProcBlock_MPIBAIJ,
2563: /*124*/NULL,
2564: MatGetColumnNorms_MPIBAIJ,
2565: MatInvertBlockDiagonal_MPIBAIJ,
2566: NULL,
2567: NULL,
2568: /*129*/ NULL,
2569: NULL,
2570: NULL,
2571: NULL,
2572: NULL,
2573: /*134*/ NULL,
2574: NULL,
2575: NULL,
2576: NULL,
2577: NULL,
2578: /*139*/ MatSetBlockSizes_Default,
2579: NULL,
2580: NULL,
2581: MatFDColoringSetUp_MPIXAIJ,
2582: NULL,
2583: /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2584: };
2587: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2588: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2590: PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2591: {
2592: PetscInt m,rstart,cstart,cend;
2593: PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2594: const PetscInt *JJ =NULL;
2595: PetscScalar *values=NULL;
2596: PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2598: PetscBool nooffprocentries;
2601: PetscLayoutSetBlockSize(B->rmap,bs);
2602: PetscLayoutSetBlockSize(B->cmap,bs);
2603: PetscLayoutSetUp(B->rmap);
2604: PetscLayoutSetUp(B->cmap);
2605: PetscLayoutGetBlockSize(B->rmap,&bs);
2606: m = B->rmap->n/bs;
2607: rstart = B->rmap->rstart/bs;
2608: cstart = B->cmap->rstart/bs;
2609: cend = B->cmap->rend/bs;
2611: if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2612: PetscMalloc2(m,&d_nnz,m,&o_nnz);
2613: for (i=0; i<m; i++) {
2614: nz = ii[i+1] - ii[i];
2615: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2616: nz_max = PetscMax(nz_max,nz);
2617: dlen = 0;
2618: olen = 0;
2619: JJ = jj + ii[i];
2620: for (j=0; j<nz; j++) {
2621: if (*JJ < cstart || *JJ >= cend) olen++;
2622: else dlen++;
2623: JJ++;
2624: }
2625: d_nnz[i] = dlen;
2626: o_nnz[i] = olen;
2627: }
2628: MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
2629: PetscFree2(d_nnz,o_nnz);
2631: values = (PetscScalar*)V;
2632: if (!values) {
2633: PetscCalloc1(bs*bs*nz_max,&values);
2634: }
2635: for (i=0; i<m; i++) {
2636: PetscInt row = i + rstart;
2637: PetscInt ncols = ii[i+1] - ii[i];
2638: const PetscInt *icols = jj + ii[i];
2639: if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2640: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2641: MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
2642: } else { /* block ordering does not match so we can only insert one block at a time. */
2643: PetscInt j;
2644: for (j=0; j<ncols; j++) {
2645: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2646: MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);
2647: }
2648: }
2649: }
2651: if (!V) { PetscFree(values); }
2652: nooffprocentries = B->nooffprocentries;
2653: B->nooffprocentries = PETSC_TRUE;
2654: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2655: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2656: B->nooffprocentries = nooffprocentries;
2658: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2659: return(0);
2660: }
2662: /*@C
2663: MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2665: Collective
2667: Input Parameters:
2668: + B - the matrix
2669: . bs - the block size
2670: . i - the indices into j for the start of each local row (starts with zero)
2671: . j - the column indices for each local row (starts with zero); these must be sorted within each row
2672: - v - optional values in the matrix
2674: Level: advanced
2676: Notes:
2677: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
2678: may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2679: over rows within a block and the last index is over columns within a row of the block. Fortran programs will likely set
2680: MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2681: column of the block and the second index is over columns of the block.
2683: Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
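   As a sketch (the CSR arrays ia, ja, va and the sizes here are illustrative,
   not taken from a PETSc example), a process holding two local block rows of a
   matrix with block size 2 could be assembled as

.vb
      PetscInt    ia[] = {0,2,3};    block row i owns entries ia[i]..ia[i+1]-1 of ja
      PetscInt    ja[] = {0,5,1};    sorted global block column indices, row by row
      PetscScalar va[3*2*2];         one bs*bs dense block per entry of ja
      MatMPIBAIJSetPreallocationCSR(B,2,ia,ja,va);
.ve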
2685: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2686: @*/
2687: PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2688: {
2695: PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2696: return(0);
2697: }
2699: PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2700: {
2701: Mat_MPIBAIJ *b;
2703: PetscInt i;
2704: PetscMPIInt size;
2707: MatSetBlockSize(B,PetscAbs(bs));
2708: PetscLayoutSetUp(B->rmap);
2709: PetscLayoutSetUp(B->cmap);
2710: PetscLayoutGetBlockSize(B->rmap,&bs);
2712: if (d_nnz) {
2713: for (i=0; i<B->rmap->n/bs; i++) {
2714: if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2715: }
2716: }
2717: if (o_nnz) {
2718: for (i=0; i<B->rmap->n/bs; i++) {
2719: if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2720: }
2721: }
2723: b = (Mat_MPIBAIJ*)B->data;
2724: b->bs2 = bs*bs;
2725: b->mbs = B->rmap->n/bs;
2726: b->nbs = B->cmap->n/bs;
2727: b->Mbs = B->rmap->N/bs;
2728: b->Nbs = B->cmap->N/bs;
2730: for (i=0; i<=b->size; i++) {
2731: b->rangebs[i] = B->rmap->range[i]/bs;
2732: }
2733: b->rstartbs = B->rmap->rstart/bs;
2734: b->rendbs = B->rmap->rend/bs;
2735: b->cstartbs = B->cmap->rstart/bs;
2736: b->cendbs = B->cmap->rend/bs;
2738: #if defined(PETSC_USE_CTABLE)
2739: PetscTableDestroy(&b->colmap);
2740: #else
2741: PetscFree(b->colmap);
2742: #endif
2743: PetscFree(b->garray);
2744: VecDestroy(&b->lvec);
2745: VecScatterDestroy(&b->Mvctx);
2747: /* Because b->B may have been resized we simply destroy it and create a new one each time */
2748: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
2749: MatDestroy(&b->B);
2750: MatCreate(PETSC_COMM_SELF,&b->B);
2751: MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);
2752: MatSetType(b->B,MATSEQBAIJ);
2753: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
2755: if (!B->preallocated) {
2756: MatCreate(PETSC_COMM_SELF,&b->A);
2757: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
2758: MatSetType(b->A,MATSEQBAIJ);
2759: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
2760: MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);
2761: }
2763: MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
2764: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
2765: B->preallocated = PETSC_TRUE;
2766: B->was_assembled = PETSC_FALSE;
2767: B->assembled = PETSC_FALSE;
2768: return(0);
2769: }
2771: extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2772: extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2774: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2775: {
2776: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data;
2778: Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2779: PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2780: const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2783: PetscMalloc1(M+1,&ii);
2784: ii[0] = 0;
2785: for (i=0; i<M; i++) {
2786: if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2787: if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2788: ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2789: /* remove one from the count if this row contains a diagonal entry, since it is omitted below */
2790: for (j=id[i]; j<id[i+1]; j++) {
2791: if (jd[j] == i) {ii[i+1]--;break;}
2792: }
2793: }
2794: PetscMalloc1(ii[M],&jj);
2795: cnt = 0;
2796: for (i=0; i<M; i++) {
2797: for (j=io[i]; j<io[i+1]; j++) {
2798: if (garray[jo[j]] > rstart) break;
2799: jj[cnt++] = garray[jo[j]];
2800: }
2801: for (k=id[i]; k<id[i+1]; k++) {
2802: if (jd[k] != i) {
2803: jj[cnt++] = rstart + jd[k];
2804: }
2805: }
2806: for (; j<io[i+1]; j++) {
2807: jj[cnt++] = garray[jo[j]];
2808: }
2809: }
2810: MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);
2811: return(0);
2812: }
2814: #include <../src/mat/impls/aij/mpi/mpiaij.h>
2816: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2818: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2819: {
2821: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2822: Mat B;
2823: Mat_MPIAIJ *b;
2826: if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2828: if (reuse == MAT_REUSE_MATRIX) {
2829: B = *newmat;
2830: } else {
2831: MatCreate(PetscObjectComm((PetscObject)A),&B);
2832: MatSetType(B,MATMPIAIJ);
2833: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
2834: MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);
2835: MatSeqAIJSetPreallocation(B,0,NULL);
2836: MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);
2837: }
2838: b = (Mat_MPIAIJ*) B->data;
2840: if (reuse == MAT_REUSE_MATRIX) {
2841: MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);
2842: MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);
2843: } else {
2844: MatDestroy(&b->A);
2845: MatDestroy(&b->B);
2846: MatDisAssemble_MPIBAIJ(A);
2847: MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);
2848: MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);
2849: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2850: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2851: }
2852: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2853: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2855: if (reuse == MAT_INPLACE_MATRIX) {
2856: MatHeaderReplace(A,&B);
2857: } else {
2858: *newmat = B;
2859: }
2860: return(0);
2861: }
2863: /*MC
2864: MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2866: Options Database Keys:
2867: + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2868: . -mat_block_size <bs> - set the blocksize used to store the matrix
2869: - -mat_use_hash_table <fact> - use a hash table to save time when constructing the matrix; <fact> is the table sizing factor
2871: Level: beginner
2873: Notes:
2874: MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
2875: space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
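   A typical creation sequence (a sketch; the sizes m, n, M, N and the nonzero
   counts d_nz, o_nz are illustrative placeholders) is

.vb
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATMPIBAIJ);
      MatSetFromOptions(A);
      MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
.ve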
2877: .seealso: MatCreateBAIJ
2878: M*/
2880: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2882: PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2883: {
2884: Mat_MPIBAIJ *b;
2886: PetscBool flg = PETSC_FALSE;
2889: PetscNewLog(B,&b);
2890: B->data = (void*)b;
2892: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2893: B->assembled = PETSC_FALSE;
2895: B->insertmode = NOT_SET_VALUES;
2896: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
2897: MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);
2899: /* build local table of row and column ownerships */
2900: PetscMalloc1(b->size+1,&b->rangebs);
2902: /* build cache for off array entries formed */
2903: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
2905: b->donotstash = PETSC_FALSE;
2906: b->colmap = NULL;
2907: b->garray = NULL;
2908: b->roworiented = PETSC_TRUE;
2910: /* stuff used in block assembly */
2911: b->barray = NULL;
2913: /* stuff used for matrix vector multiply */
2914: b->lvec = NULL;
2915: b->Mvctx = NULL;
2917: /* stuff for MatGetRow() */
2918: b->rowindices = NULL;
2919: b->rowvalues = NULL;
2920: b->getrowactive = PETSC_FALSE;
2922: /* hash table stuff */
2923: b->ht = NULL;
2924: b->hd = NULL;
2925: b->ht_size = 0;
2926: b->ht_flag = PETSC_FALSE;
2927: b->ht_fact = 0;
2928: b->ht_total_ct = 0;
2929: b->ht_insert_ct = 0;
2931: /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2932: b->ijonly = PETSC_FALSE;
2935: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);
2936: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);
2937: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);
2938: #if defined(PETSC_HAVE_HYPRE)
2939: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);
2940: #endif
2941: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);
2942: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);
2943: PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);
2944: PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);
2945: PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);
2946: PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);
2947: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);
2948: PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);
2950: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for MPIBAIJ matrix","Mat");
2951: PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);
2952: if (flg) {
2953: PetscReal fact = 1.39;
2954: MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
2955: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);
2956: if (fact <= 1.0) fact = 1.39;
2957: MatMPIBAIJSetHashTableFactor(B,fact);
2958: PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
2959: }
2960: PetscOptionsEnd();
2961: return(0);
2962: }
2964: /*MC
2965: MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2967: This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2968: and MATMPIBAIJ otherwise.
2970: Options Database Keys:
2971: . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2973: Level: beginner
2975: .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2976: M*/
2978: /*@C
2979: MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2980: (block compressed row). For good matrix assembly performance
2981: the user should preallocate the matrix storage by setting the parameters
2982: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2983: performance can be increased by more than a factor of 50.
2985: Collective on Mat
2987: Input Parameters:
2988: + B - the matrix
2989: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2990: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2991: . d_nz - number of block nonzeros per block row in diagonal portion of local
2992: submatrix (same for all local rows)
2993: . d_nnz - array containing the number of block nonzeros in the various block rows
2994: of the diagonal portion of the local submatrix (possibly different for each block
2995: row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
2996: set it even if it is zero.
2997: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2998: submatrix (same for all local rows).
2999: - o_nnz - array containing the number of nonzeros in the various block rows of the
3000: off-diagonal portion of the local submatrix (possibly different for
3001: each block row) or NULL.
3003: If the *_nnz parameter is given then the *_nz parameter is ignored
3005: Options Database Keys:
3006: + -mat_block_size - size of the blocks to use
3007: - -mat_use_hash_table <fact> - use a hash table to save time when constructing the matrix; <fact> is the table sizing factor
3009: Notes:
3010: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3011: then it must be used on all processors that share the object for that argument.
3013: Storage Information:
3014: For a square global matrix we define each processor's diagonal portion
3015: to be its local rows and the corresponding columns (a square submatrix);
3016: each processor's off-diagonal portion encompasses the remainder of the
3017: local matrix (a rectangular submatrix).
3019: The user can specify preallocated storage for the diagonal part of
3020: the local submatrix with either d_nz or d_nnz (not both). Set
3021: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3022: memory allocation. Likewise, specify preallocated storage for the
3023: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3025: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3026: the figure below we depict these three local rows and all columns (0-11).
3028: .vb
3029: 0 1 2 3 4 5 6 7 8 9 10 11
3030: --------------------------
3031: row 3 |o o o d d d o o o o o o
3032: row 4 |o o o d d d o o o o o o
3033: row 5 |o o o d d d o o o o o o
3034: --------------------------
3035: .ve
3037: Thus, any entries in the d locations are stored in the d (diagonal)
3038: submatrix, and any entries in the o locations are stored in the
3039: o (off-diagonal) submatrix. Note that the d and the o submatrices are
3040: stored simply in the MATSEQBAIJ format for compressed row storage.
3042: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3043: and o_nz should indicate the number of block nonzeros per row in the o matrix.
3044: In general, for PDE problems in which most nonzeros are near the diagonal,
3045: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3046: or you will get TERRIBLE performance; see the users' manual chapter on
3047: matrices.
3049: You can call MatGetInfo() to get information on how effective the preallocation was;
3050: for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3051: You can also run with the option -info and look for messages with the string
3052: malloc in them to see if additional memory allocation was needed.
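   For example (a sketch with illustrative counts), a process owning 3 block
   rows might preallocate exactly with

.vb
      PetscInt d_nnz[] = {3,2,3};    block nonzeros per block row, diagonal portion
      PetscInt o_nnz[] = {1,2,0};    block nonzeros per block row, off-diagonal portion
      MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
.ve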
3054: Level: intermediate
3056: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3057: @*/
3058: PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3059: {
3066: PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
3067: return(0);
3068: }
3070: /*@C
3071: MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3072: (block compressed row). For good matrix assembly performance
3073: the user should preallocate the matrix storage by setting the parameters
3074: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3075: performance can be increased by more than a factor of 50.
3077: Collective
3079: Input Parameters:
3080: + comm - MPI communicator
3081: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3082: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3083: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3084: This value should be the same as the local size used in creating the
3085: y vector for the matrix-vector product y = Ax.
3086: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3087: This value should be the same as the local size used in creating the
3088: x vector for the matrix-vector product y = Ax.
3089: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3090: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3091: . d_nz - number of nonzero blocks per block row in diagonal portion of local
3092: submatrix (same for all local rows)
3093: . d_nnz - array containing the number of nonzero blocks in the various block rows
3094: of the diagonal portion of the local submatrix (possibly different for each block
3095: row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
3096: and set it even if it is zero.
3097: . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3098: submatrix (same for all local rows).
3099: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3100: off-diagonal portion of the local submatrix (possibly different for
3101: each block row) or NULL.
3103: Output Parameter:
3104: . A - the matrix
3106: Options Database Keys:
3107: + -mat_block_size - size of the blocks to use
3108: - -mat_use_hash_table <fact> - use a hash table to save time when constructing the matrix; <fact> is the table sizing factor
3110: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3111: MatXXXXSetPreallocation() paradigm instead of this routine directly.
3112: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3114: Notes:
3115: If the *_nnz parameter is given then the *_nz parameter is ignored
3117: A nonzero block is any block that has 1 or more nonzeros in it
3119: The user MUST specify either the local or global matrix dimensions
3120: (possibly both).
3122: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3123: then it must be used on all processors that share the object for that argument.
3125: Storage Information:
3126: For a square global matrix we define each processor's diagonal portion
3127: to be its local rows and the corresponding columns (a square submatrix);
3128: each processor's off-diagonal portion encompasses the remainder of the
3129: local matrix (a rectangular submatrix).
3131: The user can specify preallocated storage for the diagonal part of
3132: the local submatrix with either d_nz or d_nnz (not both). Set
3133: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3134: memory allocation. Likewise, specify preallocated storage for the
3135: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3137: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3138: the figure below we depict these three local rows and all columns (0-11).
3140: .vb
3141: 0 1 2 3 4 5 6 7 8 9 10 11
3142: --------------------------
3143: row 3 |o o o d d d o o o o o o
3144: row 4 |o o o d d d o o o o o o
3145: row 5 |o o o d d d o o o o o o
3146: --------------------------
3147: .ve
3149: Thus, any entries in the d locations are stored in the d (diagonal)
3150: submatrix, and any entries in the o locations are stored in the
3151: o (off-diagonal) submatrix. Note that the d and the o submatrices are
3152: stored simply in the MATSEQBAIJ format for compressed row storage.
3154: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3155: and o_nz should indicate the number of block nonzeros per row in the o matrix.
3156: In general, for PDE problems in which most nonzeros are near the diagonal,
3157: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3158: or you will get TERRIBLE performance; see the users' manual chapter on
3159: matrices.
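   For example (a sketch; the global size 100 and nonzero counts are
   illustrative), a square matrix with block size 2 and PETSc-chosen local
   sizes can be created with

.vb
      Mat A;
      MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);
.ve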
3161: Level: intermediate
3163: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3164: @*/
3165: PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3166: {
3168: PetscMPIInt size;
3171: MatCreate(comm,A);
3172: MatSetSizes(*A,m,n,M,N);
3173: MPI_Comm_size(comm,&size);
3174: if (size > 1) {
3175: MatSetType(*A,MATMPIBAIJ);
3176: MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
3177: } else {
3178: MatSetType(*A,MATSEQBAIJ);
3179: MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
3180: }
3181: return(0);
3182: }
3184: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3185: {
3186: Mat mat;
3187: Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3189: PetscInt len=0;
3192: *newmat = NULL;
3193: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
3194: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3195: MatSetType(mat,((PetscObject)matin)->type_name);
3197: mat->factortype = matin->factortype;
3198: mat->preallocated = PETSC_TRUE;
3199: mat->assembled = PETSC_TRUE;
3200: mat->insertmode = NOT_SET_VALUES;
3202: a = (Mat_MPIBAIJ*)mat->data;
3203: mat->rmap->bs = matin->rmap->bs;
3204: a->bs2 = oldmat->bs2;
3205: a->mbs = oldmat->mbs;
3206: a->nbs = oldmat->nbs;
3207: a->Mbs = oldmat->Mbs;
3208: a->Nbs = oldmat->Nbs;
3210: PetscLayoutReference(matin->rmap,&mat->rmap);
3211: PetscLayoutReference(matin->cmap,&mat->cmap);
3213: a->size = oldmat->size;
3214: a->rank = oldmat->rank;
3215: a->donotstash = oldmat->donotstash;
3216: a->roworiented = oldmat->roworiented;
3217: a->rowindices = NULL;
3218: a->rowvalues = NULL;
3219: a->getrowactive = PETSC_FALSE;
3220: a->barray = NULL;
3221: a->rstartbs = oldmat->rstartbs;
3222: a->rendbs = oldmat->rendbs;
3223: a->cstartbs = oldmat->cstartbs;
3224: a->cendbs = oldmat->cendbs;
3226: /* hash table stuff */
3227: a->ht = NULL;
3228: a->hd = NULL;
3229: a->ht_size = 0;
3230: a->ht_flag = oldmat->ht_flag;
3231: a->ht_fact = oldmat->ht_fact;
3232: a->ht_total_ct = 0;
3233: a->ht_insert_ct = 0;
3235: PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);
3236: if (oldmat->colmap) {
3237: #if defined(PETSC_USE_CTABLE)
3238: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3239: #else
3240: PetscMalloc1(a->Nbs,&a->colmap);
3241: PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));
3242: PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);
3243: #endif
3244: } else a->colmap = NULL;
3246: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3247: PetscMalloc1(len,&a->garray);
3248: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
3249: PetscArraycpy(a->garray,oldmat->garray,len);
3250: } else a->garray = NULL;
3252: MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);
3253: VecDuplicate(oldmat->lvec,&a->lvec);
3254: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
3255: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3256: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
3258: MatDuplicate(oldmat->A,cpvalues,&a->A);
3259: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
3260: MatDuplicate(oldmat->B,cpvalues,&a->B);
3261: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
3262: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3263: *newmat = mat;
3264: return(0);
3265: }
3267: /* Used for both MPIBAIJ and MPISBAIJ matrices */
3268: PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3269: {
3270: PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3271: PetscInt *rowidxs,*colidxs,rs,cs,ce;
3272: PetscScalar *matvals;
3276: PetscViewerSetUp(viewer);
3278: /* read in matrix header */
3279: PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);
3280: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3281: M = header[1]; N = header[2]; nz = header[3];
3282: if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3283: if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3284: if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3286: /* set block sizes from the viewer's .info file */
3287: MatLoad_Binary_BlockSizes(mat,viewer);
3288: /* set local sizes if not set already */
3289: if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3290: if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3291: /* set global sizes if not set already */
3292: if (mat->rmap->N < 0) mat->rmap->N = M;
3293: if (mat->cmap->N < 0) mat->cmap->N = N;
3294: PetscLayoutSetUp(mat->rmap);
3295: PetscLayoutSetUp(mat->cmap);
3297: /* check if the matrix sizes are correct */
3298: MatGetSize(mat,&rows,&cols);
3299: if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3300: MatGetBlockSize(mat,&bs);
3301: MatGetLocalSize(mat,&m,&n);
3302: PetscLayoutGetRange(mat->rmap,&rs,NULL);
3303: PetscLayoutGetRange(mat->cmap,&cs,&ce);
3304: mbs = m/bs; nbs = n/bs;
3306: /* read in row lengths and build row indices */
3307: PetscMalloc1(m+1,&rowidxs);
3308: PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);
3309: rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3310: MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));
3311: if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
3313: /* read in column indices and matrix values */
3314: PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);
3315: PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);
3316: PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);
3318: { /* preallocate matrix storage */
3319: PetscBT bt; /* helper bit set to count diagonal nonzeros */
3320: PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3321: PetscBool sbaij,done;
3322: PetscInt *d_nnz,*o_nnz;
3324: PetscBTCreate(nbs,&bt);
3325: PetscHSetICreate(&ht);
3326: PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);
3327: PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);
3328: for (i=0; i<mbs; i++) {
3329: PetscBTMemzero(nbs,bt);
3330: PetscHSetIClear(ht);
3331: for (k=0; k<bs; k++) {
3332: PetscInt row = bs*i + k;
3333: for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3334: PetscInt col = colidxs[j];
3335: if (!sbaij || col >= row) {
3336: if (col >= cs && col < ce) {
3337: if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3338: } else {
3339: PetscHSetIQueryAdd(ht,col/bs,&done);
3340: if (done) o_nnz[i]++;
3341: }
3342: }
3343: }
3344: }
3345: }
3346: PetscBTDestroy(&bt);
3347: PetscHSetIDestroy(&ht);
3348: MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);   /* no-op unless mat is MATMPIBAIJ */
3349: MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);   /* no-op unless mat is MATMPISBAIJ */
3350: PetscFree2(d_nnz,o_nnz);
3351: }
3353: /* store matrix values */
3354: for (i=0; i<m; i++) {
3355: PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3356: (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);
3357: }
3359: PetscFree(rowidxs);
3360: PetscFree2(colidxs,matvals);
3361: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
3362: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
3363: return(0);
3364: }
3366: PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3367: {
3369: PetscBool isbinary;
3372: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
3373: if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3374: MatLoad_MPIBAIJ_Binary(mat,viewer);
3375: return(0);
3376: }
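/*
   Hedged usage example for the loaders above (the file name "matrix.dat" and
   the explicit block size are assumptions made for illustration):

     PetscViewer viewer;
     Mat         A;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIBAIJ);
     MatSetBlockSize(A,2);        optional; otherwise the block size comes from the viewer's .info file
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/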
3378: /*@
3379: MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
3381: Input Parameters:
3382: + mat - the matrix
3383: - fact - factor used to compute the hash table size
3385: Not Collective; each process can use a different factor
3387: Level: advanced
3389: Notes:
3390: This can also be set by the command line option: -mat_use_hash_table <fact>
3392: .seealso: MatSetOption()
3393: @*/
3394: PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3395: {
3399: PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));
3400: return(0);
3401: }
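/*
   Hedged usage sketch: the factor only matters when the hash-table insertion
   path is enabled, so MAT_USE_HASH_TABLE should also be set (before assembly);
   the value 1.99 is purely illustrative.

     MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
     MatMPIBAIJSetHashTableFactor(A,1.99);
*/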
3403: PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3404: {
3405: Mat_MPIBAIJ *baij;
3408: baij = (Mat_MPIBAIJ*)mat->data;
3409: baij->ht_fact = fact;
3410: return(0);
3411: }
3413: PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3414: {
3415: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3416: PetscBool flg;
3420: PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);
3421: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3422: if (Ad) *Ad = a->A;
3423: if (Ao) *Ao = a->B;
3424: if (colmap) *colmap = a->garray;
3425: return(0);
3426: }
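/*
   Hedged usage sketch for the accessor above. Ad and Ao are references to the
   internal SeqBAIJ matrices and must not be destroyed by the caller; garray
   maps block columns of Ao to global block columns.

     Mat            Ad,Ao;
     const PetscInt *garray;
     MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);
*/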
3428: /*
3429: Special version for direct calls from Fortran (to eliminate two function call overheads)
3430: */
3431: #if defined(PETSC_HAVE_FORTRAN_CAPS)
3432: #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3433: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3434: #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3435: #endif
3437: /*@C
3438: MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3440: Collective on Mat
3442: Input Parameters:
3443: + mat - the matrix
3444: . min - number of input rows
3445: . im - input rows
3446: . nin - number of input columns
3447: . in - input columns
3448: . v - numerical values input
3449: - addvin - INSERT_VALUES or ADD_VALUES
3451: Notes:
3452: This is a complete copy of MatSetValuesBlocked_MPIBAIJ(); the duplication is unfortunate and the two must be kept in sync by hand.
3454: Level: advanced
3456: .seealso: MatSetValuesBlocked()
3457: @*/
3458: PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3459: {
3460: /* convert input arguments to C version */
3461: Mat mat = *matin;
3462: PetscInt m = *min, n = *nin;
3463: InsertMode addv = *addvin;
3465: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
3466: const MatScalar *value;
3467: MatScalar *barray = baij->barray;
3468: PetscBool roworiented = baij->roworiented;
3469: PetscErrorCode ierr;
3470: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
3471: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3472: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3475: /* tasks normally handled by MatSetValuesBlocked() */
3476: if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3477: else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3478: if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3479: if (mat->assembled) {
3480: mat->was_assembled = PETSC_TRUE;
3481: mat->assembled = PETSC_FALSE;
3482: }
3483: PetscLogEventBegin(MAT_SetValues,mat,0,0,0);
3486: if (!barray) {
3487: PetscMalloc1(bs2,&barray);
3488: baij->barray = barray;
3489: }
3491: if (roworiented) stepval = (n-1)*bs;
3492: else stepval = (m-1)*bs;
3494: for (i=0; i<m; i++) {
3495: if (im[i] < 0) continue;
3496: if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3497: if (im[i] >= rstart && im[i] < rend) {
3498: row = im[i] - rstart;
3499: for (j=0; j<n; j++) {
3500: /* If there is only one block column (row-oriented) or one block row (column-oriented), the input can be used in place and no copy is required */
3501: if ((roworiented) && (n == 1)) {
3502: barray = (MatScalar*)v + i*bs2;
3503: } else if ((!roworiented) && (m == 1)) {
3504: barray = (MatScalar*)v + j*bs2;
3505: } else { /* Here a copy is required */
3506: if (roworiented) {
3507: value = v + i*(stepval+bs)*bs + j*bs;
3508: } else {
3509: value = v + j*(stepval+bs)*bs + i*bs;
3510: }
3511: for (ii=0; ii<bs; ii++,value+=stepval) {
3512: for (jj=0; jj<bs; jj++) {
3513: *barray++ = *value++;
3514: }
3515: }
3516: barray -= bs2; /* reset barray to the start of the copied block */
3517: }
3519: if (in[j] >= cstart && in[j] < cend) {
3520: col = in[j] - cstart;
3521: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);
3522: } else if (in[j] < 0) continue;
3523: else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3524: else {
3525: if (mat->was_assembled) {
3526: if (!baij->colmap) {
3527: MatCreateColmap_MPIBAIJ_Private(mat);
3528: }
3530: #if defined(PETSC_USE_DEBUG)
3531: #if defined(PETSC_USE_CTABLE)
3532: { PetscInt data;
3533: PetscTableFind(baij->colmap,in[j]+1,&data);
3534: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3535: }
3536: #else
3537: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3538: #endif
3539: #endif
3540: #if defined(PETSC_USE_CTABLE)
3541: PetscTableFind(baij->colmap,in[j]+1,&col);
3542: col = (col - 1)/bs;
3543: #else
3544: col = (baij->colmap[in[j]] - 1)/bs;
3545: #endif
3546: if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { /* disassemble when new off-process nonzeros are allowed in B */
3547: MatDisAssemble_MPIBAIJ(mat);
3548: col = in[j];
3549: }
3550: } else col = in[j];
3551: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);
3552: }
3553: }
3554: } else {
3555: if (!baij->donotstash) {
3556: if (roworiented) {
3557: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3558: } else {
3559: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3560: }
3561: }
3562: }
3563: }
3565: /* task normally handled by MatSetValuesBlocked() */
3566: PetscLogEventEnd(MAT_SetValues,mat,0,0,0);
3567: return(0);
3568: }
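/*
   For reference, a hedged C sketch of the semantics the Fortran wrapper above
   duplicates: inserting a single bs = 2 block with MatSetValuesBlocked(). The
   indices are block (not point) indices, and with the default MAT_ROW_ORIENTED
   the block values are supplied by rows.

     PetscInt    row = 0,col = 1;
     PetscScalar v[4] = {1.0,2.0,3.0,4.0};
     MatSetValuesBlocked(A,1,&row,1,&col,v,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/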
3570: /*@
3571: MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
3572: in standard block CSR format.
3574: Collective
3576: Input Parameters:
3577: + comm - MPI communicator
3578: . bs - the block size, only a block size of 1 is supported
3579: . m - number of local rows (Cannot be PETSC_DECIDE)
3580: . n - This value should be the same as the local size used in creating the
3581: x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3582: it calculated if N is given). For square matrices n is almost always m.
3583: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3584: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3585: . i - row indices; i[0] = 0 and i[row] = i[row-1] + the number of nonzero blocks in block row row-1 of the matrix
3586: . j - column indices
3587: - a - matrix values
3589: Output Parameter:
3590: . mat - the matrix
3592: Level: intermediate
3594: Notes:
3595: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3596: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3597: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3599: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3600: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3601: block, followed by the second column of the first block, and so on. The blocks are contiguous in memory
3602: with column-major ordering within blocks.
3604: The i and j indices are 0-based; the entries of i are offsets into the local j array.
3606: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3607: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3608: @*/
3609: PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3610: {
3614: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3615: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3616: MatCreate(comm,mat);
3617: MatSetSizes(*mat,m,n,M,N);
3618: MatSetType(*mat,MATMPIBAIJ);
3619: MatSetBlockSize(*mat,bs);
3620: MatSetUp(*mat);
3621: MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);
3622: MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);
3623: MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);
3624: return(0);
3625: }
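/*
   Hedged example for the routine above: every rank contributes two rows of a
   distributed identity matrix, with bs = 1 as the documentation requires.

     PetscInt    ii[3],jj[2];
     PetscScalar aa[2] = {1.0,1.0};
     PetscMPIInt rank;
     Mat         A;
     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     ii[0] = 0; ii[1] = 1; ii[2] = 2;       CSR row offsets: one entry per local row
     jj[0] = 2*rank; jj[1] = 2*rank + 1;    global column of each entry
     MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);
*/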
3627: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3628: {
3630: PetscInt m,N,i,rstart,nnz,Ii,bs,cbs;
3631: PetscInt *indx;
3632: PetscScalar *values;
3635: MatGetSize(inmat,&m,&N);
3636: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3637: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
3638: PetscInt *dnz,*onz,mbs,Nbs,nbs;
3639: PetscInt *bindx,rmax=a->rmax,j;
3640: PetscMPIInt rank,size;
3642: MatGetBlockSizes(inmat,&bs,&cbs);
3643: mbs = m/bs; Nbs = N/cbs;
3644: if (n == PETSC_DECIDE) {
3645: PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3646: }
3647: nbs = n/cbs;
3649: PetscMalloc1(rmax,&bindx);
3650: MatPreallocateInitialize(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
3652: MPI_Comm_rank(comm,&rank);
3653: MPI_Comm_size(comm,&size);
3654: if (rank == size-1) {
3655: /* Check sum(nbs) = Nbs */
3656: if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3657: }
3659: rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3660: for (i=0; i<mbs; i++) {
3661: MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL); /* non-blocked nnz and indx */
3662: nnz = nnz/bs;
3663: for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3664: MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);
3665: MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);
3666: }
3667: PetscFree(bindx);
3669: MatCreate(comm,outmat);
3670: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
3671: MatSetBlockSizes(*outmat,bs,cbs);
3672: MatSetType(*outmat,MATBAIJ);
3673: MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);
3674: MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);
3675: MatPreallocateFinalize(dnz,onz);
3676: }
3678: /* numeric phase */
3679: MatGetBlockSizes(inmat,&bs,&cbs);
3680: MatGetOwnershipRange(*outmat,&rstart,NULL);
3682: for (i=0; i<m; i++) {
3683: MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);
3684: Ii = i + rstart;
3685: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
3686: MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);
3687: }
3688: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
3689: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
3690: return(0);
3691: }
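/*
   Hedged usage sketch: this routine is normally reached through the
   type-independent front end MatCreateMPIMatConcatenateSeqMat(), which stacks
   one sequential matrix per process into a single parallel matrix.

     Mat seqB;    a SeqBAIJ matrix, built independently on each rank
     Mat C;
     MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqB,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
*/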