Actual source code: mpibaij.c
1: #define PETSCMAT_DLL
3: #include "src/mat/impls/baij/mpi/mpibaij.h"
5: EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6: EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7: EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8: EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9: EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10: EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12: EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13: EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14: EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16: /* UGLY, ugly, ugly
17:    When MatScalar == PetscScalar the ..._MatScalar() functions do not exist; they are
18:    simply #defined to the regular names below. Otherwise the ..._MatScalar() routines
19:    take matrix elements already in single precision (MatScalar) and insert them into the
20:    single precision data structures, while MatSetValues_MPIBAIJ()/MatSetValuesBlocked_MPIBAIJ()
21:    first convert the PetscScalar entries to single precision and then call ..._MatScalar().
22: */
23: #if defined(PETSC_USE_MAT_SINGLE)
24: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25: EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26: EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27: EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28: EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29: #else
30: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
31: #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ
32: #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ
33: #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT
34: #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
35: #endif
39: PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
40: {
41: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
43: PetscInt i,*idxb = 0;
44: PetscScalar *va,*vb;
45: Vec vtmp;
48:
49: MatGetRowMaxAbs(a->A,v,idx);
50: VecGetArray(v,&va);
51: if (idx) {
52: for (i=0; i<A->rmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
53: }
55: VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);
56: if (idx) {PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);}
57: MatGetRowMaxAbs(a->B,vtmp,idxb);
58: VecGetArray(vtmp,&vb);
60: for (i=0; i<A->rmap.n; i++){
61: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);}
62: }
64: VecRestoreArray(v,&va);
65: VecRestoreArray(vtmp,&vb);
66: if (idxb) {PetscFree(idxb);}
67: VecDestroy(vtmp);
68: return(0);
69: }
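/*
   Caller-side sketch for the routine above (illustrative only; mat is an assembled
   MPIBAIJ matrix and mlocal its number of local rows -- both names are assumptions,
   not defined in this file).  MatGetVecs() supplies a vector with the row layout of
   mat, and loc[i] receives the global column of the largest |entry| in local row i:

     Vec      rowmax;
     PetscInt *loc;
     MatGetVecs(mat,PETSC_NULL,&rowmax);
     PetscMalloc(mlocal*sizeof(PetscInt),&loc);
     MatGetRowMaxAbs(mat,rowmax,loc);
     ...
     PetscFree(loc);
     VecDestroy(rowmax);
*/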
74: PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
75: {
76: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
80: MatStoreValues(aij->A);
81: MatStoreValues(aij->B);
82: return(0);
83: }
89: PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
90: {
91: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
95: MatRetrieveValues(aij->A);
96: MatRetrieveValues(aij->B);
97: return(0);
98: }
101: /*
102: Local utility routine that creates a mapping from the global column
103: number to the local number in the off-diagonal part of the local
104:    storage of the matrix.  This is done in a non-scalable way since the
105:    length of colmap is proportional to the global number of block columns.
106: */
109: PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
110: {
111: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
112: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
114: PetscInt nbs = B->nbs,i,bs=mat->rmap.bs;
117: #if defined (PETSC_USE_CTABLE)
118: PetscTableCreate(baij->nbs,&baij->colmap);
119: for (i=0; i<nbs; i++){
120: PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);
121: }
122: #else
123: PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);
124: PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));
125: PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));
126: for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
127: #endif
128: return(0);
129: }
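/*
   Sketch of how colmap is consulted later (cf. MatSetValues_MPIBAIJ_MatScalar below).
   gcol is a global point column index and lcol the resulting local column in the
   off-diagonal block B; both names are illustrative.  In the CTABLE case keys and
   values are stored shifted by 1, so lcol < 0 after the lookup means the block
   column is not (yet) present in B:

     #if defined(PETSC_USE_CTABLE)
       PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);
       lcol--;
     #else
       lcol = baij->colmap[gcol/bs] - 1;
     #endif
     if (lcol >= 0) lcol += gcol % bs;
*/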
131: #define CHUNKSIZE 10
133: #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
134: { \
135: \
136: brow = row/bs; \
137: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
138: rmax = aimax[brow]; nrow = ailen[brow]; \
139: bcol = col/bs; \
140: ridx = row % bs; cidx = col % bs; \
141: low = 0; high = nrow; \
142: while (high-low > 3) { \
143: t = (low+high)/2; \
144: if (rp[t] > bcol) high = t; \
145: else low = t; \
146: } \
147: for (_i=low; _i<high; _i++) { \
148: if (rp[_i] > bcol) break; \
149: if (rp[_i] == bcol) { \
150: bap = ap + bs2*_i + bs*cidx + ridx; \
151: if (addv == ADD_VALUES) *bap += value; \
152: else *bap = value; \
153: goto a_noinsert; \
154: } \
155: } \
156: if (a->nonew == 1) goto a_noinsert; \
157: if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
158: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
159: N = nrow++ - 1; \
160: /* shift up all the later entries in this row */ \
161: for (ii=N; ii>=_i; ii--) { \
162: rp[ii+1] = rp[ii]; \
163: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
164: } \
165: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); } \
166: rp[_i] = bcol; \
167: ap[bs2*_i + bs*cidx + ridx] = value; \
168: a_noinsert:; \
169: ailen[brow] = nrow; \
170: }
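/*
   The macro above locates block column bcol among the sorted block-column indices
   rp[0..nrow-1] of block row brow: a binary search first narrows [low,high) down to
   at most three candidates, then a short linear scan finishes.  The same idiom in
   isolation (sketch only, not part of the build):

     low = 0; high = nrow;
     while (high-low > 3) {
       t = (low+high)/2;
       if (rp[t] > bcol) high = t;
       else              low  = t;
     }
     for (_i=low; _i<high; _i++) {
       if (rp[_i] > bcol)  break;
       if (rp[_i] == bcol) break;
     }

   On exit, if rp[_i] == bcol the existing bs x bs block starts at ap + bs2*_i;
   otherwise _i is the position where the new block-column index must be inserted.
*/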
172: #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
173: { \
174: brow = row/bs; \
175: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
176: rmax = bimax[brow]; nrow = bilen[brow]; \
177: bcol = col/bs; \
178: ridx = row % bs; cidx = col % bs; \
179: low = 0; high = nrow; \
180: while (high-low > 3) { \
181: t = (low+high)/2; \
182: if (rp[t] > bcol) high = t; \
183: else low = t; \
184: } \
185: for (_i=low; _i<high; _i++) { \
186: if (rp[_i] > bcol) break; \
187: if (rp[_i] == bcol) { \
188: bap = ap + bs2*_i + bs*cidx + ridx; \
189: if (addv == ADD_VALUES) *bap += value; \
190: else *bap = value; \
191: goto b_noinsert; \
192: } \
193: } \
194: if (b->nonew == 1) goto b_noinsert; \
195: if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
196: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
197: CHKMEMQ;\
198: N = nrow++ - 1; \
199: /* shift up all the later entries in this row */ \
200: for (ii=N; ii>=_i; ii--) { \
201: rp[ii+1] = rp[ii]; \
202: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
203: } \
204: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));} \
205: rp[_i] = bcol; \
206: ap[bs2*_i + bs*cidx + ridx] = value; \
207: b_noinsert:; \
208: bilen[brow] = nrow; \
209: }
211: #if defined(PETSC_USE_MAT_SINGLE)
214: PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
215: {
216: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
218: PetscInt i,N = m*n;
219: MatScalar *vsingle;
222: if (N > b->setvalueslen) {
223: PetscFree(b->setvaluescopy);
224: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
225: b->setvalueslen = N;
226: }
227: vsingle = b->setvaluescopy;
229: for (i=0; i<N; i++) {
230: vsingle[i] = v[i];
231: }
232: MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
233: return(0);
234: }
238: PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
239: {
240: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
242: PetscInt i,N = m*n*b->bs2;
243: MatScalar *vsingle;
246: if (N > b->setvalueslen) {
247: PetscFree(b->setvaluescopy);
248: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
249: b->setvalueslen = N;
250: }
251: vsingle = b->setvaluescopy;
252: for (i=0; i<N; i++) {
253: vsingle[i] = v[i];
254: }
255: MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
256: return(0);
257: }
261: PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
262: {
263: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
265: PetscInt i,N = m*n;
266: MatScalar *vsingle;
269: if (N > b->setvalueslen) {
270: PetscFree(b->setvaluescopy);
271: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
272: b->setvalueslen = N;
273: }
274: vsingle = b->setvaluescopy;
275: for (i=0; i<N; i++) {
276: vsingle[i] = v[i];
277: }
278: MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);
279: return(0);
280: }
284: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
285: {
286: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288: PetscInt i,N = m*n*b->bs2;
289: MatScalar *vsingle;
292: if (N > b->setvalueslen) {
293: PetscFree(b->setvaluescopy);
294: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
295: b->setvalueslen = N;
296: }
297: vsingle = b->setvaluescopy;
298: for (i=0; i<N; i++) {
299: vsingle[i] = v[i];
300: }
301: MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);
302: return(0);
303: }
304: #endif
308: PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
309: {
310: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
311: MatScalar value;
312: PetscTruth roworiented = baij->roworiented;
314: PetscInt i,j,row,col;
315: PetscInt rstart_orig=mat->rmap.rstart;
316: PetscInt rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
317: PetscInt cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
319: /* Some Variables required in the macro */
320: Mat A = baij->A;
321: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
322: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
323: MatScalar *aa=a->a;
325: Mat B = baij->B;
326: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
327: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
328: MatScalar *ba=b->a;
330: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
331: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
332: MatScalar *ap,*bap;
335: for (i=0; i<m; i++) {
336: if (im[i] < 0) continue;
337: #if defined(PETSC_USE_DEBUG)
338: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
339: #endif
340: if (im[i] >= rstart_orig && im[i] < rend_orig) {
341: row = im[i] - rstart_orig;
342: for (j=0; j<n; j++) {
343: if (in[j] >= cstart_orig && in[j] < cend_orig){
344: col = in[j] - cstart_orig;
345: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
346: MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
347: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
348: } else if (in[j] < 0) continue;
349: #if defined(PETSC_USE_DEBUG)
350: else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
351: #endif
352: else {
353: if (mat->was_assembled) {
354: if (!baij->colmap) {
355: CreateColmap_MPIBAIJ_Private(mat);
356: }
357: #if defined (PETSC_USE_CTABLE)
358: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
359: col = col - 1;
360: #else
361: col = baij->colmap[in[j]/bs] - 1;
362: #endif
363: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
364: DisAssemble_MPIBAIJ(mat);
365: col = in[j];
366: /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
367: B = baij->B;
368: b = (Mat_SeqBAIJ*)(B)->data;
369: bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
370: ba=b->a;
371: } else col += in[j]%bs;
372: } else col = in[j];
373: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
374: MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
375: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
376: }
377: }
378: } else {
379: if (!baij->donotstash) {
380: if (roworiented) {
381: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
382: } else {
383: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
384: }
385: }
386: }
387: }
388: return(0);
389: }
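/*
   Caller-side sketch for the point-wise interface handled above (global indices;
   grow, gcol0 and gcol1 are illustrative names, not defined in this file).
   Off-process entries are stashed and forwarded during assembly:

     PetscInt    rows[1] = {grow},  cols[2] = {gcol0,gcol1};
     PetscScalar vals[2] = {1.0,2.0};
     MatSetValues(mat,1,rows,2,cols,vals,ADD_VALUES);
     ...
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/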
393: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
394: {
395: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
396: const MatScalar *value;
397: MatScalar *barray=baij->barray;
398: PetscTruth roworiented = baij->roworiented;
399: PetscErrorCode ierr;
400: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
401: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
402: PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
403:
405: if(!barray) {
406: PetscMalloc(bs2*sizeof(MatScalar),&barray);
407: baij->barray = barray;
408: }
410: if (roworiented) {
411: stepval = (n-1)*bs;
412: } else {
413: stepval = (m-1)*bs;
414: }
415: for (i=0; i<m; i++) {
416: if (im[i] < 0) continue;
417: #if defined(PETSC_USE_DEBUG)
418: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
419: #endif
420: if (im[i] >= rstart && im[i] < rend) {
421: row = im[i] - rstart;
422: for (j=0; j<n; j++) {
423: /* If only a single block column (row-oriented) or a single block row (column-oriented) is passed, the blocks in v are already contiguous and no copy is required */
424: if ((roworiented) && (n == 1)) {
425: barray = (MatScalar*)v + i*bs2;
426: } else if((!roworiented) && (m == 1)) {
427: barray = (MatScalar*)v + j*bs2;
428: } else { /* Here a copy is required */
429: if (roworiented) {
430: value = v + i*(stepval+bs)*bs + j*bs;
431: } else {
432: value = v + j*(stepval+bs)*bs + i*bs;
433: }
434: for (ii=0; ii<bs; ii++,value+=stepval) {
435: for (jj=0; jj<bs; jj++) {
436: *barray++ = *value++;
437: }
438: }
439: barray -=bs2;
440: }
441:
442: if (in[j] >= cstart && in[j] < cend){
443: col = in[j] - cstart;
444: MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);
445: }
446: else if (in[j] < 0) continue;
447: #if defined(PETSC_USE_DEBUG)
448: else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
449: #endif
450: else {
451: if (mat->was_assembled) {
452: if (!baij->colmap) {
453: CreateColmap_MPIBAIJ_Private(mat);
454: }
456: #if defined(PETSC_USE_DEBUG)
457: #if defined (PETSC_USE_CTABLE)
458: { PetscInt data;
459: PetscTableFind(baij->colmap,in[j]+1,&data);
460: if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
461: }
462: #else
463: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
464: #endif
465: #endif
466: #if defined (PETSC_USE_CTABLE)
467: PetscTableFind(baij->colmap,in[j]+1,&col);
468: col = (col - 1)/bs;
469: #else
470: col = (baij->colmap[in[j]] - 1)/bs;
471: #endif
472: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
473: DisAssemble_MPIBAIJ(mat);
474: col = in[j];
475: }
476: }
477: else col = in[j];
478: MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);
479: }
480: }
481: } else {
482: if (!baij->donotstash) {
483: if (roworiented) {
484: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
485: } else {
486: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
487: }
488: }
489: }
490: }
491: return(0);
492: }
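/*
   Caller-side sketch for the blocked interface handled above: the index arrays hold
   BLOCK row/column numbers and v holds one dense bs x bs block per (row,col) pair.
   Example for bs = 2 with illustrative indices (brow and bcol are assumptions):

     PetscInt    brow = 3, bcol = 5;
     PetscScalar block[4] = {1.0,2.0,3.0,4.0};
     MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);
*/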
494: #define HASH_KEY 0.6180339887
495: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
496: /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
497: /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
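/*
   The HASH macro above is multiplicative hashing with the golden-ratio constant:
   the key is scaled by 0.6180339887, the integer part is discarded, and the
   fractional part is mapped onto [0,size).  A worked example with illustrative
   numbers (key = 1000, size = 97):

     tmp = 1000*0.6180339887 = 618.0339887
     h1  = (PetscInt)(97*(618.0339887 - 618)) = (PetscInt)(3.2969...) = 3

   Collisions are resolved by the linear probing loops in the routines below.
*/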
500: PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
501: {
502: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
503: PetscTruth roworiented = baij->roworiented;
505: PetscInt i,j,row,col;
506: PetscInt rstart_orig=mat->rmap.rstart;
507: PetscInt rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
508: PetscInt h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
509: PetscReal tmp;
510: MatScalar **HD = baij->hd,value;
511: #if defined(PETSC_USE_DEBUG)
512: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
513: #endif
517: for (i=0; i<m; i++) {
518: #if defined(PETSC_USE_DEBUG)
519: if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
520: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
521: #endif
522: row = im[i];
523: if (row >= rstart_orig && row < rend_orig) {
524: for (j=0; j<n; j++) {
525: col = in[j];
526: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
527: /* Look up into the hash table */
528: key = (row/bs)*Nbs+(col/bs)+1;
529: h1 = HASH(size,key,tmp);
531:
532: idx = h1;
533: #if defined(PETSC_USE_DEBUG)
534: insert_ct++;
535: total_ct++;
536: if (HT[idx] != key) {
537: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
538: if (idx == size) {
539: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
540: if (idx == h1) {
541: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
542: }
543: }
544: }
545: #else
546: if (HT[idx] != key) {
547: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
548: if (idx == size) {
549: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
550: if (idx == h1) {
551: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
552: }
553: }
554: }
555: #endif
556: /* A HASH table entry is found, so insert the values at the correct address */
557: if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
558: else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
559: }
560: } else {
561: if (!baij->donotstash) {
562: if (roworiented) {
563: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
564: } else {
565: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
566: }
567: }
568: }
569: }
570: #if defined(PETSC_USE_DEBUG)
571: baij->ht_total_ct = total_ct;
572: baij->ht_insert_ct = insert_ct;
573: #endif
574: return(0);
575: }
579: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
580: {
581: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
582: PetscTruth roworiented = baij->roworiented;
583: PetscErrorCode ierr;
584: PetscInt i,j,ii,jj,row,col;
585: PetscInt rstart=baij->rstartbs;
586: PetscInt rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2;
587: PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
588: PetscReal tmp;
589: MatScalar **HD = baij->hd,*baij_a;
590: const MatScalar *v_t,*value;
591: #if defined(PETSC_USE_DEBUG)
592: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
593: #endif
594:
597: if (roworiented) {
598: stepval = (n-1)*bs;
599: } else {
600: stepval = (m-1)*bs;
601: }
602: for (i=0; i<m; i++) {
603: #if defined(PETSC_USE_DEBUG)
604: if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
605: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
606: #endif
607: row = im[i];
608: v_t = v + i*nbs2;
609: if (row >= rstart && row < rend) {
610: for (j=0; j<n; j++) {
611: col = in[j];
613: /* Look up into the Hash Table */
614: key = row*Nbs+col+1;
615: h1 = HASH(size,key,tmp);
616:
617: idx = h1;
618: #if defined(PETSC_USE_DEBUG)
619: total_ct++;
620: insert_ct++;
621: if (HT[idx] != key) {
622: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
623: if (idx == size) {
624: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
625: if (idx == h1) {
626: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
627: }
628: }
629: }
630: #else
631: if (HT[idx] != key) {
632: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
633: if (idx == size) {
634: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
635: if (idx == h1) {
636: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
637: }
638: }
639: }
640: #endif
641: baij_a = HD[idx];
642: if (roworiented) {
643: /*value = v + i*(stepval+bs)*bs + j*bs;*/
644: /* value = v + (i*(stepval+bs)+j)*bs; */
645: value = v_t;
646: v_t += bs;
647: if (addv == ADD_VALUES) {
648: for (ii=0; ii<bs; ii++,value+=stepval) {
649: for (jj=ii; jj<bs2; jj+=bs) {
650: baij_a[jj] += *value++;
651: }
652: }
653: } else {
654: for (ii=0; ii<bs; ii++,value+=stepval) {
655: for (jj=ii; jj<bs2; jj+=bs) {
656: baij_a[jj] = *value++;
657: }
658: }
659: }
660: } else {
661: value = v + j*(stepval+bs)*bs + i*bs;
662: if (addv == ADD_VALUES) {
663: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
664: for (jj=0; jj<bs; jj++) {
665: baij_a[jj] += *value++;
666: }
667: }
668: } else {
669: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
670: for (jj=0; jj<bs; jj++) {
671: baij_a[jj] = *value++;
672: }
673: }
674: }
675: }
676: }
677: } else {
678: if (!baij->donotstash) {
679: if (roworiented) {
680: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
681: } else {
682: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
683: }
684: }
685: }
686: }
687: #if defined(PETSC_USE_DEBUG)
688: baij->ht_total_ct = total_ct;
689: baij->ht_insert_ct = insert_ct;
690: #endif
691: return(0);
692: }
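/*
   The _HT routines above are not installed by default; they replace the standard
   MatSetValues()/MatSetValuesBlocked() kernels after the first final assembly when
   the matrix was given the hash-table option (see MatAssemblyEnd_MPIBAIJ below).
   Caller-side sketch:

     MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);
     ...
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/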
696: PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
697: {
698: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
700: PetscInt bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
701: PetscInt bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
704: for (i=0; i<m; i++) {
705: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
706: if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
707: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
708: row = idxm[i] - bsrstart;
709: for (j=0; j<n; j++) {
710: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
711: if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
712: if (idxn[j] >= bscstart && idxn[j] < bscend){
713: col = idxn[j] - bscstart;
714: MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
715: } else {
716: if (!baij->colmap) {
717: CreateColmap_MPIBAIJ_Private(mat);
718: }
719: #if defined (PETSC_USE_CTABLE)
720: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
721: data --;
722: #else
723: data = baij->colmap[idxn[j]/bs]-1;
724: #endif
725: if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
726: else {
727: col = data + idxn[j]%bs;
728: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
729: }
730: }
731: }
732: } else {
733: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
734: }
735: }
736: return(0);
737: }
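/*
   Caller-side sketch for the routine above; only rows owned by the calling process
   may be queried (grow and gcol are illustrative global indices):

     PetscInt    row = grow, col = gcol;
     PetscScalar val;
     MatGetValues(mat,1,&row,1,&col,&val);
*/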
741: PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
742: {
743: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
744: Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
746: PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
747: PetscReal sum = 0.0;
748: MatScalar *v;
751: if (baij->size == 1) {
752: MatNorm(baij->A,type,nrm);
753: } else {
754: if (type == NORM_FROBENIUS) {
755: v = amat->a;
756: nz = amat->nz*bs2;
757: for (i=0; i<nz; i++) {
758: #if defined(PETSC_USE_COMPLEX)
759: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
760: #else
761: sum += (*v)*(*v); v++;
762: #endif
763: }
764: v = bmat->a;
765: nz = bmat->nz*bs2;
766: for (i=0; i<nz; i++) {
767: #if defined(PETSC_USE_COMPLEX)
768: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
769: #else
770: sum += (*v)*(*v); v++;
771: #endif
772: }
773: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
774: *nrm = sqrt(*nrm);
775: } else if (type == NORM_1) { /* max column sum */
776: PetscReal *tmp,*tmp2;
777: PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs;
778: PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);
779: tmp2 = tmp + mat->cmap.N;
780: PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));
781: v = amat->a; jj = amat->j;
782: for (i=0; i<amat->nz; i++) {
783: for (j=0; j<bs; j++){
784: col = bs*(cstart + *jj) + j; /* column index */
785: for (row=0; row<bs; row++){
786: tmp[col] += PetscAbsScalar(*v); v++;
787: }
788: }
789: jj++;
790: }
791: v = bmat->a; jj = bmat->j;
792: for (i=0; i<bmat->nz; i++) {
793: for (j=0; j<bs; j++){
794: col = bs*garray[*jj] + j;
795: for (row=0; row<bs; row++){
796: tmp[col] += PetscAbsScalar(*v); v++;
797: }
798: }
799: jj++;
800: }
801: MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
802: *nrm = 0.0;
803: for (j=0; j<mat->cmap.N; j++) {
804: if (tmp2[j] > *nrm) *nrm = tmp2[j];
805: }
806: PetscFree(tmp);
807: } else if (type == NORM_INFINITY) { /* max row sum */
808: PetscReal *sums;
809: PetscMalloc(bs*sizeof(PetscReal),&sums);
810: sum = 0.0;
811: for (j=0; j<amat->mbs; j++) {
812: for (row=0; row<bs; row++) sums[row] = 0.0;
813: v = amat->a + bs2*amat->i[j];
814: nz = amat->i[j+1]-amat->i[j];
815: for (i=0; i<nz; i++) {
816: for (col=0; col<bs; col++){
817: for (row=0; row<bs; row++){
818: sums[row] += PetscAbsScalar(*v); v++;
819: }
820: }
821: }
822: v = bmat->a + bs2*bmat->i[j];
823: nz = bmat->i[j+1]-bmat->i[j];
824: for (i=0; i<nz; i++) {
825: for (col=0; col<bs; col++){
826: for (row=0; row<bs; row++){
827: sums[row] += PetscAbsScalar(*v); v++;
828: }
829: }
830: }
831: for (row=0; row<bs; row++){
832: if (sums[row] > sum) sum = sums[row];
833: }
834: }
835: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);
836: PetscFree(sums);
837: } else {
838: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
839: }
840: }
841: return(0);
842: }
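/*
   Summary of the reductions performed above.  The Frobenius branch computes
       ||A||_F = sqrt( sum_{i,j} |a_ij|^2 )
   by accumulating |a_ij|^2 over the local diagonal (A) and off-diagonal (B) parts
   and combining the partial sums with an MPI_SUM reduction before the square root.
   The 1-norm branch forms per-column absolute sums (combined with MPI_SUM, then the
   maximum column sum is taken), and the infinity-norm branch forms per-row absolute
   sums whose local maxima are combined with MPI_MAX.
*/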
844: /*
845:      Creates and fills the hash table used for fast lookups in MatSetValues().
846:      This table is created only once.
847:      If new entries need to be added to the matrix
848:      then the hash table has to be destroyed and
849:      recreated.
850: */
853: PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
854: {
855: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
856: Mat A = baij->A,B=baij->B;
857: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
858: PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
860: PetscInt size,bs2=baij->bs2,rstart=baij->rstartbs;
861: PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
862: PetscInt *HT,key;
863: MatScalar **HD;
864: PetscReal tmp;
865: #if defined(PETSC_USE_INFO)
866: PetscInt ct=0,max=0;
867: #endif
870: baij->ht_size=(PetscInt)(factor*nz);
871: size = baij->ht_size;
873: if (baij->ht) {
874: return(0);
875: }
876:
877: /* Allocate Memory for Hash Table */
878: PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);
879: baij->ht = (PetscInt*)(baij->hd + size);
880: HD = baij->hd;
881: HT = baij->ht;
884: PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));
885:
887: /* Loop Over A */
888: for (i=0; i<a->mbs; i++) {
889: for (j=ai[i]; j<ai[i+1]; j++) {
890: row = i+rstart;
891: col = aj[j]+cstart;
892:
893: key = row*Nbs + col + 1;
894: h1 = HASH(size,key,tmp);
895: for (k=0; k<size; k++){
896: if (!HT[(h1+k)%size]) {
897: HT[(h1+k)%size] = key;
898: HD[(h1+k)%size] = a->a + j*bs2;
899: break;
900: #if defined(PETSC_USE_INFO)
901: } else {
902: ct++;
903: #endif
904: }
905: }
906: #if defined(PETSC_USE_INFO)
907: if (k> max) max = k;
908: #endif
909: }
910: }
911: /* Loop Over B */
912: for (i=0; i<b->mbs; i++) {
913: for (j=bi[i]; j<bi[i+1]; j++) {
914: row = i+rstart;
915: col = garray[bj[j]];
916: key = row*Nbs + col + 1;
917: h1 = HASH(size,key,tmp);
918: for (k=0; k<size; k++){
919: if (!HT[(h1+k)%size]) {
920: HT[(h1+k)%size] = key;
921: HD[(h1+k)%size] = b->a + j*bs2;
922: break;
923: #if defined(PETSC_USE_INFO)
924: } else {
925: ct++;
926: #endif
927: }
928: }
929: #if defined(PETSC_USE_INFO)
930: if (k> max) max = k;
931: #endif
932: }
933: }
934:
935: /* Print Summary */
936: #if defined(PETSC_USE_INFO)
937: for (i=0,j=0; i<size; i++) {
938: if (HT[i]) {j++;}
939: }
940: PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
941: #endif
942: return(0);
943: }
947: PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
948: {
949: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
951: PetscInt nstash,reallocs;
952: InsertMode addv;
955: if (baij->donotstash) {
956: return(0);
957: }
959: /* make sure all processors are either in INSERTMODE or ADDMODE */
960: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
961: if (addv == (ADD_VALUES|INSERT_VALUES)) {
962: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
963: }
964: mat->insertmode = addv; /* in case this processor had no cache */
966: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);
967: MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
968: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
969: PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
970: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
971: PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
972: return(0);
973: }
977: PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
978: {
979: Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
980: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data;
982: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
983: PetscInt *row,*col,other_disassembled;
984: PetscTruth r1,r2,r3;
985: MatScalar *val;
986: InsertMode addv = mat->insertmode;
987: PetscMPIInt n;
989: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
991: if (!baij->donotstash) {
992: while (1) {
993: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
994: if (!flg) break;
996: for (i=0; i<n;) {
997: /* Now identify the consecutive vals belonging to the same row */
998: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
999: if (j < n) ncols = j-i;
1000: else ncols = n-i;
1001: /* Now assemble all these values with a single function call */
1002: MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);
1003: i = j;
1004: }
1005: }
1006: MatStashScatterEnd_Private(&mat->stash);
1007:     /* Now process the block-stash. Since the values are stashed column-oriented,
1008:        temporarily clear the roworiented flags, and after MatSetValues()
1009:        restore the original flags */
1010: r1 = baij->roworiented;
1011: r2 = a->roworiented;
1012: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
1013: baij->roworiented = PETSC_FALSE;
1014: a->roworiented = PETSC_FALSE;
1015: (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
1016: while (1) {
1017: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
1018: if (!flg) break;
1019:
1020: for (i=0; i<n;) {
1021: /* Now identify the consecutive vals belonging to the same row */
1022: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1023: if (j < n) ncols = j-i;
1024: else ncols = n-i;
1025: MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
1026: i = j;
1027: }
1028: }
1029: MatStashScatterEnd_Private(&mat->bstash);
1030: baij->roworiented = r1;
1031: a->roworiented = r2;
1032: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
1033: }
1034:
1035: MatAssemblyBegin(baij->A,mode);
1036: MatAssemblyEnd(baij->A,mode);
1038: /* determine if any processor has disassembled, if so we must
1039: also disassemble ourselves so that we may reassemble. */
1040: /*
1041: if nonzero structure of submatrix B cannot change then we know that
1042: no processor disassembled thus we can skip this stuff
1043: */
1044: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
1045: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
1046: if (mat->was_assembled && !other_disassembled) {
1047: DisAssemble_MPIBAIJ(mat);
1048: }
1049: }
1051: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1052: MatSetUpMultiply_MPIBAIJ(mat);
1053: }
1054: ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
1055: MatAssemblyBegin(baij->B,mode);
1056: MatAssemblyEnd(baij->B,mode);
1057:
1058: #if defined(PETSC_USE_INFO)
1059: if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1060: PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1061: baij->ht_total_ct = 0;
1062: baij->ht_insert_ct = 0;
1063: }
1064: #endif
1065: if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1066: MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);
1067: mat->ops->setvalues = MatSetValues_MPIBAIJ_HT;
1068: mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1069: }
1071: PetscFree(baij->rowvalues);
1072: baij->rowvalues = 0;
1073: return(0);
1074: }
1078: static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1079: {
1080: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1081: PetscErrorCode ierr;
1082: PetscMPIInt size = baij->size,rank = baij->rank;
1083: PetscInt bs = mat->rmap.bs;
1084: PetscTruth iascii,isdraw;
1085: PetscViewer sviewer;
1086: PetscViewerFormat format;
1089: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1090: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1091: if (iascii) {
1092: PetscViewerGetFormat(viewer,&format);
1093: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1094: MatInfo info;
1095: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1096: MatGetInfo(mat,MAT_LOCAL,&info);
1097: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1098: rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1099: mat->rmap.bs,(PetscInt)info.memory);
1100: MatGetInfo(baij->A,MAT_LOCAL,&info);
1101: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);
1102: MatGetInfo(baij->B,MAT_LOCAL,&info);
1103: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);
1104: PetscViewerFlush(viewer);
1105: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1106: VecScatterView(baij->Mvctx,viewer);
1107: return(0);
1108: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1109: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1110: return(0);
1111: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1112: return(0);
1113: }
1114: }
1116: if (isdraw) {
1117: PetscDraw draw;
1118: PetscTruth isnull;
1119: PetscViewerDrawGetDraw(viewer,0,&draw);
1120: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1121: }
1123: if (size == 1) {
1124: PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);
1125: MatView(baij->A,viewer);
1126: } else {
1127: /* assemble the entire matrix onto first processor. */
1128: Mat A;
1129: Mat_SeqBAIJ *Aloc;
1130: PetscInt M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1131: MatScalar *a;
1133: /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1134: /* Perhaps this should be the type of mat? */
1135: MatCreate(((PetscObject)mat)->comm,&A);
1136: if (!rank) {
1137: MatSetSizes(A,M,N,M,N);
1138: } else {
1139: MatSetSizes(A,0,0,M,N);
1140: }
1141: MatSetType(A,MATMPIBAIJ);
1142: MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);
1143: PetscLogObjectParent(mat,A);
1145: /* copy over the A part */
1146: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1147: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1148: PetscMalloc(bs*sizeof(PetscInt),&rvals);
1150: for (i=0; i<mbs; i++) {
1151: rvals[0] = bs*(baij->rstartbs + i);
1152: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1153: for (j=ai[i]; j<ai[i+1]; j++) {
1154: col = (baij->cstartbs+aj[j])*bs;
1155: for (k=0; k<bs; k++) {
1156: MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
1157: col++; a += bs;
1158: }
1159: }
1160: }
1161: /* copy over the B part */
1162: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1163: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1164: for (i=0; i<mbs; i++) {
1165: rvals[0] = bs*(baij->rstartbs + i);
1166: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1167: for (j=ai[i]; j<ai[i+1]; j++) {
1168: col = baij->garray[aj[j]]*bs;
1169: for (k=0; k<bs; k++) {
1170: MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
1171: col++; a += bs;
1172: }
1173: }
1174: }
1175: PetscFree(rvals);
1176: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1177: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1178: /*
1179: Everyone has to call to draw the matrix since the graphics waits are
1180: synchronized across all processors that share the PetscDraw object
1181: */
1182: PetscViewerGetSingleton(viewer,&sviewer);
1183: if (!rank) {
1184: PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);
1185: MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);
1186: }
1187: PetscViewerRestoreSingleton(viewer,&sviewer);
1188: MatDestroy(A);
1189: }
1190: return(0);
1191: }
1195: PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1196: {
1198: PetscTruth iascii,isdraw,issocket,isbinary;
1201: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1202: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1203: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1204: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1205: if (iascii || isdraw || issocket || isbinary) {
1206: MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);
1207: } else {
1208: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1209: }
1210: return(0);
1211: }
1215: PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1216: {
1217: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1221: #if defined(PETSC_USE_LOG)
1222: PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
1223: #endif
1224: MatStashDestroy_Private(&mat->stash);
1225: MatStashDestroy_Private(&mat->bstash);
1226: MatDestroy(baij->A);
1227: MatDestroy(baij->B);
1228: #if defined (PETSC_USE_CTABLE)
1229: if (baij->colmap) {PetscTableDestroy(baij->colmap);}
1230: #else
1231: PetscFree(baij->colmap);
1232: #endif
1233: PetscFree(baij->garray);
1234: if (baij->lvec) {VecDestroy(baij->lvec);}
1235: if (baij->Mvctx) {VecScatterDestroy(baij->Mvctx);}
1236: PetscFree(baij->rowvalues);
1237: PetscFree(baij->barray);
1238: PetscFree(baij->hd);
1239: #if defined(PETSC_USE_MAT_SINGLE)
1240: PetscFree(baij->setvaluescopy);
1241: #endif
1242: PetscFree(baij->rangebs);
1243: PetscFree(baij);
1245: PetscObjectChangeTypeName((PetscObject)mat,0);
1246: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1247: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1248: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1249: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);
1250: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);
1251: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1252: PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);
1253: return(0);
1254: }
1258: PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1259: {
1260: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1262: PetscInt nt;
1265: VecGetLocalSize(xx,&nt);
1266: if (nt != A->cmap.n) {
1267: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1268: }
1269: VecGetLocalSize(yy,&nt);
1270: if (nt != A->rmap.n) {
1271: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1272: }
1273: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1274: (*a->A->ops->mult)(a->A,xx,yy);
1275: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1276: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1277: return(0);
1278: }
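/*
   The multiply above overlaps communication with computation; with x_local the
   owned part of xx and x_ghost the scattered ghost values (a->lvec), it forms

       yy = A_diag * x_local + B_offdiag * x_ghost

   where the scatter producing x_ghost proceeds while the diagonal-block product
   is being computed.
*/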
1282: PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1283: {
1284: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1288: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1289: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1290: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1291: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1292: return(0);
1293: }
1297: PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1298: {
1299: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1301: PetscTruth merged;
1304: VecScatterGetMerged(a->Mvctx,&merged);
1305: /* do nondiagonal part */
1306: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1307: if (!merged) {
1308: /* send it on its way */
1309: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1310: /* do local part */
1311: (*a->A->ops->multtranspose)(a->A,xx,yy);
1312: /* receive remote parts: note this assumes the values are not actually */
1313: /* inserted in yy until the next line */
1314: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1315: } else {
1316: /* do local part */
1317: (*a->A->ops->multtranspose)(a->A,xx,yy);
1318: /* send it on its way */
1319: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1320: /* values actually were received in the Begin() but we need to call this nop */
1321: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1322: }
1323: return(0);
1324: }
1328: PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1329: {
1330: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1334: /* do nondiagonal part */
1335: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1336: /* send it on its way */
1337: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1338: /* do local part */
1339: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1340:   /* receive remote parts: note this assumes the values are not actually */
1341:   /* inserted in zz until the next line, which is true for this implementation */
1342:   /* but may not always be the case. */
1343: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1344: return(0);
1345: }
1347: /*
1348: This only works correctly for square matrices where the subblock A->A is the
1349: diagonal block
1350: */
1353: PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1354: {
1355: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1359: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1360: MatGetDiagonal(a->A,v);
1361: return(0);
1362: }
1366: PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1367: {
1368: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1372: MatScale(a->A,aa);
1373: MatScale(a->B,aa);
1374: return(0);
1375: }
1379: PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1380: {
1381: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1382: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1384: PetscInt bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1385: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1386: PetscInt *cmap,*idx_p,cstart = mat->cstartbs;
1389: if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1390: mat->getrowactive = PETSC_TRUE;
1392: if (!mat->rowvalues && (idx || v)) {
1393: /*
1394: allocate enough space to hold information from the longest row.
1395: */
1396: Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1397: PetscInt max = 1,mbs = mat->mbs,tmp;
1398: for (i=0; i<mbs; i++) {
1399: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1400: if (max < tmp) { max = tmp; }
1401: }
1402: PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);
1403: mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1404: }
1405:
1406:   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1407: lrow = row - brstart;
1409: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1410: if (!v) {pvA = 0; pvB = 0;}
1411: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1412: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1413: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1414: nztot = nzA + nzB;
1416: cmap = mat->garray;
1417: if (v || idx) {
1418: if (nztot) {
1419: /* Sort by increasing column numbers, assuming A and B already sorted */
1420: PetscInt imark = -1;
1421: if (v) {
1422: *v = v_p = mat->rowvalues;
1423: for (i=0; i<nzB; i++) {
1424: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1425: else break;
1426: }
1427: imark = i;
1428: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1429: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1430: }
1431: if (idx) {
1432: *idx = idx_p = mat->rowindices;
1433: if (imark > -1) {
1434: for (i=0; i<imark; i++) {
1435: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1436: }
1437: } else {
1438: for (i=0; i<nzB; i++) {
1439: if (cmap[cworkB[i]/bs] < cstart)
1440: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1441: else break;
1442: }
1443: imark = i;
1444: }
1445: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1446: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1447: }
1448: } else {
1449: if (idx) *idx = 0;
1450: if (v) *v = 0;
1451: }
1452: }
1453: *nz = nztot;
1454: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1455: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1456: return(0);
1457: }
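/*
   Caller-side sketch for the routine above; the row must be locally owned and every
   MatGetRow() must be matched by MatRestoreRow() (grow is an illustrative global row):

     PetscInt    ncols,*cols;
     PetscScalar *vals;
     MatGetRow(mat,grow,&ncols,&cols,&vals);
     ...
     MatRestoreRow(mat,grow,&ncols,&cols,&vals);
*/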
1461: PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462: {
1463: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1466: if (!baij->getrowactive) {
1467: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1468: }
1469: baij->getrowactive = PETSC_FALSE;
1470: return(0);
1471: }
1475: PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1476: {
1477: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1481: MatZeroEntries(l->A);
1482: MatZeroEntries(l->B);
1483: return(0);
1484: }
1488: PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1489: {
1490: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1491: Mat A = a->A,B = a->B;
1493: PetscReal isend[5],irecv[5];
1496: info->block_size = (PetscReal)matin->rmap.bs;
1497: MatGetInfo(A,MAT_LOCAL,info);
1498: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1499: isend[3] = info->memory; isend[4] = info->mallocs;
1500: MatGetInfo(B,MAT_LOCAL,info);
1501: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1502: isend[3] += info->memory; isend[4] += info->mallocs;
1503: if (flag == MAT_LOCAL) {
1504: info->nz_used = isend[0];
1505: info->nz_allocated = isend[1];
1506: info->nz_unneeded = isend[2];
1507: info->memory = isend[3];
1508: info->mallocs = isend[4];
1509: } else if (flag == MAT_GLOBAL_MAX) {
1510: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);
1511: info->nz_used = irecv[0];
1512: info->nz_allocated = irecv[1];
1513: info->nz_unneeded = irecv[2];
1514: info->memory = irecv[3];
1515: info->mallocs = irecv[4];
1516: } else if (flag == MAT_GLOBAL_SUM) {
1517: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);
1518: info->nz_used = irecv[0];
1519: info->nz_allocated = irecv[1];
1520: info->nz_unneeded = irecv[2];
1521: info->memory = irecv[3];
1522: info->mallocs = irecv[4];
1523: } else {
1524: SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1525: }
1526: info->rows_global = (PetscReal)A->rmap.N;
1527: info->columns_global = (PetscReal)A->cmap.N;
1528: info->rows_local = (PetscReal)A->rmap.N;
1529: info->columns_local = (PetscReal)A->cmap.N;
1530: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1531: info->fill_ratio_needed = 0;
1532: info->factor_mallocs = 0;
1533: return(0);
1534: }
1538: PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1539: {
1540: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1544: switch (op) {
1545: case MAT_NEW_NONZERO_LOCATIONS:
1546: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1547: case MAT_KEEP_ZEROED_ROWS:
1548: case MAT_NEW_NONZERO_LOCATION_ERR:
1549: MatSetOption(a->A,op,flg);
1550: MatSetOption(a->B,op,flg);
1551: break;
1552: case MAT_ROW_ORIENTED:
1553: a->roworiented = flg;
1554: MatSetOption(a->A,op,flg);
1555: MatSetOption(a->B,op,flg);
1556: break;
1557: case MAT_NEW_DIAGONALS:
1558: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1559: break;
1560: case MAT_IGNORE_OFF_PROC_ENTRIES:
1561: a->donotstash = flg;
1562: break;
1563: case MAT_USE_HASH_TABLE:
1564: a->ht_flag = flg;
1565: break;
1566: case MAT_SYMMETRIC:
1567: case MAT_STRUCTURALLY_SYMMETRIC:
1568: case MAT_HERMITIAN:
1569: case MAT_SYMMETRY_ETERNAL:
1570: MatSetOption(a->A,op,flg);
1571: break;
1572: default:
1573: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1574: }
1575: return(0);
1576: }
1580: PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1581: {
1582: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1583: Mat_SeqBAIJ *Aloc;
1584: Mat B;
1586: PetscInt M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1587: PetscInt bs=A->rmap.bs,mbs=baij->mbs;
1588: MatScalar *a;
1589:
1591: if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1592: MatCreate(((PetscObject)A)->comm,&B);
1593: MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);
1594: MatSetType(B,((PetscObject)A)->type_name);
1595: MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);
1596:
1597: /* copy over the A part */
1598: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1599: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1600: PetscMalloc(bs*sizeof(PetscInt),&rvals);
1601:
1602: for (i=0; i<mbs; i++) {
1603: rvals[0] = bs*(baij->rstartbs + i);
1604: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1605: for (j=ai[i]; j<ai[i+1]; j++) {
1606: col = (baij->cstartbs+aj[j])*bs;
1607: for (k=0; k<bs; k++) {
1608: MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);
1609: col++; a += bs;
1610: }
1611: }
1612: }
1613: /* copy over the B part */
1614: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1615: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1616: for (i=0; i<mbs; i++) {
1617: rvals[0] = bs*(baij->rstartbs + i);
1618: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1619: for (j=ai[i]; j<ai[i+1]; j++) {
1620: col = baij->garray[aj[j]]*bs;
1621: for (k=0; k<bs; k++) {
1622: MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);
1623: col++; a += bs;
1624: }
1625: }
1626: }
1627: PetscFree(rvals);
1628: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1629: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1630:
1631: if (matout) {
1632: *matout = B;
1633: } else {
1634: MatHeaderCopy(A,B);
1635: }
1636: return(0);
1637: }
1641: PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1642: {
1643: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1644: Mat a = baij->A,b = baij->B;
1646: PetscInt s1,s2,s3;
1649: MatGetLocalSize(mat,&s2,&s3);
1650: if (rr) {
1651: VecGetLocalSize(rr,&s1);
1652: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1653: /* Overlap communication with computation. */
1654: VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1655: }
1656: if (ll) {
1657: VecGetLocalSize(ll,&s1);
1658: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1659: (*b->ops->diagonalscale)(b,ll,PETSC_NULL);
1660: }
1661: /* scale the diagonal block */
1662: (*a->ops->diagonalscale)(a,ll,rr);
1664: if (rr) {
1665: /* Do a scatter end and then right scale the off-diagonal block */
1666: VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1667: (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);
1668: }
1669:
1670: return(0);
1671: }
1675: PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1676: {
1677: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1679: PetscMPIInt imdex,size = l->size,n,rank = l->rank;
1680: PetscInt i,*owners = A->rmap.range;
1681: PetscInt *nprocs,j,idx,nsends,row;
1682: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
1683: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1684: PetscInt *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1685: MPI_Comm comm = ((PetscObject)A)->comm;
1686: MPI_Request *send_waits,*recv_waits;
1687: MPI_Status recv_status,*send_status;
1688: #if defined(PETSC_DEBUG)
1689: PetscTruth found = PETSC_FALSE;
1690: #endif
1691:
1693: /* first count number of contributors to each processor */
1694: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
1695: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
1696: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
1697: j = 0;
1698: for (i=0; i<N; i++) {
1699: if (lastidx > (idx = rows[i])) j = 0;
1700: lastidx = idx;
1701: for (; j<size; j++) {
1702: if (idx >= owners[j] && idx < owners[j+1]) {
1703: nprocs[2*j]++;
1704: nprocs[2*j+1] = 1;
1705: owner[i] = j;
1706: #if defined(PETSC_DEBUG)
1707: found = PETSC_TRUE;
1708: #endif
1709: break;
1710: }
1711: }
1712: #if defined(PETSC_DEBUG)
1713: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1714: found = PETSC_FALSE;
1715: #endif
1716: }
1717: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1718:
1719: /* inform other processors of number of messages and max length*/
1720: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1721:
1722: /* post receives: */
1723: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
1724: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1725: for (i=0; i<nrecvs; i++) {
1726: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1727: }
1728:
1729: /* do sends:
1730: 1) starts[i] gives the starting index in svalues for stuff going to
1731: the ith processor
1732: */
1733: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
1734: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1735: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
1736: starts[0] = 0;
1737: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1738: for (i=0; i<N; i++) {
1739: svalues[starts[owner[i]]++] = rows[i];
1740: }
1741:
1742: starts[0] = 0;
1743: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1744: count = 0;
1745: for (i=0; i<size; i++) {
1746: if (nprocs[2*i+1]) {
1747: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
1748: }
1749: }
1750: PetscFree(starts);
1752: base = owners[rank];
1753:
1754: /* wait on receives */
1755: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
1756: source = lens + nrecvs;
1757: count = nrecvs; slen = 0;
1758: while (count) {
1759: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1760: /* unpack receives into our local space */
1761: MPI_Get_count(&recv_status,MPIU_INT,&n);
1762: source[imdex] = recv_status.MPI_SOURCE;
1763: lens[imdex] = n;
1764: slen += n;
1765: count--;
1766: }
1767: PetscFree(recv_waits);
1768:
1769: /* move the data into the send scatter */
1770: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
1771: count = 0;
1772: for (i=0; i<nrecvs; i++) {
1773: values = rvalues + i*nmax;
1774: for (j=0; j<lens[i]; j++) {
1775: lrows[count++] = values[j] - base;
1776: }
1777: }
1778: PetscFree(rvalues);
1779: PetscFree(lens);
1780: PetscFree(owner);
1781: PetscFree(nprocs);
1782:
1783: /* actually zap the local rows */
1784: /*
1785: Zero the required rows. If the "diagonal block" of the matrix
1786: is square and the user wishes to set the diagonal we use separate
1787: code so that MatSetValues() is not called for each diagonal allocating
1788: new memory, thus calling lots of mallocs and slowing things down.
1790: Contributed by: Matthew Knepley
1791: */
1792: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1793: MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);
1794: if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1795: MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);
1796: } else if (diag != 0.0) {
1797: MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);
1798: if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1799: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1800: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1801: }
1802: for (i=0; i<slen; i++) {
1803: row = lrows[i] + rstart_bs;
1804: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
1805: }
1806: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1807: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1808: } else {
1809: MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);
1810: }
1812: PetscFree(lrows);
1814: /* wait on sends */
1815: if (nsends) {
1816: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1817: MPI_Waitall(nsends,send_waits,send_status);
1818: PetscFree(send_status);
1819: }
1820: PetscFree(send_waits);
1821: PetscFree(svalues);
1823: return(0);
1824: }
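The routine above gathers the global row indices onto their owning processes and then zeros them locally. As a point of reference, here is a hedged usage sketch of the public interface that reaches this code; the function name, row choice, and diagonal value are illustrative assumptions, not taken from this file.

#include "petscmat.h"

/* Illustrative sketch only, not part of mpibaij.c: zero the first and last locally
   owned point rows of an assembled MPIBAIJ matrix and put 1.0 on their diagonals.
   Assumes PETSc is initialized and each process owns at least two rows. */
PetscErrorCode ZeroTwoRowsSketch(Mat A)
{
  PetscInt       rows[2],rstart,rend;
  PetscErrorCode ierr;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  rows[0] = rstart;        /* first locally owned row (global index) */
  rows[1] = rend - 1;      /* last locally owned row (global index)  */
  ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);  /* nonzero value keeps a diagonal entry */
  return 0;
}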
1828: PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1829: {
1830: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1834: MatSetUnfactored(a->A);
1835: return(0);
1836: }
1838: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1842: PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1843: {
1844: Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1845: Mat a,b,c,d;
1846: PetscTruth flg;
1850: a = matA->A; b = matA->B;
1851: c = matB->A; d = matB->B;
1853: MatEqual(a,c,&flg);
1854: if (flg) {
1855: MatEqual(b,d,&flg);
1856: }
1857: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
1858: return(0);
1859: }
1863: PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1864: {
1866: Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1867: Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1870: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1871: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1872: MatCopy_Basic(A,B,str);
1873: } else {
1874: MatCopy(a->A,b->A,str);
1875: MatCopy(a->B,b->B,str);
1876: }
1877: return(0);
1878: }
1882: PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1883: {
1887: MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1888: return(0);
1889: }
1891: #include petscblaslapack.h
1894: PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1895: {
1897: Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1898: PetscBLASInt bnz,one=1;
1899: Mat_SeqBAIJ *x,*y;
1902: if (str == SAME_NONZERO_PATTERN) {
1903: PetscScalar alpha = a;
1904: x = (Mat_SeqBAIJ *)xx->A->data;
1905: y = (Mat_SeqBAIJ *)yy->A->data;
1906: bnz = (PetscBLASInt)x->nz;
1907: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1908: x = (Mat_SeqBAIJ *)xx->B->data;
1909: y = (Mat_SeqBAIJ *)yy->B->data;
1910: bnz = (PetscBLASInt)x->nz;
1911: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1912: } else {
1913: MatAXPY_Basic(Y,a,X,str);
1914: }
1915: return(0);
1916: }
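When both matrices share a block nonzero pattern the routine above reduces to two BLASaxpy_ calls on the local value arrays; otherwise it falls back to MatAXPY_Basic(). A hedged usage sketch of the generic entry point follows; the helper name and the scalar 2.0 are illustrative.

#include "petscmat.h"

/* Illustrative sketch only, not part of mpibaij.c: Y <- Y + 2.0*X, taking the fast
   path when both assembled MPIBAIJ matrices have the same block nonzero pattern. */
PetscErrorCode AXPYSketch(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  return 0;
}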
1920: PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1921: {
1922: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1926: MatRealPart(a->A);
1927: MatRealPart(a->B);
1928: return(0);
1929: }
1933: PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1934: {
1935: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1939: MatImaginaryPart(a->A);
1940: MatImaginaryPart(a->B);
1941: return(0);
1942: }
1944: /* -------------------------------------------------------------------*/
1945: static struct _MatOps MatOps_Values = {
1946: MatSetValues_MPIBAIJ,
1947: MatGetRow_MPIBAIJ,
1948: MatRestoreRow_MPIBAIJ,
1949: MatMult_MPIBAIJ,
1950: /* 4*/ MatMultAdd_MPIBAIJ,
1951: MatMultTranspose_MPIBAIJ,
1952: MatMultTransposeAdd_MPIBAIJ,
1953: 0,
1954: 0,
1955: 0,
1956: /*10*/ 0,
1957: 0,
1958: 0,
1959: 0,
1960: MatTranspose_MPIBAIJ,
1961: /*15*/ MatGetInfo_MPIBAIJ,
1962: MatEqual_MPIBAIJ,
1963: MatGetDiagonal_MPIBAIJ,
1964: MatDiagonalScale_MPIBAIJ,
1965: MatNorm_MPIBAIJ,
1966: /*20*/ MatAssemblyBegin_MPIBAIJ,
1967: MatAssemblyEnd_MPIBAIJ,
1968: 0,
1969: MatSetOption_MPIBAIJ,
1970: MatZeroEntries_MPIBAIJ,
1971: /*25*/ MatZeroRows_MPIBAIJ,
1972: 0,
1973: 0,
1974: 0,
1975: 0,
1976: /*30*/ MatSetUpPreallocation_MPIBAIJ,
1977: 0,
1978: 0,
1979: 0,
1980: 0,
1981: /*35*/ MatDuplicate_MPIBAIJ,
1982: 0,
1983: 0,
1984: 0,
1985: 0,
1986: /*40*/ MatAXPY_MPIBAIJ,
1987: MatGetSubMatrices_MPIBAIJ,
1988: MatIncreaseOverlap_MPIBAIJ,
1989: MatGetValues_MPIBAIJ,
1990: MatCopy_MPIBAIJ,
1991: /*45*/ 0,
1992: MatScale_MPIBAIJ,
1993: 0,
1994: 0,
1995: 0,
1996: /*50*/ 0,
1997: 0,
1998: 0,
1999: 0,
2000: 0,
2001: /*55*/ 0,
2002: 0,
2003: MatSetUnfactored_MPIBAIJ,
2004: 0,
2005: MatSetValuesBlocked_MPIBAIJ,
2006: /*60*/ 0,
2007: MatDestroy_MPIBAIJ,
2008: MatView_MPIBAIJ,
2009: 0,
2010: 0,
2011: /*65*/ 0,
2012: 0,
2013: 0,
2014: 0,
2015: 0,
2016: /*70*/ MatGetRowMaxAbs_MPIBAIJ,
2017: 0,
2018: 0,
2019: 0,
2020: 0,
2021: /*75*/ 0,
2022: 0,
2023: 0,
2024: 0,
2025: 0,
2026: /*80*/ 0,
2027: 0,
2028: 0,
2029: 0,
2030: MatLoad_MPIBAIJ,
2031: /*85*/ 0,
2032: 0,
2033: 0,
2034: 0,
2035: 0,
2036: /*90*/ 0,
2037: 0,
2038: 0,
2039: 0,
2040: 0,
2041: /*95*/ 0,
2042: 0,
2043: 0,
2044: 0,
2045: 0,
2046: /*100*/0,
2047: 0,
2048: 0,
2049: 0,
2050: 0,
2051: /*105*/0,
2052: MatRealPart_MPIBAIJ,
2053: MatImaginaryPart_MPIBAIJ};
2059: PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2060: {
2062: *a = ((Mat_MPIBAIJ *)A->data)->A;
2063: *iscopy = PETSC_FALSE;
2064: return(0);
2065: }
2074: PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2075: {
2076: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)B->data;
2077: PetscInt m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2078: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2079: const PetscInt *JJ;
2080: PetscScalar *values;
2084: #if defined(PETSC_OPT_g)
2085: if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2086: #endif
2087: PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);
2088: o_nnz = d_nnz + m;
2090: for (i=0; i<m; i++) {
2091: nnz = Ii[i+1]- Ii[i];
2092: JJ = J + Ii[i];
2093: nnz_max = PetscMax(nnz_max,nnz);
2094: #if defined(PETSC_OPT_g)
2095: if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2096: #endif
2097: for (j=0; j<nnz; j++) {
2098: if (*JJ >= cstart) break;
2099: JJ++;
2100: }
2101: d = 0;
2102: for (; j<nnz; j++) {
2103: if (*JJ++ >= cend) break;
2104: d++;
2105: }
2106: d_nnz[i] = d;
2107: o_nnz[i] = nnz - d;
2108: }
2109: MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
2110: PetscFree(d_nnz);
2112: if (v) values = (PetscScalar*)v;
2113: else {
2114: PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);
2115: PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));
2116: }
2118: for (i=0; i<m; i++) {
2119: ii = i + rstart;
2120: nnz = Ii[i+1]- Ii[i];
2121: MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
2122: }
2123: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2124: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2126: if (!v) {
2127: PetscFree(values);
2128: }
2129: return(0);
2130: }
2134: /*@C
2135: MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ
2136: (BAIJ) format, using a compressed (block) row description of the local rows.
2138: Collective on MPI_Comm
2140: Input Parameters:
2141: + B - the matrix
2142: . bs - the block size
2143: . i - the indices into j for the start of each local block row (starts with zero)
2144: . j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2145: - v - optional values in the matrix
2146: Level: developer
2148: .keywords: matrix, aij, compressed row, sparse, parallel
2150: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2151: @*/
2152: PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2153: {
2154: PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2157: PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);
2158: if (f) {
2159: (*f)(B,bs,i,j,v);
2160: }
2161: return(0);
2162: }
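A hedged usage sketch of this routine, assuming block size 2 and exactly one locally owned block row per process holding a single diagonal block; the helper name and the data below are illustrative only.

#include "petscmat.h"

/* Illustrative sketch only, not part of mpibaij.c: preallocate and fill one 2x2
   identity block per process from block-indexed CSR arrays. Assumes B was created
   with MatCreate()/MatSetSizes()/MatSetType(B,MATMPIBAIJ) so that each process
   owns exactly one block row of size 2. */
PetscErrorCode CSRPreallocationSketch(Mat B,PetscMPIInt rank)
{
  PetscInt       ii[2],jj[1];
  PetscScalar    vv[4];
  PetscErrorCode ierr;

  ii[0] = 0; ii[1] = 1;            /* one local block row containing one block     */
  jj[0] = (PetscInt)rank;          /* its global block column: the diagonal block  */
  vv[0] = 1.0; vv[1] = 0.0;        /* the 2x2 block, here an identity              */
  vv[2] = 0.0; vv[3] = 1.0;
  ierr = MatMPIBAIJSetPreallocationCSR(B,2,ii,jj,vv);CHKERRQ(ierr); /* also assembles B */
  return 0;
}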
2167: PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2168: {
2169: Mat_MPIBAIJ *b;
2171: PetscInt i;
2174: B->preallocated = PETSC_TRUE;
2175: PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");
2176: PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);
2177: PetscOptionsEnd();
2179: if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2180: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2181: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2182: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2183: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2184:
2185: B->rmap.bs = bs;
2186: B->cmap.bs = bs;
2187: PetscMapSetUp(&B->rmap);
2188: PetscMapSetUp(&B->cmap);
2190: if (d_nnz) {
2191: for (i=0; i<B->rmap.n/bs; i++) {
2192: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2193: }
2194: }
2195: if (o_nnz) {
2196: for (i=0; i<B->rmap.n/bs; i++) {
2197: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2198: }
2199: }
2201: b = (Mat_MPIBAIJ*)B->data;
2202: b->bs2 = bs*bs;
2203: b->mbs = B->rmap.n/bs;
2204: b->nbs = B->cmap.n/bs;
2205: b->Mbs = B->rmap.N/bs;
2206: b->Nbs = B->cmap.N/bs;
2208: for (i=0; i<=b->size; i++) {
2209: b->rangebs[i] = B->rmap.range[i]/bs;
2210: }
2211: b->rstartbs = B->rmap.rstart/bs;
2212: b->rendbs = B->rmap.rend/bs;
2213: b->cstartbs = B->cmap.rstart/bs;
2214: b->cendbs = B->cmap.rend/bs;
2216: MatCreate(PETSC_COMM_SELF,&b->A);
2217: MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);
2218: MatSetType(b->A,MATSEQBAIJ);
2219: MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
2220: PetscLogObjectParent(B,b->A);
2221: MatCreate(PETSC_COMM_SELF,&b->B);
2222: MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);
2223: MatSetType(b->B,MATSEQBAIJ);
2224: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
2225: PetscLogObjectParent(B,b->B);
2227: MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);
2229: return(0);
2230: }
2234: EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2235: EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2238: /*MC
2239: MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2241: Options Database Keys:
2242: + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2243: . -mat_block_size <bs> - set the blocksize used to store the matrix
2244: - -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> is the hash table size factor
2246: Level: beginner
2248: .seealso: MatCreateMPIBAIJ
2249: M*/
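A hedged sketch of selecting and preallocating this type from user code; the sizes and nonzero estimates are illustrative placeholders, not taken from this file.

#include "petscmat.h"

/* Illustrative sketch only, not part of mpibaij.c: create a 100x100 matrix,
   default it to MATMPIBAIJ, let -mat_type / -mat_block_size from the options
   database override that, then preallocate with rough estimates. */
PetscErrorCode CreateMPIBAIJFromOptionsSketch(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);
  /* has an effect only if the final type is MATMPIBAIJ */
  ierr = MatMPIBAIJSetPreallocation(*A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  return 0;
}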
2254: PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2255: {
2256: Mat_MPIBAIJ *b;
2258: PetscTruth flg;
2261: PetscNewLog(B,Mat_MPIBAIJ,&b);
2262: B->data = (void*)b;
2265: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2266: B->mapping = 0;
2267: B->factor = 0;
2268: B->assembled = PETSC_FALSE;
2270: B->insertmode = NOT_SET_VALUES;
2271: MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);
2272: MPI_Comm_size(((PetscObject)B)->comm,&b->size);
2274: /* build local table of row and column ownerships */
2275: PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);
2277: /* build cache for off array entries formed */
2278: MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);
2279: b->donotstash = PETSC_FALSE;
2280: b->colmap = PETSC_NULL;
2281: b->garray = PETSC_NULL;
2282: b->roworiented = PETSC_TRUE;
2284: #if defined(PETSC_USE_MAT_SINGLE)
2285: /* stuff for MatSetValues_XXX in single precision */
2286: b->setvalueslen = 0;
2287: b->setvaluescopy = PETSC_NULL;
2288: #endif
2290: /* stuff used in block assembly */
2291: b->barray = 0;
2293: /* stuff used for matrix vector multiply */
2294: b->lvec = 0;
2295: b->Mvctx = 0;
2297: /* stuff for MatGetRow() */
2298: b->rowindices = 0;
2299: b->rowvalues = 0;
2300: b->getrowactive = PETSC_FALSE;
2302: /* hash table stuff */
2303: b->ht = 0;
2304: b->hd = 0;
2305: b->ht_size = 0;
2306: b->ht_flag = PETSC_FALSE;
2307: b->ht_fact = 0;
2308: b->ht_total_ct = 0;
2309: b->ht_insert_ct = 0;
2311: PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");
2312: PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);
2313: if (flg) {
2314: PetscReal fact = 1.39;
2315: MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
2316: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);
2317: if (fact <= 1.0) fact = 1.39;
2318: MatMPIBAIJSetHashTableFactor(B,fact);
2319: PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
2320: }
2321: PetscOptionsEnd();
2323: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2324: "MatStoreValues_MPIBAIJ",
2325: MatStoreValues_MPIBAIJ);
2326: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2327: "MatRetrieveValues_MPIBAIJ",
2328: MatRetrieveValues_MPIBAIJ);
2329: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2330: "MatGetDiagonalBlock_MPIBAIJ",
2331: MatGetDiagonalBlock_MPIBAIJ);
2332: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2333: "MatMPIBAIJSetPreallocation_MPIBAIJ",
2334: MatMPIBAIJSetPreallocation_MPIBAIJ);
2335: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2336: "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2337: MatMPIBAIJSetPreallocationCSR_MPIBAIJ);
2338: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2339: "MatDiagonalScaleLocal_MPIBAIJ",
2340: MatDiagonalScaleLocal_MPIBAIJ);
2341: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2342: "MatSetHashTableFactor_MPIBAIJ",
2343: MatSetHashTableFactor_MPIBAIJ);
2344: PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);
2345: return(0);
2346: }
2349: /*MC
2350: MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2352: This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2353: and MATMPIBAIJ otherwise.
2355: Options Database Keys:
2356: . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2358: Level: beginner
2360: .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2361: M*/
2366: PetscErrorCode MatCreate_BAIJ(Mat A)
2367: {
2369: PetscMPIInt size;
2372: MPI_Comm_size(((PetscObject)A)->comm,&size);
2373: if (size == 1) {
2374: MatSetType(A,MATSEQBAIJ);
2375: } else {
2376: MatSetType(A,MATMPIBAIJ);
2377: }
2378: return(0);
2379: }
2384: /*@C
2385: MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2386: (block compressed row). For good matrix assembly performance
2387: the user should preallocate the matrix storage by setting the parameters
2388: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2389: performance can be increased by more than a factor of 50.
2391: Collective on Mat
2393: Input Parameters:
2394: + A - the matrix
2395: . bs   - size of block
2396: . d_nz - number of block nonzeros per block row in diagonal portion of local
2397: submatrix (same for all local rows)
2398: . d_nnz - array containing the number of block nonzeros in the various block rows
2399: in the diagonal portion of the local submatrix (possibly different for each block
2400: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2401: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2402: submatrix (same for all local rows).
2403: - o_nnz - array containing the number of nonzeros in the various block rows of the
2404: off-diagonal portion of the local submatrix (possibly different for
2405: each block row) or PETSC_NULL.
2407: If the *_nnz parameter is given then the *_nz parameter is ignored
2409: Options Database Keys:
2410: + -mat_block_size - size of the blocks to use
2411: - -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> is the hash table size factor
2413: Notes:
2414: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2415: then it must be used on all processors that share the object for that argument.
2417: Storage Information:
2418: For a square global matrix we define each processor's diagonal portion
2419: to be its local rows and the corresponding columns (a square submatrix);
2420: each processor's off-diagonal portion encompasses the remainder of the
2421: local matrix (a rectangular submatrix).
2423: The user can specify preallocated storage for the diagonal part of
2424: the local submatrix with either d_nz or d_nnz (not both). Set
2425: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2426: memory allocation. Likewise, specify preallocated storage for the
2427: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2429: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2430: the figure below we depict these three local rows and all columns (0-11).
2432: .vb
2433: 0 1 2 3 4 5 6 7 8 9 10 11
2434: -------------------
2435: row 3 | o o o d d d o o o o o o
2436: row 4 | o o o d d d o o o o o o
2437: row 5 | o o o d d d o o o o o o
2438: -------------------
2439: .ve
2440:
2441: Thus, any entries in the d locations are stored in the d (diagonal)
2442: submatrix, and any entries in the o locations are stored in the
2443: o (off-diagonal) submatrix. Note that the d and the o submatrices are
2444: stored simply in the MATSEQBAIJ format for compressed row storage.
2446: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2447: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2448: In general, for PDE problems in which most nonzeros are near the diagonal,
2449: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2450: or you will get TERRIBLE performance; see the users' manual chapter on
2451: matrices.
2453: You can call MatGetInfo() to get information on how effective the preallocation was;
2454: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2455: You can also run with the option -info and look for messages with the string
2456: malloc in them to see if additional memory allocation was needed.
2458: Level: intermediate
2460: .keywords: matrix, block, aij, compressed row, sparse, parallel
2462: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2463: @*/
2464: PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2465: {
2466: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2469: PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);
2470: if (f) {
2471: (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
2472: }
2473: return(0);
2474: }
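Following the storage description above, here is a hedged sketch of exact per-block-row preallocation; a block-diagonal pattern is used only so that the placeholder counts are always valid, and the helper name is illustrative.

#include "petscmat.h"

/* Illustrative sketch only, not part of mpibaij.c: preallocate an MPIBAIJ matrix
   with per-block-row counts. Assumes B already has type MATMPIBAIJ and its sizes
   set with block size 3, and that mbs is the number of locally owned block rows.
   The block-diagonal counts below (1 diagonal block, 0 off-diagonal blocks per
   block row) are placeholders for application-specific values. */
PetscErrorCode PerBlockRowPreallocationSketch(Mat B,PetscInt mbs)
{
  PetscInt       *d_nnz,*o_nnz,i;
  PetscErrorCode ierr;

  ierr = PetscMalloc(2*(mbs+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + mbs + 1;
  for (i=0; i<mbs; i++) {
    d_nnz[i] = 1;   /* block nonzeros in the diagonal portion of block row i     */
    o_nnz[i] = 0;   /* block nonzeros in the off-diagonal portion of block row i */
  }
  ierr = MatMPIBAIJSetPreallocation(B,3,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  return 0;
}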
2478: /*@C
2479: MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2480: (block compressed row). For good matrix assembly performance
2481: the user should preallocate the matrix storage by setting the parameters
2482: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2483: performance can be increased by more than a factor of 50.
2485: Collective on MPI_Comm
2487: Input Parameters:
2488: + comm - MPI communicator
2489: . bs   - size of block
2490: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2491: This value should be the same as the local size used in creating the
2492: y vector for the matrix-vector product y = Ax.
2493: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2494: This value should be the same as the local size used in creating the
2495: x vector for the matrix-vector product y = Ax.
2496: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2497: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2498: . d_nz - number of nonzero blocks per block row in diagonal portion of local
2499: submatrix (same for all local rows)
2500: . d_nnz - array containing the number of nonzero blocks in the various block rows
2501: of the in diagonal portion of the local (possibly different for each block
2502: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2503: . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
2504: submatrix (same for all local rows).
2505: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
2506: off-diagonal portion of the local submatrix (possibly different for
2507: each block row) or PETSC_NULL.
2509: Output Parameter:
2510: . A - the matrix
2512: Options Database Keys:
2513: + -mat_block_size - size of the blocks to use
2514: - -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> is the hash table size factor
2516: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2517: MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
2518: true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2519: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2521: Notes:
2522: If the *_nnz parameter is given then the *_nz parameter is ignored
2524: A nonzero block is any block that has 1 or more nonzeros in it
2526: The user MUST specify either the local or global matrix dimensions
2527: (possibly both).
2529: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2530: then it must be used on all processors that share the object for that argument.
2532: Storage Information:
2533: For a square global matrix we define each processor's diagonal portion
2534: to be its local rows and the corresponding columns (a square submatrix);
2535: each processor's off-diagonal portion encompasses the remainder of the
2536: local matrix (a rectangular submatrix).
2538: The user can specify preallocated storage for the diagonal part of
2539: the local submatrix with either d_nz or d_nnz (not both). Set
2540: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2541: memory allocation. Likewise, specify preallocated storage for the
2542: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2544: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2545: the figure below we depict these three local rows and all columns (0-11).
2547: .vb
2548: 0 1 2 3 4 5 6 7 8 9 10 11
2549: -------------------
2550: row 3 | o o o d d d o o o o o o
2551: row 4 | o o o d d d o o o o o o
2552: row 5 | o o o d d d o o o o o o
2553: -------------------
2554: .ve
2555:
2556: Thus, any entries in the d locations are stored in the d (diagonal)
2557: submatrix, and any entries in the o locations are stored in the
2558: o (off-diagonal) submatrix. Note that the d and the o submatrices are
2559: stored simply in the MATSEQBAIJ format for compressed row storage.
2561: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2562: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2563: In general, for PDE problems in which most nonzeros are near the diagonal,
2564: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2565: or you will get TERRIBLE performance; see the users' manual chapter on
2566: matrices.
2568: Level: intermediate
2570: .keywords: matrix, block, aij, compressed row, sparse, parallel
2572: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2573: @*/
2574: PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2575: {
2577: PetscMPIInt size;
2580: MatCreate(comm,A);
2581: MatSetSizes(*A,m,n,M,N);
2582: MPI_Comm_size(comm,&size);
2583: if (size > 1) {
2584: MatSetType(*A,MATMPIBAIJ);
2585: MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
2586: } else {
2587: MatSetType(*A,MATSEQBAIJ);
2588: MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
2589: }
2590: return(0);
2591: }
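A hedged one-call sketch of the convenience constructor above; the global size, block size, and d_nz/o_nz estimates are illustrative placeholders.

#include "petscmat.h"

/* Illustrative sketch only, not part of mpibaij.c: create a 400x400 matrix with
   block size 4, estimating 3 diagonal-portion and 2 off-diagonal-portion block
   nonzeros per block row. On a single process this yields a MATSEQBAIJ matrix. */
PetscErrorCode CreateMPIBAIJSketch(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreateMPIBAIJ(comm,4,PETSC_DECIDE,PETSC_DECIDE,400,400,3,PETSC_NULL,2,PETSC_NULL,A);CHKERRQ(ierr);
  return 0;
}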
2595: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2596: {
2597: Mat mat;
2598: Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2600: PetscInt len=0;
2603: *newmat = 0;
2604: MatCreate(((PetscObject)matin)->comm,&mat);
2605: MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);
2606: MatSetType(mat,((PetscObject)matin)->type_name);
2607: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
2609: mat->factor = matin->factor;
2610: mat->preallocated = PETSC_TRUE;
2611: mat->assembled = PETSC_TRUE;
2612: mat->insertmode = NOT_SET_VALUES;
2614: a = (Mat_MPIBAIJ*)mat->data;
2615: mat->rmap.bs = matin->rmap.bs;
2616: a->bs2 = oldmat->bs2;
2617: a->mbs = oldmat->mbs;
2618: a->nbs = oldmat->nbs;
2619: a->Mbs = oldmat->Mbs;
2620: a->Nbs = oldmat->Nbs;
2621:
2622: PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);
2623: PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);
2625: a->size = oldmat->size;
2626: a->rank = oldmat->rank;
2627: a->donotstash = oldmat->donotstash;
2628: a->roworiented = oldmat->roworiented;
2629: a->rowindices = 0;
2630: a->rowvalues = 0;
2631: a->getrowactive = PETSC_FALSE;
2632: a->barray = 0;
2633: a->rstartbs = oldmat->rstartbs;
2634: a->rendbs = oldmat->rendbs;
2635: a->cstartbs = oldmat->cstartbs;
2636: a->cendbs = oldmat->cendbs;
2638: /* hash table stuff */
2639: a->ht = 0;
2640: a->hd = 0;
2641: a->ht_size = 0;
2642: a->ht_flag = oldmat->ht_flag;
2643: a->ht_fact = oldmat->ht_fact;
2644: a->ht_total_ct = 0;
2645: a->ht_insert_ct = 0;
2647: PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));
2648: MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);
2649: MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);
2650: if (oldmat->colmap) {
2651: #if defined (PETSC_USE_CTABLE)
2652: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2653: #else
2654: PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);
2655: PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2656: PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));
2657: #endif
2658: } else a->colmap = 0;
2660: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2661: PetscMalloc(len*sizeof(PetscInt),&a->garray);
2662: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2663: PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));
2664: } else a->garray = 0;
2665:
2666: VecDuplicate(oldmat->lvec,&a->lvec);
2667: PetscLogObjectParent(mat,a->lvec);
2668: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2669: PetscLogObjectParent(mat,a->Mvctx);
2671: MatDuplicate(oldmat->A,cpvalues,&a->A);
2672: PetscLogObjectParent(mat,a->A);
2673: MatDuplicate(oldmat->B,cpvalues,&a->B);
2674: PetscLogObjectParent(mat,a->B);
2675: PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
2676: *newmat = mat;
2678: return(0);
2679: }
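A hedged sketch of exercising the duplication routine above through the generic interface; the helper name is illustrative.

#include "petscmat.h"

/* Illustrative sketch only, not part of mpibaij.c: duplicate an assembled MPIBAIJ
   matrix together with its values, then free the copy. */
PetscErrorCode DuplicateSketch(Mat A)
{
  Mat            B;
  PetscErrorCode ierr;

  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  return 0;
}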
2681: #include petscsys.h
2685: PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2686: {
2687: Mat A;
2689: int fd;
2690: PetscInt i,nz,j,rstart,rend;
2691: PetscScalar *vals,*buf;
2692: MPI_Comm comm = ((PetscObject)viewer)->comm;
2693: MPI_Status status;
2694: PetscMPIInt rank,size,maxnz;
2695: PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2696: PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2697: PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2698: PetscMPIInt tag = ((PetscObject)viewer)->tag;
2699: PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2700: PetscInt dcount,kmax,k,nzcount,tmp,mend;
2703: PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");
2704: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2705: PetscOptionsEnd();
2707: MPI_Comm_size(comm,&size);
2708: MPI_Comm_rank(comm,&rank);
2709: if (!rank) {
2710: PetscViewerBinaryGetDescriptor(viewer,&fd);
2711: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
2712: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2713: }
2715: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2716: M = header[1]; N = header[2];
2718: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2720: /*
2721: This code adds extra rows to make sure the number of rows is
2722: divisible by the blocksize
2723: */
2724: Mbs = M/bs;
2725: extra_rows = bs - M + bs*Mbs;
2726: if (extra_rows == bs) extra_rows = 0;
2727: else Mbs++;
2728: if (extra_rows && !rank) {
2729: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2730: }
2732: /* determine ownership of all rows */
2733: mbs = Mbs/size + ((Mbs % size) > rank);
2734: m = mbs*bs;
2735: PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);
2736: MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
2738: /* process 0 needs enough room for process with most rows */
2739: if (!rank) {
2740: mmax = rowners[1];
2741: for (i=2; i<size; i++) {
2742: mmax = PetscMax(mmax,rowners[i]);
2743: }
2744: mmax*=bs;
2745: } else mmax = m;
2747: rowners[0] = 0;
2748: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2749: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2750: rstart = rowners[rank];
2751: rend = rowners[rank+1];
2753: /* distribute row lengths to all processors */
2754: PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);
2755: if (!rank) {
2756: mend = m;
2757: if (size == 1) mend = mend - extra_rows;
2758: PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);
2759: for (j=mend; j<m; j++) locrowlens[j] = 1;
2760: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
2761: PetscMalloc(size*sizeof(PetscInt),&procsnz);
2762: PetscMemzero(procsnz,size*sizeof(PetscInt));
2763: for (j=0; j<m; j++) {
2764: procsnz[0] += locrowlens[j];
2765: }
2766: for (i=1; i<size; i++) {
2767: mend = browners[i+1] - browners[i];
2768: if (i == size-1) mend = mend - extra_rows;
2769: PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);
2770: for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2771: /* calculate the number of nonzeros on each processor */
2772: for (j=0; j<browners[i+1]-browners[i]; j++) {
2773: procsnz[i] += rowlengths[j];
2774: }
2775: MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);
2776: }
2777: PetscFree(rowlengths);
2778: } else {
2779: MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);
2780: }
2782: if (!rank) {
2783: /* determine max buffer needed and allocate it */
2784: maxnz = procsnz[0];
2785: for (i=1; i<size; i++) {
2786: maxnz = PetscMax(maxnz,procsnz[i]);
2787: }
2788: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
2790: /* read in my part of the matrix column indices */
2791: nz = procsnz[0];
2792: PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);
2793: mycols = ibuf;
2794: if (size == 1) nz -= extra_rows;
2795: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2796: if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2798: /* read in everyone else's rows (except the last process's) and ship them off */
2799: for (i=1; i<size-1; i++) {
2800: nz = procsnz[i];
2801: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2802: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2803: }
2804: /* read in the stuff for the last proc */
2805: if (size != 1) {
2806: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2807: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2808: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2809: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
2810: }
2811: PetscFree(cols);
2812: } else {
2813: /* determine buffer space needed for message */
2814: nz = 0;
2815: for (i=0; i<m; i++) {
2816: nz += locrowlens[i];
2817: }
2818: PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);
2819: mycols = ibuf;
2820: /* receive message of column indices*/
2821: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2822: MPI_Get_count(&status,MPIU_INT,&maxnz);
2823: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2824: }
2825:
2826: /* loop over local rows, determining number of off diagonal entries */
2827: PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);
2828: PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);
2829: PetscMemzero(mask,Mbs*sizeof(PetscInt));
2830: PetscMemzero(masked1,Mbs*sizeof(PetscInt));
2831: PetscMemzero(masked2,Mbs*sizeof(PetscInt));
2832: rowcount = 0; nzcount = 0;
2833: for (i=0; i<mbs; i++) {
2834: dcount = 0;
2835: odcount = 0;
2836: for (j=0; j<bs; j++) {
2837: kmax = locrowlens[rowcount];
2838: for (k=0; k<kmax; k++) {
2839: tmp = mycols[nzcount++]/bs;
2840: if (!mask[tmp]) {
2841: mask[tmp] = 1;
2842: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2843: else masked1[dcount++] = tmp;
2844: }
2845: }
2846: rowcount++;
2847: }
2848:
2849: dlens[i] = dcount;
2850: odlens[i] = odcount;
2852: /* zero out the mask elements we set */
2853: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2854: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2855: }
2857: /* create our matrix */
2858: MatCreate(comm,&A);
2859: MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);
2860: MatSetType(A,type);
2861: MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);
2863: if (!rank) {
2864: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);
2865: /* read in my part of the matrix numerical values */
2866: nz = procsnz[0];
2867: vals = buf;
2868: mycols = ibuf;
2869: if (size == 1) nz -= extra_rows;
2870: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2871: if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2873: /* insert into matrix */
2874: jj = rstart*bs;
2875: for (i=0; i<m; i++) {
2876: MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2877: mycols += locrowlens[i];
2878: vals += locrowlens[i];
2879: jj++;
2880: }
2881: /* read in other processors (except the last one) and ship out */
2882: for (i=1; i<size-1; i++) {
2883: nz = procsnz[i];
2884: vals = buf;
2885: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2886: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);
2887: }
2888: /* the last proc */
2889: if (size != 1){
2890: nz = procsnz[i] - extra_rows;
2891: vals = buf;
2892: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2893: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2894: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);
2895: }
2896: PetscFree(procsnz);
2897: } else {
2898: /* receive numeric values */
2899: PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);
2901: /* receive message of values*/
2902: vals = buf;
2903: mycols = ibuf;
2904: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);
2905: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2906: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2908: /* insert into matrix */
2909: jj = rstart*bs;
2910: for (i=0; i<m; i++) {
2911: MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2912: mycols += locrowlens[i];
2913: vals += locrowlens[i];
2914: jj++;
2915: }
2916: }
2917: PetscFree(locrowlens);
2918: PetscFree(buf);
2919: PetscFree(ibuf);
2920: PetscFree2(rowners,browners);
2921: PetscFree2(dlens,odlens);
2922: PetscFree3(mask,masked1,masked2);
2923: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2924: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2926: *newmat = A;
2927: return(0);
2928: }
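A hedged sketch of driving this loader through MatLoad(); the file name is a placeholder, and -matload_block_size selects the block size read by the routine above.

#include "petscmat.h"
#include "petscviewer.h"

/* Illustrative sketch only, not part of mpibaij.c: read a PETSc binary matrix file
   (written earlier with MatView() on a binary viewer) into a MATMPIBAIJ matrix. */
PetscErrorCode LoadBAIJSketch(MPI_Comm comm,Mat *A)
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIBAIJ,A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  return 0;
}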
2932: /*@
2933: MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2935: Input Parameters:
2936: + mat - the matrix
2937: - fact - the factor used to compute the size of the hash table
2939: Collective on Mat
2941: Level: advanced
2943: Notes:
2944: This can also be set by the command line option: -mat_use_hash_table <fact>
2946: .keywords: matrix, hashtable, factor, HT
2948: .seealso: MatSetOption()
2949: @*/
2950: PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2951: {
2952: PetscErrorCode ierr,(*f)(Mat,PetscReal);
2955: PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);
2956: if (f) {
2957: (*f)(mat,fact);
2958: }
2959: return(0);
2960: }
2965: PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2966: {
2967: Mat_MPIBAIJ *baij;
2970: baij = (Mat_MPIBAIJ*)mat->data;
2971: baij->ht_fact = fact;
2972: return(0);
2973: }
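A hedged sketch of enabling hash-table assembly and setting this factor programmatically; the factor value 1.6 and the helper name are illustrative.

#include "petscmat.h"

/* Illustrative sketch only, not part of mpibaij.c: turn on hash-table based
   assembly for a MATMPIBAIJ matrix before it is assembled, and set the factor
   used to size the table (1.6 is just an example value). */
PetscErrorCode UseHashTableSketch(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
  return 0;
}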
2978: PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2979: {
2980: Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2982: *Ad = a->A;
2983: *Ao = a->B;
2984: *colmap = a->garray;
2985: return(0);
2986: }
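A hedged sketch of inspecting the two local sequential blocks and the column map returned by the routine above; the helper name is illustrative.

#include "petscmat.h"

/* Illustrative sketch only, not part of mpibaij.c: fetch the local diagonal (Ad)
   and off-diagonal (Ao) SeqBAIJ blocks of an assembled MPIBAIJ matrix and print
   their local sizes; garray maps local off-diagonal block columns to global ones. */
PetscErrorCode InspectLocalBlocksSketch(Mat A)
{
  Mat            Ad,Ao;
  PetscInt       *garray,m,n;
  PetscErrorCode ierr;

  ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatGetLocalSize(Ad,&m,&n);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal block: %D x %D\n",m,n);CHKERRQ(ierr);
  ierr = MatGetLocalSize(Ao,&m,&n);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"off-diagonal block: %D x %D\n",m,n);CHKERRQ(ierr);
  return 0;
}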
2988: /*
2989: Special version for direct calls from Fortran (to eliminate two function call overheads)
2990: */
2991: #if defined(PETSC_HAVE_FORTRAN_CAPS)
2992: #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
2993: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
2994: #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
2995: #endif
2999: /*@C
3000: MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3002: Collective on Mat
3004: Input Parameters:
3005: + mat - the matrix
3006: . min - number of input rows
3007: . im - input rows
3008: . nin - number of input columns
3009: . in - input columns
3010: . v - numerical values input
3011: - addvin - INSERT_VALUES or ADD_VALUES
3013: Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3015: Level: advanced
3017: .seealso: MatSetValuesBlocked()
3018: @*/
3019: PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3020: {
3021: /* convert input arguments to C version */
3022: Mat mat = *matin;
3023: PetscInt m = *min, n = *nin;
3024: InsertMode addv = *addvin;
3026: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
3027: const MatScalar *value;
3028: MatScalar *barray=baij->barray;
3029: PetscTruth roworiented = baij->roworiented;
3030: PetscErrorCode ierr;
3031: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
3032: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3033: PetscInt cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
3034:
3036: /* tasks normally handled by MatSetValuesBlocked() */
3037: if (mat->insertmode == NOT_SET_VALUES) {
3038: mat->insertmode = addv;
3039: }
3040: #if defined(PETSC_USE_DEBUG)
3041: else if (mat->insertmode != addv) {
3042: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3043: }
3044: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3045: #endif
3046: if (mat->assembled) {
3047: mat->was_assembled = PETSC_TRUE;
3048: mat->assembled = PETSC_FALSE;
3049: }
3053: if(!barray) {
3054: PetscMalloc(bs2*sizeof(MatScalar),&barray);
3055: baij->barray = barray;
3056: }
3058: if (roworiented) {
3059: stepval = (n-1)*bs;
3060: } else {
3061: stepval = (m-1)*bs;
3062: }
3063: for (i=0; i<m; i++) {
3064: if (im[i] < 0) continue;
3065: #if defined(PETSC_USE_DEBUG)
3066: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3067: #endif
3068: if (im[i] >= rstart && im[i] < rend) {
3069: row = im[i] - rstart;
3070: for (j=0; j<n; j++) {
3071: /* If NumCol = 1 then a copy is not required */
3072: if ((roworiented) && (n == 1)) {
3073: barray = (MatScalar*)v + i*bs2;
3074: } else if((!roworiented) && (m == 1)) {
3075: barray = (MatScalar*)v + j*bs2;
3076: } else { /* Here a copy is required */
3077: if (roworiented) {
3078: value = v + i*(stepval+bs)*bs + j*bs;
3079: } else {
3080: value = v + j*(stepval+bs)*bs + i*bs;
3081: }
3082: for (ii=0; ii<bs; ii++,value+=stepval) {
3083: for (jj=0; jj<bs; jj++) {
3084: *barray++ = *value++;
3085: }
3086: }
3087: barray -=bs2;
3088: }
3089:
3090: if (in[j] >= cstart && in[j] < cend){
3091: col = in[j] - cstart;
3092: MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);
3093: }
3094: else if (in[j] < 0) continue;
3095: #if defined(PETSC_USE_DEBUG)
3096: else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3097: #endif
3098: else {
3099: if (mat->was_assembled) {
3100: if (!baij->colmap) {
3101: CreateColmap_MPIBAIJ_Private(mat);
3102: }
3104: #if defined(PETSC_USE_DEBUG)
3105: #if defined (PETSC_USE_CTABLE)
3106: { PetscInt data;
3107: PetscTableFind(baij->colmap,in[j]+1,&data);
3108: if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3109: }
3110: #else
3111: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3112: #endif
3113: #endif
3114: #if defined (PETSC_USE_CTABLE)
3115: PetscTableFind(baij->colmap,in[j]+1,&col);
3116: col = (col - 1)/bs;
3117: #else
3118: col = (baij->colmap[in[j]] - 1)/bs;
3119: #endif
3120: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3121: DisAssemble_MPIBAIJ(mat);
3122: col = in[j];
3123: }
3124: }
3125: else col = in[j];
3126: MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);
3127: }
3128: }
3129: } else {
3130: if (!baij->donotstash) {
3131: if (roworiented) {
3132: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3133: } else {
3134: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3135: }
3136: }
3137: }
3138: }
3139:
3140: /* task normally handled by MatSetValuesBlocked() */
3142: return(0);
3143: }