Actual source code: mpisbaij.c
1: /*$Id: mpisbaij.c,v 1.52 2001/04/09 15:17:38 bsmith Exp $*/
3: #include "src/mat/impls/baij/mpi/mpibaij.h" /*I "petscmat.h" I*/
4: #include "src/vec/vecimpl.h"
5: #include "mpisbaij.h"
6: #include "src/mat/impls/sbaij/seq/sbaij.h"
8: extern int MatSetUpMultiply_MPISBAIJ(Mat);
9: extern int DisAssemble_MPISBAIJ(Mat);
10: extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
11: extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12: extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
13: extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14: extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15: extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
16: extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
17: extern int MatPrintHelp_SeqSBAIJ(Mat);
18: extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
19: extern int MatZeroRows_SeqBAIJ(Mat,IS,Scalar *);
20: extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22: /* UGLY, ugly, ugly
23: When MatScalar == Scalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
24: not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
25: inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
26: converts the entries into single precision and then calls ..._MatScalar() to put them
27: into the single precision data structures.
28: */
29: #if defined(PETSC_USE_MAT_SINGLE)
30: extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31: extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32: extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33: extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
34: extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
35: #else
36: #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ
37: #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ
38: #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ
39: #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT
40: #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT
41: #endif
43: EXTERN_C_BEGIN
44: int MatStoreValues_MPISBAIJ(Mat mat)
45: {
46: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
47: int ierr;
50: MatStoreValues(aij->A);
51: MatStoreValues(aij->B);
52: return(0);
53: }
54: EXTERN_C_END
56: EXTERN_C_BEGIN
57: int MatRetrieveValues_MPISBAIJ(Mat mat)
58: {
59: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
60: int ierr;
63: MatRetrieveValues(aij->A);
64: MatRetrieveValues(aij->B);
65: return(0);
66: }
67: EXTERN_C_END
69: /*
70: Local utility routine that creates a mapping from the global column
71: number to the local number in the off-diagonal part of the local
72: storage of the matrix. This is done in a non-scalable way since the
73: length of colmap equals the global matrix length.
74: */
75: static int CreateColmap_MPISBAIJ_Private(Mat mat)
76: {
78: SETERRQ(1,"Function not yet written for SBAIJ format");
79: /* return(0); */
80: }
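/* In the assembled case colmap[global block column] holds 1 + the local block column of that
   column inside the off-diagonal block B, with 0 meaning the column is not present; this is
   why the lookups in MatSetValues and MatGetValues further down subtract 1 from the value found. */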
82: #define CHUNKSIZE 10
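/* The two macros below insert value at (row,col) of the local diagonal block A or the
   off-diagonal block B: they binary-search the block row for the block column, update the
   entry in place if it already exists, and otherwise insert it, enlarging the row storage
   by CHUNKSIZE extra blocks when the preallocated space is exhausted. */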
84: #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
85: { \
86: \
87: brow = row/bs; \
88: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
89: rmax = aimax[brow]; nrow = ailen[brow]; \
90: bcol = col/bs; \
91: ridx = row % bs; cidx = col % bs; \
92: low = 0; high = nrow; \
93: while (high-low > 3) { \
94: t = (low+high)/2; \
95: if (rp[t] > bcol) high = t; \
96: else low = t; \
97: } \
98: for (_i=low; _i<high; _i++) { \
99: if (rp[_i] > bcol) break; \
100: if (rp[_i] == bcol) { \
101: bap = ap + bs2*_i + bs*cidx + ridx; \
102: if (addv == ADD_VALUES) *bap += value; \
103: else *bap = value; \
104: goto a_noinsert; \
105: } \
106: } \
107: if (a->nonew == 1) goto a_noinsert; \
108: else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
109: if (nrow >= rmax) { \
110: /* there is no extra room in row, therefore enlarge */ \
111: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
112: MatScalar *new_a; \
113: \
114: if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
115: \
116: /* malloc new storage space */ \
117: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
118: ierr = PetscMalloc(len,&new_a); \
119: new_j = (int*)(new_a + bs2*new_nz); \
120: new_i = new_j + new_nz; \
121: \
122: /* copy over old data into new slots */ \
123: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
124: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
125: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
126: len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
127: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int)); \
128: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar)); \
129: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
130: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
131: aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar)); \
132: /* free up old matrix storage */ \
133: PetscFree(a->a); \
134: if (!a->singlemalloc) { \
135: PetscFree(a->i); \
136: PetscFree(a->j); \
137: } \
138: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
139: a->singlemalloc = PETSC_TRUE; \
140: \
141: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
142: rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
143: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
144: a->s_maxnz += bs2*CHUNKSIZE; \
145: a->reallocs++; \
146: a->s_nz++; \
147: } \
148: N = nrow++ - 1; \
149: /* shift up all the later entries in this row */ \
150: for (ii=N; ii>=_i; ii--) { \
151: rp[ii+1] = rp[ii]; \
152: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
153: } \
154: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); } \
155: rp[_i] = bcol; \
156: ap[bs2*_i + bs*cidx + ridx] = value; \
157: a_noinsert:; \
158: ailen[brow] = nrow; \
159: }
160: #ifndef MatSetValues_SeqBAIJ_B_Private
161: #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
162: { \
163: brow = row/bs; \
164: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
165: rmax = bimax[brow]; nrow = bilen[brow]; \
166: bcol = col/bs; \
167: ridx = row % bs; cidx = col % bs; \
168: low = 0; high = nrow; \
169: while (high-low > 3) { \
170: t = (low+high)/2; \
171: if (rp[t] > bcol) high = t; \
172: else low = t; \
173: } \
174: for (_i=low; _i<high; _i++) { \
175: if (rp[_i] > bcol) break; \
176: if (rp[_i] == bcol) { \
177: bap = ap + bs2*_i + bs*cidx + ridx; \
178: if (addv == ADD_VALUES) *bap += value; \
179: else *bap = value; \
180: goto b_noinsert; \
181: } \
182: } \
183: if (b->nonew == 1) goto b_noinsert; \
184: else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
185: if (nrow >= rmax) { \
186: /* there is no extra room in row, therefore enlarge */ \
187: int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
188: MatScalar *new_a; \
189: \
190: if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
191: \
192: /* malloc new storage space */ \
193: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
194: ierr = PetscMalloc(len,&new_a); \
195: new_j = (int*)(new_a + bs2*new_nz); \
196: new_i = new_j + new_nz; \
197: \
198: /* copy over old data into new slots */ \
199: for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
200: for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
201: PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
202: len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
203: PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int)); \
204: PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar)); \
205: PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar)); \
206: PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
207: ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar)); \
208: /* free up old matrix storage */ \
209: PetscFree(b->a); \
210: if (!b->singlemalloc) { \
211: PetscFree(b->i); \
212: PetscFree(b->j); \
213: } \
214: ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
215: b->singlemalloc = PETSC_TRUE; \
216: \
217: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
218: rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
219: PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
220: b->maxnz += bs2*CHUNKSIZE; \
221: b->reallocs++; \
222: b->nz++; \
223: } \
224: N = nrow++ - 1; \
225: /* shift up all the later entries in this row */ \
226: for (ii=N; ii>=_i; ii--) { \
227: rp[ii+1] = rp[ii]; \
228: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
229: } \
230: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));} \
231: rp[_i] = bcol; \
232: ap[bs2*_i + bs*cidx + ridx] = value; \
233: b_noinsert:; \
234: bilen[brow] = nrow; \
235: }
236: #endif
238: #if defined(PETSC_USE_MAT_SINGLE)
239: int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
240: {
241: Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
242: int ierr,i,N = m*n;
243: MatScalar *vsingle;
246: if (N > b->setvalueslen) {
247: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
248: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
249: b->setvalueslen = N;
250: }
251: vsingle = b->setvaluescopy;
253: for (i=0; i<N; i++) {
254: vsingle[i] = v[i];
255: }
256: MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
257: return(0);
258: }
260: int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
261: {
262: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
263: int ierr,i,N = m*n*b->bs2;
264: MatScalar *vsingle;
267: if (N > b->setvalueslen) {
268: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
269: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
270: b->setvalueslen = N;
271: }
272: vsingle = b->setvaluescopy;
273: for (i=0; i<N; i++) {
274: vsingle[i] = v[i];
275: }
276: MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
277: return(0);
278: }
280: int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
281: {
282: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
283: int ierr,i,N = m*n;
284: MatScalar *vsingle;
287: SETERRQ(1,"Function not yet written for SBAIJ format");
288: /* return(0); */
289: }
291: int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
292: {
293: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
294: int ierr,i,N = m*n*b->bs2;
295: MatScalar *vsingle;
298: SETERRQ(1,"Function not yet written for SBAIJ format");
299: /* return(0); */
300: }
301: #endif
303: /* Only add/insert a(i,j) with i<=j (blocks).
304: Any a(i,j) with i>j input by the user is ignored.
305: */
306: int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
307: {
308: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
309: MatScalar value;
310: PetscTruth roworiented = baij->roworiented;
311: int ierr,i,j,row,col;
312: int rstart_orig=baij->rstart_bs;
313: int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
314: int cend_orig=baij->cend_bs,bs=baij->bs;
316: /* Some Variables required in the macro */
317: Mat A = baij->A;
318: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
319: int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
320: MatScalar *aa=a->a;
322: Mat B = baij->B;
323: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
324: int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
325: MatScalar *ba=b->a;
327: int *rp,ii,nrow,_i,rmax,N,brow,bcol;
328: int low,high,t,ridx,cidx,bs2=a->bs2;
329: MatScalar *ap,*bap;
331: /* for stash */
332: int n_loc, *in_loc=0;
333: MatScalar *v_loc=0;
337: if(!baij->donotstash){
338: PetscMalloc(n*sizeof(int),&in_loc);
339: PetscMalloc(n*sizeof(MatScalar),&v_loc);
340: }
342: for (i=0; i<m; i++) {
343: if (im[i] < 0) continue;
344: #if defined(PETSC_USE_BOPT_g)
345: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
346: #endif
347: if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
348: row = im[i] - rstart_orig; /* local row index */
349: for (j=0; j<n; j++) {
350: if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
351: if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */
352: col = in[j] - cstart_orig; /* local col index */
353: brow = row/bs; bcol = col/bs;
354: if (brow > bcol) continue; /* ignore lower triangular blocks of A */
355: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
356: MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
357: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
358: } else if (in[j] < 0) continue;
359: #if defined(PETSC_USE_BOPT_g)
360: else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
361: #endif
362: else { /* off-diag entry (B) */
363: if (mat->was_assembled) {
364: if (!baij->colmap) {
365: CreateColmap_MPISBAIJ_Private(mat);
366: }
367: #if defined (PETSC_USE_CTABLE)
368: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
369: col = col - 1 + in[j]%bs;
370: #else
371: col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
372: #endif
373: if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
374: DisAssemble_MPISBAIJ(mat);
375: col = in[j];
376: /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
377: B = baij->B;
378: b = (Mat_SeqBAIJ*)(B)->data;
379: bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
380: ba=b->a;
381: }
382: } else col = in[j];
383: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
384: MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
385: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
386: }
387: }
388: } else { /* off processor entry */
389: if (!baij->donotstash) {
390: n_loc = 0;
391: for (j=0; j<n; j++){
392: if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
393: in_loc[n_loc] = in[j];
394: if (roworiented) {
395: v_loc[n_loc] = v[i*n+j];
396: } else {
397: v_loc[n_loc] = v[j*m+i];
398: }
399: n_loc++;
400: }
401: MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);
402: }
403: }
404: }
406: if(!baij->donotstash){
407: PetscFree(in_loc);
408: PetscFree(v_loc);
409: }
410: return(0);
411: }
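/* Illustrative usage sketch (hypothetical values, not part of the original source): because
   of the i<=j rule above, a caller only needs to supply the upper triangular (block) entries.
   Assuming an assembled matrix mat of type MATMPISBAIJ whose owning process holds row 5:

      int    row = 5, cols[2] = {5,9};
      Scalar vals[2] = {4.0,-1.0};
      MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);

   The same values supplied at (9,5), with row index greater than column index, would be
   skipped by the loops above. */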
413: int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
414: {
416: SETERRQ(1,"Function not yet written for SBAIJ format");
417: /* return(0); */
418: }
420: #define HASH_KEY 0.6180339887
421: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
422: /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
423: /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
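/* Worked example of the multiplicative hash above: with size=7, key=10 and HASH_KEY=0.6180339887,
   tmp = 10*0.6180339887 = 6.180339887, its fractional part is 0.180339887, and the macro
   returns (int)(7*0.180339887) = 1, i.e. a slot in the range 0..size-1. */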
424: int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
425: {
427: SETERRQ(1,"Function not yet written for SBAIJ format");
428: /* return(0); */
429: }
431: int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
432: {
434: SETERRQ(1,"Function not yet written for SBAIJ format");
435: /* return(0); */
436: }
438: int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
439: {
440: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
441: int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
442: int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
445: for (i=0; i<m; i++) {
446: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
447: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
448: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
449: row = idxm[i] - bsrstart;
450: for (j=0; j<n; j++) {
451: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
452: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
453: if (idxn[j] >= bscstart && idxn[j] < bscend){
454: col = idxn[j] - bscstart;
455: MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
456: } else {
457: if (!baij->colmap) {
458: CreateColmap_MPISBAIJ_Private(mat);
459: }
460: #if defined (PETSC_USE_CTABLE)
461: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
462: data --;
463: #else
464: data = baij->colmap[idxn[j]/bs]-1;
465: #endif
466: if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
467: else {
468: col = data + idxn[j]%bs;
469: MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
470: }
471: }
472: }
473: } else {
474: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
475: }
476: }
477: return(0);
478: }
480: int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
481: {
482: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
483: /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
484: /* Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; */
485: int ierr;
486: PetscReal sum[2],*lnorm2;
489: if (baij->size == 1) {
490: MatNorm(baij->A,type,norm);
491: } else {
492: if (type == NORM_FROBENIUS) {
493: PetscMalloc(2*sizeof(double),&lnorm2);
494: MatNorm(baij->A,type,lnorm2);
495: *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
496: MatNorm(baij->B,type,lnorm2);
497: *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
498: /*
499: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
500: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
501: */
502: MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);
503: /*
504: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
505: PetscSynchronizedFlush(PETSC_COMM_WORLD); */
506:
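/* each entry stored in the off-diagonal block B represents two entries of the symmetric
   matrix, (i,j) and (j,i), hence the factor of 2 applied to its squared norm below */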
507: *norm = sqrt(sum[0] + 2*sum[1]);
508: PetscFree(lnorm2);
509: } else {
510: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
511: }
512: }
513: return(0);
514: }
516: /*
517: Creates the hash table, and sets the table
518: This table is created only once.
519: If new entries need to be added to the matrix
520: then the hash table has to be destroyed and
521: recreated.
522: */
523: int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
524: {
526: SETERRQ(1,"Function not yet written for SBAIJ format");
527: /* return(0); */
528: }
530: int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
531: {
532: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
533: int ierr,nstash,reallocs;
534: InsertMode addv;
537: if (baij->donotstash) {
538: return(0);
539: }
541: /* make sure all processors are either in INSERTMODE or ADDMODE */
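/* the bitwise OR of the local insert modes equals ADD_VALUES|INSERT_VALUES exactly when
   some processes inserted while others added, which is what is checked below */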
542: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
543: if (addv == (ADD_VALUES|INSERT_VALUES)) {
544: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
545: }
546: mat->insertmode = addv; /* in case this processor had no cache */
548: MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);
549: MatStashScatterBegin_Private(&mat->bstash,baij->rowners);
550: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
551: PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
552: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
553: PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
554: return(0);
555: }
557: int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
558: {
559: Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
560: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data;
561: Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data;
562: int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
563: int *row,*col,other_disassembled;
564: PetscTruth r1,r2,r3;
565: MatScalar *val;
566: InsertMode addv = mat->insertmode;
567: /* int rank;*/
570: /* remove 2 line below later */
571: /*MPI_Comm_rank(PETSC_COMM_WORLD, &rank); */
573: if (!baij->donotstash) {
574: while (1) {
575: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
576: /*
577: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
578: PetscSynchronizedFlush(PETSC_COMM_WORLD);
579: */
580: if (!flg) break;
582: for (i=0; i<n;) {
583: /* Now identify the consecutive vals belonging to the same row */
584: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
585: if (j < n) ncols = j-i;
586: else ncols = n-i;
587: /* Now assemble all these values with a single function call */
588: MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);
589: i = j;
590: }
591: }
592: MatStashScatterEnd_Private(&mat->stash);
593: /* Now process the block-stash. Since the values are stashed column-oriented,
594: set the roworiented flag to column oriented, and after MatSetValues()
595: restore the original flags */
596: r1 = baij->roworiented;
597: r2 = a->roworiented;
598: r3 = b->roworiented;
599: baij->roworiented = PETSC_FALSE;
600: a->roworiented = PETSC_FALSE;
601: b->roworiented = PETSC_FALSE;
602: while (1) {
603: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
604: if (!flg) break;
605:
606: for (i=0; i<n;) {
607: /* Now identify the consecutive vals belonging to the same row */
608: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
609: if (j < n) ncols = j-i;
610: else ncols = n-i;
611: MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
612: i = j;
613: }
614: }
615: MatStashScatterEnd_Private(&mat->bstash);
616: baij->roworiented = r1;
617: a->roworiented = r2;
618: b->roworiented = r3;
619: }
621: MatAssemblyBegin(baij->A,mode);
622: MatAssemblyEnd(baij->A,mode);
624: /* determine if any processor has disassembled, if so we must
625: also disassemble ourselves, in order that we may reassemble. */
626: /*
627: if nonzero structure of submatrix B cannot change then we know that
628: no processor disassembled thus we can skip this stuff
629: */
630: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
631: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
632: if (mat->was_assembled && !other_disassembled) {
633: DisAssemble_MPISBAIJ(mat);
634: }
635: }
637: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
638: MatSetUpMultiply_MPISBAIJ(mat);
639: }
640: MatAssemblyBegin(baij->B,mode);
641: MatAssemblyEnd(baij->B,mode);
642:
643: #if defined(PETSC_USE_BOPT_g)
644: if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
645: PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
646: baij->ht_total_ct = 0;
647: baij->ht_insert_ct = 0;
648: }
649: #endif
650: if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
651: MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);
652: mat->ops->setvalues = MatSetValues_MPISBAIJ_HT;
653: mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
654: }
656: if (baij->rowvalues) {
657: PetscFree(baij->rowvalues);
658: baij->rowvalues = 0;
659: }
660: return(0);
661: }
663: static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
664: {
665: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
666: int ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
667: PetscTruth isascii,isdraw;
668: PetscViewer sviewer;
669: PetscViewerFormat format;
672: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
673: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
674: if (isascii) {
675: PetscViewerGetFormat(viewer,&format);
676: if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
677: MatInfo info;
678: MPI_Comm_rank(mat->comm,&rank);
679: MatGetInfo(mat,MAT_LOCAL,&info);
680: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
681: rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
682: baij->bs,(int)info.memory);
683: MatGetInfo(baij->A,MAT_LOCAL,&info);
684: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
685: MatGetInfo(baij->B,MAT_LOCAL,&info);
686: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
687: PetscViewerFlush(viewer);
688: VecScatterView(baij->Mvctx,viewer);
689: return(0);
690: } else if (format == PETSC_VIEWER_ASCII_INFO) {
691: PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);
692: return(0);
693: }
694: }
696: if (isdraw) {
697: PetscDraw draw;
698: PetscTruth isnull;
699: PetscViewerDrawGetDraw(viewer,0,&draw);
700: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
701: }
703: if (size == 1) {
704: MatView(baij->A,viewer);
705: } else {
706: /* assemble the entire matrix onto first processor. */
707: Mat A;
708: Mat_SeqSBAIJ *Aloc;
709: Mat_SeqBAIJ *Bloc;
710: int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
711: MatScalar *a;
713: if (!rank) {
714: MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
715: } else {
716: MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
717: }
718: PetscLogObjectParent(mat,A);
720: /* copy over the A part */
721: Aloc = (Mat_SeqSBAIJ*)baij->A->data;
722: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
723: ierr = PetscMalloc(bs*sizeof(int),&rvals);
725: for (i=0; i<mbs; i++) {
726: rvals[0] = bs*(baij->rstart + i);
727: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
728: for (j=ai[i]; j<ai[i+1]; j++) {
729: col = (baij->cstart+aj[j])*bs;
730: for (k=0; k<bs; k++) {
731: MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
732: col++; a += bs;
733: }
734: }
735: }
736: /* copy over the B part */
737: Bloc = (Mat_SeqBAIJ*)baij->B->data;
738: ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
739: for (i=0; i<mbs; i++) {
740: rvals[0] = bs*(baij->rstart + i);
741: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
742: for (j=ai[i]; j<ai[i+1]; j++) {
743: col = baij->garray[aj[j]]*bs;
744: for (k=0; k<bs; k++) {
745: MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
746: col++; a += bs;
747: }
748: }
749: }
750: PetscFree(rvals);
751: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
752: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
753: /*
754: Everyone has to call to draw the matrix since the graphics waits are
755: synchronized across all processors that share the PetscDraw object
756: */
757: PetscViewerGetSingleton(viewer,&sviewer);
758: if (!rank) {
759: MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);
760: }
761: PetscViewerRestoreSingleton(viewer,&sviewer);
762: MatDestroy(A);
763: }
764: return(0);
765: }
767: int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
768: {
769: int ierr;
770: PetscTruth isascii,isdraw,issocket,isbinary;
773: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
774: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
775: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
776: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
777: if (isascii || isdraw || issocket || isbinary) {
778: MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);
779: } else {
780: SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
781: }
782: return(0);
783: }
785: int MatDestroy_MPISBAIJ(Mat mat)
786: {
787: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
788: int ierr;
791: #if defined(PETSC_USE_LOG)
792: PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
793: #endif
794: MatStashDestroy_Private(&mat->stash);
795: MatStashDestroy_Private(&mat->bstash);
796: PetscFree(baij->rowners);
797: MatDestroy(baij->A);
798: MatDestroy(baij->B);
799: #if defined (PETSC_USE_CTABLE)
800: if (baij->colmap) {PetscTableDelete(baij->colmap);}
801: #else
802: if (baij->colmap) {PetscFree(baij->colmap);}
803: #endif
804: if (baij->garray) {PetscFree(baij->garray);}
805: if (baij->lvec) {VecDestroy(baij->lvec);}
806: if (baij->Mvctx) {VecScatterDestroy(baij->Mvctx);}
807: if (baij->rowvalues) {PetscFree(baij->rowvalues);}
808: if (baij->barray) {PetscFree(baij->barray);}
809: if (baij->hd) {PetscFree(baij->hd);}
810: #if defined(PETSC_USE_MAT_SINGLE)
811: if (baij->setvaluescopy) {PetscFree(baij->setvaluescopy);}
812: #endif
813: PetscFree(baij);
814: return(0);
815: }
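/*
   y = A*x for the parallel symmetric format. Only the upper triangle is stored, so the
   product below is assembled from three pieces: the local diagonal block applied to x,
   the off-diagonal block B applied to the scattered ghost values of x, and B applied
   transposed to the local part of x, whose result is scattered back and added into yy
   on the owning processes.
*/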
817: int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
818: {
819: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
820: int ierr,nt;
823: VecGetLocalSize(xx,&nt);
824: if (nt != A->n) {
825: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
826: }
827: VecGetLocalSize(yy,&nt);
828: if (nt != A->m) {
829: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
830: }
832: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
833: /* do diagonal part */
834: (*a->A->ops->mult)(a->A,xx,yy);
835: /* do superdiagonal part */
836: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
837: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
838: /* do subdiagonal part */
839: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
840: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
841: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
843: return(0);
844: }
846: int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
847: {
848: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
849: int ierr;
852: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
853: /* do diagonal part */
854: (*a->A->ops->multadd)(a->A,xx,yy,zz);
855: /* do superdiagonal part */
856: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
857: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
859: /* do subdiagonal part */
860: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
861: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
862: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
864: return(0);
865: }
867: int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
868: {
870: SETERRQ(1,"Matrix is symmetric. Call MatMult().");
871: /* return(0); */
872: }
874: int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
875: {
877: SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
878: /* return(0); */
879: }
881: /*
882: This only works correctly for square matrices where the subblock A->A is the
883: diagonal block
884: */
885: int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
886: {
887: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
888: int ierr;
891: /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
892: MatGetDiagonal(a->A,v);
893: return(0);
894: }
896: int MatScale_MPISBAIJ(Scalar *aa,Mat A)
897: {
898: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
899: int ierr;
902: MatScale(aa,a->A);
903: MatScale(aa,a->B);
904: return(0);
905: }
907: int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
908: {
909: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
912: if (m) *m = mat->rstart*mat->bs;
913: if (n) *n = mat->rend*mat->bs;
914: return(0);
915: }
917: int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
918: {
919: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
920: Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
921: int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
922: int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
923: int *cmap,*idx_p,cstart = mat->cstart;
926: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
927: mat->getrowactive = PETSC_TRUE;
929: if (!mat->rowvalues && (idx || v)) {
930: /*
931: allocate enough space to hold information from the longest row.
932: */
933: Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
934: Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data;
935: int max = 1,mbs = mat->mbs,tmp;
936: for (i=0; i<mbs; i++) {
937: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
938: if (max < tmp) { max = tmp; }
939: }
940: PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);
941: mat->rowindices = (int*)(mat->rowvalues + max*bs2);
942: }
943:
944: if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
945: lrow = row - brstart; /* local row index */
947: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
948: if (!v) {pvA = 0; pvB = 0;}
949: if (!idx) {pcA = 0; if (!v) pcB = 0;}
950: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
951: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
952: nztot = nzA + nzB;
954: cmap = mat->garray;
955: if (v || idx) {
956: if (nztot) {
957: /* Sort by increasing column numbers, assuming A and B already sorted */
958: int imark = -1;
959: if (v) {
960: *v = v_p = mat->rowvalues;
961: for (i=0; i<nzB; i++) {
962: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
963: else break;
964: }
965: imark = i;
966: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
967: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
968: }
969: if (idx) {
970: *idx = idx_p = mat->rowindices;
971: if (imark > -1) {
972: for (i=0; i<imark; i++) {
973: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
974: }
975: } else {
976: for (i=0; i<nzB; i++) {
977: if (cmap[cworkB[i]/bs] < cstart)
978: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
979: else break;
980: }
981: imark = i;
982: }
983: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
984: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
985: }
986: } else {
987: if (idx) *idx = 0;
988: if (v) *v = 0;
989: }
990: }
991: *nz = nztot;
992: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
993: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
994: return(0);
995: }
997: int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
998: {
999: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1002: if (baij->getrowactive == PETSC_FALSE) {
1003: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1004: }
1005: baij->getrowactive = PETSC_FALSE;
1006: return(0);
1007: }
1009: int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1010: {
1011: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1014: *bs = baij->bs;
1015: return(0);
1016: }
1018: int MatZeroEntries_MPISBAIJ(Mat A)
1019: {
1020: Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1021: int ierr;
1024: MatZeroEntries(l->A);
1025: MatZeroEntries(l->B);
1026: return(0);
1027: }
1029: int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1030: {
1031: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1032: Mat A = a->A,B = a->B;
1033: int ierr;
1034: PetscReal isend[5],irecv[5];
1037: info->block_size = (double)a->bs;
1038: MatGetInfo(A,MAT_LOCAL,info);
1039: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1040: isend[3] = info->memory; isend[4] = info->mallocs;
1041: MatGetInfo(B,MAT_LOCAL,info);
1042: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1043: isend[3] += info->memory; isend[4] += info->mallocs;
1044: if (flag == MAT_LOCAL) {
1045: info->nz_used = isend[0];
1046: info->nz_allocated = isend[1];
1047: info->nz_unneeded = isend[2];
1048: info->memory = isend[3];
1049: info->mallocs = isend[4];
1050: } else if (flag == MAT_GLOBAL_MAX) {
1051: MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
1052: info->nz_used = irecv[0];
1053: info->nz_allocated = irecv[1];
1054: info->nz_unneeded = irecv[2];
1055: info->memory = irecv[3];
1056: info->mallocs = irecv[4];
1057: } else if (flag == MAT_GLOBAL_SUM) {
1058: MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
1059: info->nz_used = irecv[0];
1060: info->nz_allocated = irecv[1];
1061: info->nz_unneeded = irecv[2];
1062: info->memory = irecv[3];
1063: info->mallocs = irecv[4];
1064: } else {
1065: SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1066: }
1067: info->rows_global = (double)A->M;
1068: info->columns_global = (double)A->N;
1069: info->rows_local = (double)A->m;
1070: info->columns_local = (double)A->N;
1071: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1072: info->fill_ratio_needed = 0;
1073: info->factor_mallocs = 0;
1074: return(0);
1075: }
1077: int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1078: {
1079: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1080: int ierr;
1083: if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1084: op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1085: op == MAT_COLUMNS_UNSORTED ||
1086: op == MAT_COLUMNS_SORTED ||
1087: op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1088: op == MAT_KEEP_ZEROED_ROWS ||
1089: op == MAT_NEW_NONZERO_LOCATION_ERR) {
1090: MatSetOption(a->A,op);
1091: MatSetOption(a->B,op);
1092: } else if (op == MAT_ROW_ORIENTED) {
1093: a->roworiented = PETSC_TRUE;
1094: MatSetOption(a->A,op);
1095: MatSetOption(a->B,op);
1096: } else if (op == MAT_ROWS_SORTED ||
1097: op == MAT_ROWS_UNSORTED ||
1098: op == MAT_YES_NEW_DIAGONALS ||
1099: op == MAT_USE_HASH_TABLE) {
1100: PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1101: } else if (op == MAT_COLUMN_ORIENTED) {
1102: a->roworiented = PETSC_FALSE;
1103: MatSetOption(a->A,op);
1104: MatSetOption(a->B,op);
1105: } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1106: a->donotstash = PETSC_TRUE;
1107: } else if (op == MAT_NO_NEW_DIAGONALS) {
1108: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1109: } else if (op == MAT_USE_HASH_TABLE) {
1110: a->ht_flag = PETSC_TRUE;
1111: } else {
1112: SETERRQ(PETSC_ERR_SUP,"unknown option");
1113: }
1114: return(0);
1115: }
1117: int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1118: {
1120: SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
1121: /* return(0); */
1122: }
1124: int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1125: {
1126: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1127: Mat a = baij->A,b = baij->B;
1128: int ierr,s1,s2,s3;
1131: if (ll != rr) {
1132: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
1133: }
1134: MatGetLocalSize(mat,&s2,&s3);
1135: if (rr) {
1136: VecGetLocalSize(rr,&s1);
1137: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1138: /* Overlap communication with computation. */
1139: VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
1140: /*} if (ll) { */
1141: VecGetLocalSize(ll,&s1);
1142: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1143: (*b->ops->diagonalscale)(b,ll,PETSC_NULL);
1144: /* } */
1145: /* scale the diagonal block */
1146: (*a->ops->diagonalscale)(a,ll,rr);
1148: /* if (rr) { */
1149: /* Do a scatter end and then right scale the off-diagonal block */
1150: VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
1151: (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);
1152: }
1153:
1154: return(0);
1155: }
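/*
   The row indices in 'is' may refer to rows owned by other processes, so the routine below
   first counts how many of the requested rows belong to each process, exchanges those counts
   and the row indices with nonblocking MPI sends and receives, and only then zeros the
   locally owned rows (optionally setting the diagonal).
*/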
1157: int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
1158: {
1159: Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1160: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1161: int *procs,*nprocs,j,idx,nsends,*work,row;
1162: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1163: int *rvalues,tag = A->tag,count,base,slen,n,*source;
1164: int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1165: MPI_Comm comm = A->comm;
1166: MPI_Request *send_waits,*recv_waits;
1167: MPI_Status recv_status,*send_status;
1168: IS istmp;
1169: PetscTruth found;
1172: ISGetSize(is,&N);
1173: ISGetIndices(is,&rows);
1174:
1175: /* first count number of contributors to each processor */
1176: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
1177: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
1178: procs = nprocs + size;
1179: ierr = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
1180: for (i=0; i<N; i++) {
1181: idx = rows[i];
1182: found = PETSC_FALSE;
1183: for (j=0; j<size; j++) {
1184: if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1185: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1186: }
1187: }
1188: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1189: }
1190: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
1191:
1192: /* inform other processors of number of messages and max length*/
1193: ierr = PetscMalloc(2*size*sizeof(int),&work);
1194: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
1195: nmax = work[rank];
1196: nrecvs = work[size+rank];
1197: ierr = PetscFree(work);
1198:
1199: /* post receives: */
1200: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1201: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1202: for (i=0; i<nrecvs; i++) {
1203: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1204: }
1205:
1206: /* do sends:
1207: 1) starts[i] gives the starting index in svalues for stuff going to
1208: the ith processor
1209: */
1210: PetscMalloc((N+1)*sizeof(int),&svalues);
1211: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1212: PetscMalloc((size+1)*sizeof(int),&starts);
1213: starts[0] = 0;
1214: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1215: for (i=0; i<N; i++) {
1216: svalues[starts[owner[i]]++] = rows[i];
1217: }
1218: ISRestoreIndices(is,&rows);
1219:
1220: starts[0] = 0;
1221: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1222: count = 0;
1223: for (i=0; i<size; i++) {
1224: if (procs[i]) {
1225: MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
1226: }
1227: }
1228: PetscFree(starts);
1230: base = owners[rank]*bs;
1231:
1232: /* wait on receives */
1233: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1234: source = lens + nrecvs;
1235: count = nrecvs; slen = 0;
1236: while (count) {
1237: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1238: /* unpack receives into our local space */
1239: MPI_Get_count(&recv_status,MPI_INT,&n);
1240: source[imdex] = recv_status.MPI_SOURCE;
1241: lens[imdex] = n;
1242: slen += n;
1243: count--;
1244: }
1245: PetscFree(recv_waits);
1246:
1247: /* move the data into the send scatter */
1248: PetscMalloc((slen+1)*sizeof(int),&lrows);
1249: count = 0;
1250: for (i=0; i<nrecvs; i++) {
1251: values = rvalues + i*nmax;
1252: for (j=0; j<lens[i]; j++) {
1253: lrows[count++] = values[j] - base;
1254: }
1255: }
1256: PetscFree(rvalues);
1257: PetscFree(lens);
1258: PetscFree(owner);
1259: PetscFree(nprocs);
1260:
1261: /* actually zap the local rows */
1262: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
1263: PetscLogObjectParent(A,istmp);
1265: /*
1266: Zero the required rows. If the "diagonal block" of the matrix
1267: is square and the user wishes to set the diagonal we use separate
1268: code so that MatSetValues() is not called for each diagonal entry; that would
1269: allocate new memory, causing lots of mallocs and slowing things down.
1271: Contributed by: Mathew Knepley
1272: */
1273: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1274: MatZeroRows_SeqBAIJ(l->B,istmp,0);
1275: if (diag && (l->A->M == l->A->N)) {
1276: MatZeroRows_SeqSBAIJ(l->A,istmp,diag);
1277: } else if (diag) {
1278: MatZeroRows_SeqSBAIJ(l->A,istmp,0);
1279: if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1280: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1281: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1282: }
1283: for (i=0; i<slen; i++) {
1284: row = lrows[i] + rstart_bs;
1285: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
1286: }
1287: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1288: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1289: } else {
1290: MatZeroRows_SeqSBAIJ(l->A,istmp,0);
1291: }
1293: ISDestroy(istmp);
1294: PetscFree(lrows);
1296: /* wait on sends */
1297: if (nsends) {
1298: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1299: ierr = MPI_Waitall(nsends,send_waits,send_status);
1300: ierr = PetscFree(send_status);
1301: }
1302: PetscFree(send_waits);
1303: PetscFree(svalues);
1305: return(0);
1306: }
1308: int MatPrintHelp_MPISBAIJ(Mat A)
1309: {
1310: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1311: MPI_Comm comm = A->comm;
1312: static int called = 0;
1313: int ierr;
1316: if (!a->rank) {
1317: MatPrintHelp_SeqSBAIJ(a->A);
1318: }
1319: if (called) {return(0);} else called = 1;
1320: (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");
1321: (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1322: return(0);
1323: }
1325: int MatSetUnfactored_MPISBAIJ(Mat A)
1326: {
1327: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1328: int ierr;
1331: MatSetUnfactored(a->A);
1332: return(0);
1333: }
1335: static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1337: int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1338: {
1339: Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1340: Mat a,b,c,d;
1341: PetscTruth flg;
1342: int ierr;
1345: PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);
1346: if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1347: a = matA->A; b = matA->B;
1348: c = matB->A; d = matB->B;
1350: MatEqual(a,c,&flg);
1351: if (flg == PETSC_TRUE) {
1352: MatEqual(b,d,&flg);
1353: }
1354: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1355: return(0);
1356: }
1358: int MatSetUpPreallocation_MPISBAIJ(Mat A)
1359: {
1360: int ierr;
1363: MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1364: return(0);
1365: }
1366: /* -------------------------------------------------------------------*/
1367: static struct _MatOps MatOps_Values = {
1368: MatSetValues_MPISBAIJ,
1369: MatGetRow_MPISBAIJ,
1370: MatRestoreRow_MPISBAIJ,
1371: MatMult_MPISBAIJ,
1372: MatMultAdd_MPISBAIJ,
1373: MatMultTranspose_MPISBAIJ,
1374: MatMultTransposeAdd_MPISBAIJ,
1375: 0,
1376: 0,
1377: 0,
1378: 0,
1379: 0,
1380: 0,
1381: 0,
1382: MatTranspose_MPISBAIJ,
1383: MatGetInfo_MPISBAIJ,
1384: MatEqual_MPISBAIJ,
1385: MatGetDiagonal_MPISBAIJ,
1386: MatDiagonalScale_MPISBAIJ,
1387: MatNorm_MPISBAIJ,
1388: MatAssemblyBegin_MPISBAIJ,
1389: MatAssemblyEnd_MPISBAIJ,
1390: 0,
1391: MatSetOption_MPISBAIJ,
1392: MatZeroEntries_MPISBAIJ,
1393: MatZeroRows_MPISBAIJ,
1394: 0,
1395: 0,
1396: 0,
1397: 0,
1398: MatSetUpPreallocation_MPISBAIJ,
1399: 0,
1400: MatGetOwnershipRange_MPISBAIJ,
1401: 0,
1402: 0,
1403: 0,
1404: 0,
1405: MatDuplicate_MPISBAIJ,
1406: 0,
1407: 0,
1408: 0,
1409: 0,
1410: 0,
1411: MatGetSubMatrices_MPISBAIJ,
1412: MatIncreaseOverlap_MPISBAIJ,
1413: MatGetValues_MPISBAIJ,
1414: 0,
1415: MatPrintHelp_MPISBAIJ,
1416: MatScale_MPISBAIJ,
1417: 0,
1418: 0,
1419: 0,
1420: MatGetBlockSize_MPISBAIJ,
1421: 0,
1422: 0,
1423: 0,
1424: 0,
1425: 0,
1426: 0,
1427: MatSetUnfactored_MPISBAIJ,
1428: 0,
1429: MatSetValuesBlocked_MPISBAIJ,
1430: 0,
1431: 0,
1432: 0,
1433: MatGetMaps_Petsc,
1434: 0,
1435: 0,
1436: 0,
1437: 0,
1438: 0,
1439: 0,
1440: MatGetRowMax_MPISBAIJ};
1443: EXTERN_C_BEGIN
1444: int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1445: {
1447: *a = ((Mat_MPISBAIJ *)A->data)->A;
1448: *iscopy = PETSC_FALSE;
1449: return(0);
1450: }
1451: EXTERN_C_END
1453: EXTERN_C_BEGIN
1454: int MatCreate_MPISBAIJ(Mat B)
1455: {
1456: Mat_MPISBAIJ *b;
1457: int ierr;
1458: PetscTruth flg;
1462: ierr = PetscNew(Mat_MPISBAIJ,&b);
1463: B->data = (void*)b;
1464: ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));
1465: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1467: B->ops->destroy = MatDestroy_MPISBAIJ;
1468: B->ops->view = MatView_MPISBAIJ;
1469: B->mapping = 0;
1470: B->factor = 0;
1471: B->assembled = PETSC_FALSE;
1473: B->insertmode = NOT_SET_VALUES;
1474: MPI_Comm_rank(B->comm,&b->rank);
1475: MPI_Comm_size(B->comm,&b->size);
1477: /* build local table of row and column ownerships */
1478: ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);
1479: b->cowners = b->rowners + b->size + 2;
1480: b->rowners_bs = b->cowners + b->size + 2;
1481: PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1483: /* build cache for off array entries formed */
1484: MatStashCreate_Private(B->comm,1,&B->stash);
1485: b->donotstash = PETSC_FALSE;
1486: b->colmap = PETSC_NULL;
1487: b->garray = PETSC_NULL;
1488: b->roworiented = PETSC_TRUE;
1490: #if defined(PETSC_USE_MAT_SINGLE)
1491: /* stuff for MatSetValues_XXX in single precision */
1492: b->setvalueslen = 0;
1493: b->setvaluescopy = PETSC_NULL;
1494: #endif
1496: /* stuff used in block assembly */
1497: b->barray = 0;
1499: /* stuff used for matrix vector multiply */
1500: b->lvec = 0;
1501: b->Mvctx = 0;
1503: /* stuff for MatGetRow() */
1504: b->rowindices = 0;
1505: b->rowvalues = 0;
1506: b->getrowactive = PETSC_FALSE;
1508: /* hash table stuff */
1509: b->ht = 0;
1510: b->hd = 0;
1511: b->ht_size = 0;
1512: b->ht_flag = PETSC_FALSE;
1513: b->ht_fact = 0;
1514: b->ht_total_ct = 0;
1515: b->ht_insert_ct = 0;
1517: PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);
1518: if (flg) {
1519: double fact = 1.39;
1520: MatSetOption(B,MAT_USE_HASH_TABLE);
1521: PetscOptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);
1522: if (fact <= 1.0) fact = 1.39;
1523: MatMPIBAIJSetHashTableFactor(B,fact);
1524: PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1525: }
1526: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1527: "MatStoreValues_MPISBAIJ",
1528: MatStoreValues_MPISBAIJ);
1529: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1530: "MatRetrieveValues_MPISBAIJ",
1531: MatRetrieveValues_MPISBAIJ);
1532: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1533: "MatGetDiagonalBlock_MPISBAIJ",
1534: MatGetDiagonalBlock_MPISBAIJ);
1535: return(0);
1536: }
1537: EXTERN_C_END
1539: /*@C
1540: MatMPISBAIJSetPreallocation - For good matrix assembly performance
1541: the user should preallocate the matrix storage by setting the parameters
1542: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1543: performance can be increased by more than a factor of 50.
1545: Collective on Mat
1547: Input Parameters:
1548: + A - the matrix
1549: . bs - size of block
1550: . d_nz - number of block nonzeros per block row in diagonal portion of local
1551: submatrix (same for all local rows)
1552: . d_nnz - array containing the number of block nonzeros in the various block rows
1553: in the diagonal portion of the local submatrix (possibly different for each block
1554: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
1555: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1556: submatrix (same for all local rows).
1557: - o_nnz - array containing the number of nonzeros in the various block rows of the
1558: off-diagonal portion of the local submatrix (possibly different for
1559: each block row) or PETSC_NULL.
1562: Options Database Keys:
1563: . -mat_no_unroll - uses code that does not unroll the loops in the
1564: block calculations (much slower)
1565: . -mat_block_size - size of the blocks to use
1567: Notes:
1569: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1570: then it must be used on all processors that share the object for that argument.
1572: Storage Information:
1573: For a square global matrix we define each processor's diagonal portion
1574: to be its local rows and the corresponding columns (a square submatrix);
1575: each processor's off-diagonal portion encompasses the remainder of the
1576: local matrix (a rectangular submatrix).
1578: The user can specify preallocated storage for the diagonal part of
1579: the local submatrix with either d_nz or d_nnz (not both). Set
1580: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1581: memory allocation. Likewise, specify preallocated storage for the
1582: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1584: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1585: the figure below we depict these three local rows and all columns (0-11).
1587: .vb
1588: 0 1 2 3 4 5 6 7 8 9 10 11
1589: -------------------
1590: row 3 | o o o d d d o o o o o o
1591: row 4 | o o o d d d o o o o o o
1592: row 5 | o o o d d d o o o o o o
1593: -------------------
1594: .ve
1595:
1596: Thus, any entries in the d locations are stored in the d (diagonal)
1597: submatrix, and any entries in the o locations are stored in the
1598: o (off-diagonal) submatrix. Note that the d and the o submatrices are
1599: stored simply in the MATSEQBAIJ format for compressed row storage.
1601: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1602: and o_nz should indicate the number of block nonzeros per row in the o matrix.
1603: In general, for PDE problems in which most nonzeros are near the diagonal,
1604: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1605: or you will get TERRIBLE performance; see the users' manual chapter on
1606: matrices.
1608: Level: intermediate
1610: .keywords: matrix, block, aij, compressed row, sparse, parallel
1612: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1613: @*/
1615: int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1616: {
1617: Mat_MPISBAIJ *b;
1618: int ierr,i,mbs,Mbs;
1619: PetscTruth flg2;
1622: PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);
1623: if (!flg2) return(0);
1625: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
1627: if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1628: if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1629: if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1630: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1631: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1632: if (d_nnz) {
1633: for (i=0; i<B->m/bs; i++) {
1634: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1635: }
1636: }
1637: if (o_nnz) {
1638: for (i=0; i<B->m/bs; i++) {
1639: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1640: }
1641: }
1642: B->preallocated = PETSC_TRUE;
1643: PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
1644: PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);
1645: MapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1646: MapCreateMPI(B->comm,B->m,B->M,&B->cmap);
1648: b = (Mat_MPISBAIJ*)B->data;
1649: mbs = B->m/bs;
1650: Mbs = B->M/bs;
1651: if (mbs*bs != B->m) {
1652: SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1653: }
1655: b->bs = bs;
1656: b->bs2 = bs*bs;
1657: b->mbs = mbs;
1658: b->nbs = mbs;
1659: b->Mbs = Mbs;
1660: b->Nbs = Mbs;
1662: MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1663: b->rowners[0] = 0;
1664: for (i=2; i<=b->size; i++) {
1665: b->rowners[i] += b->rowners[i-1];
1666: }
1667: b->rstart = b->rowners[b->rank];
1668: b->rend = b->rowners[b->rank+1];
1669: b->cstart = b->rstart;
1670: b->cend = b->rend;
1671: for (i=0; i<=b->size; i++) {
1672: b->rowners_bs[i] = b->rowners[i]*bs;
1673: }
1674: b->rstart_bs = b->rstart*bs;
1675: b->rend_bs = b->rend*bs;
1676:
1677: b->cstart_bs = b->cstart*bs;
1678: b->cend_bs = b->cend*bs;
1679:
1681: MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);
1682: PetscLogObjectParent(B,b->A);
1683: MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);
1684: PetscLogObjectParent(B,b->B);
1686: /* build cache for off array entries formed */
1687: MatStashCreate_Private(B->comm,bs,&B->bstash);
1689: return(0);
1690: }
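/* Illustrative usage sketch (hypothetical values, not part of the original source):
   preallocating an already created MATMPISBAIJ matrix B with block size 2, about 5
   nonzero blocks per block row in the diagonal part and 2 in the off-diagonal part:

      MatMPISBAIJSetPreallocation(B,2,5,PETSC_NULL,2,PETSC_NULL);

   Exact per-block-row counts can be supplied instead through the d_nnz and o_nnz arrays. */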
1692: /*@C
1693: MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1694: (block compressed row). For good matrix assembly performance
1695: the user should preallocate the matrix storage by setting the parameters
1696: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1697: performance can be increased by more than a factor of 50.
1699: Collective on MPI_Comm
1701: Input Parameters:
1702: + comm - MPI communicator
1703: . bs - size of block
1704: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1705: This value should be the same as the local size used in creating the
1706: y vector for the matrix-vector product y = Ax.
1707: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1708: This value should be the same as the local size used in creating the
1709: x vector for the matrix-vector product y = Ax.
1710: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1711: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1712: . d_nz - number of block nonzeros per block row in diagonal portion of local
1713: submatrix (same for all local rows)
1714: . d_nnz - array containing the number of block nonzeros in the various block rows
1715: in the diagonal portion of the local submatrix (possibly different for each block
1716: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
1717: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1718: submatrix (same for all local rows).
1719: - o_nnz - array containing the number of block nonzeros in the various block rows of the
1720: off-diagonal portion of the local submatrix (possibly different for
1721: each block row) or PETSC_NULL.
1723: Output Parameter:
1724: . A - the matrix
1726: Options Database Keys:
1727: . -mat_no_unroll - uses code that does not unroll the loops in the
1728: block calculations (much slower)
1729: . -mat_block_size - size of the blocks to use
1730: . -mat_mpi - use the parallel matrix data structures even on one processor
1731: (defaults to using SeqSBAIJ format on one processor)
1733: Notes:
1734: The user MUST specify either the local or global matrix dimensions
1735: (possibly both).
1737: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1738: then it must be used on all processors that share the object for that argument.
1740: Storage Information:
1741: For a square global matrix we define each processor's diagonal portion
1742: to be its local rows and the corresponding columns (a square submatrix);
1743: each processor's off-diagonal portion encompasses the remainder of the
1744: local matrix (a rectangular submatrix).
1746: The user can specify preallocated storage for the diagonal part of
1747: the local submatrix with either d_nz or d_nnz (not both). Set
1748: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1749: memory allocation. Likewise, specify preallocated storage for the
1750: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1752: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1753: the figure below we depict these three local rows and all columns (0-11).
1755: .vb
1756: 0 1 2 3 4 5 6 7 8 9 10 11
1757: -------------------
1758: row 3 | o o o d d d o o o o o o
1759: row 4 | o o o d d d o o o o o o
1760: row 5 | o o o d d d o o o o o o
1761: -------------------
1762: .ve
1763:
1764: Thus, any entries in the d locations are stored in the d (diagonal)
1765: submatrix, and any entries in the o locations are stored in the
1766: o (off-diagonal) submatrix. Note that the d submatrix is stored in the
1767: MATSEQSBAIJ format and the o submatrix in the MATSEQBAIJ format, both using compressed row storage.
1769: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1770: and o_nz should indicate the number of block nonzeros per row in the o matrix.
1771: In general, for PDE problems in which most nonzeros are near the diagonal,
1772: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1773: or you will get TERRIBLE performance; see the users' manual chapter on
1774: matrices.
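
   As a rough illustration only (the sizes and preallocation values below are
   examples, not recommendations), a symmetric matrix with block size 2 and
   1000 block rows per process could be created with
.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,2*1000,2*1000,PETSC_DETERMINE,PETSC_DETERMINE,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
   followed by calls to MatSetValues() (or MatSetValuesBlocked()) and
   MatAssemblyBegin()/MatAssemblyEnd().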
1776: Level: intermediate
1778: .keywords: matrix, block, aij, compressed row, sparse, parallel
1780: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1781: @*/
1783: int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1784: {
1785: int ierr,size;
1788: MatCreate(comm,m,n,M,N,A);
1789: MPI_Comm_size(comm,&size);
1790: if (size > 1) {
1791: MatSetType(*A,MATMPISBAIJ);
1792: MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
1793: } else {
1794: MatSetType(*A,MATSEQSBAIJ);
1795: MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
1796: }
1797: return(0);
1798: }
1801: static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1802: {
1803: Mat mat;
1804: Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1805: int ierr,len=0;
1808: *newmat = 0;
1809: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1810: MatSetType(mat,MATMPISBAIJ);
1811: mat->preallocated = PETSC_TRUE;
1812: a = (Mat_MPISBAIJ*)mat->data;
1813: a->bs = oldmat->bs;
1814: a->bs2 = oldmat->bs2;
1815: a->mbs = oldmat->mbs;
1816: a->nbs = oldmat->nbs;
1817: a->Mbs = oldmat->Mbs;
1818: a->Nbs = oldmat->Nbs;
1819:
1820: a->rstart = oldmat->rstart;
1821: a->rend = oldmat->rend;
1822: a->cstart = oldmat->cstart;
1823: a->cend = oldmat->cend;
1824: a->size = oldmat->size;
1825: a->rank = oldmat->rank;
1826: a->donotstash = oldmat->donotstash;
1827: a->roworiented = oldmat->roworiented;
1828: a->rowindices = 0;
1829: a->rowvalues = 0;
1830: a->getrowactive = PETSC_FALSE;
1831: a->barray = 0;
1832: a->rstart_bs = oldmat->rstart_bs;
1833: a->rend_bs = oldmat->rend_bs;
1834: a->cstart_bs = oldmat->cstart_bs;
1835: a->cend_bs = oldmat->cend_bs;
1837: /* hash table stuff */
1838: a->ht = 0;
1839: a->hd = 0;
1840: a->ht_size = 0;
1841: a->ht_flag = oldmat->ht_flag;
1842: a->ht_fact = oldmat->ht_fact;
1843: a->ht_total_ct = 0;
1844: a->ht_insert_ct = 0;
1846: PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);
1847: PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1848: a->cowners = a->rowners + a->size + 2;
1849: a->rowners_bs = a->cowners + a->size + 2;
1850: PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));
1851: MatStashCreate_Private(matin->comm,1,&mat->stash);
1852: MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);
1853: if (oldmat->colmap) {
1854: #if defined (PETSC_USE_CTABLE)
1855: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1856: #else
1857: PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);
1858: PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
1859: PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
1860: #endif
1861: } else a->colmap = 0;
1862: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1863: PetscMalloc(len*sizeof(int),&a->garray);
1864: PetscLogObjectMemory(mat,len*sizeof(int));
1865: PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1866: } else a->garray = 0;
1867:
1868: VecDuplicate(oldmat->lvec,&a->lvec);
1869: PetscLogObjectParent(mat,a->lvec);
1870: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1872: PetscLogObjectParent(mat,a->Mvctx);
1873: MatDuplicate(oldmat->A,cpvalues,&a->A);
1874: PetscLogObjectParent(mat,a->A);
1875: MatDuplicate(oldmat->B,cpvalues,&a->B);
1876: PetscLogObjectParent(mat,a->B);
1877: PetscFListDuplicate(matin->qlist,&mat->qlist);
1878: *newmat = mat;
1879: return(0);
1880: }
1882: #include "petscsys.h"
1884: EXTERN_C_BEGIN
1885: int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1886: {
1887: Mat A;
1888: int i,nz,ierr,j,rstart,rend,fd;
1889: Scalar *vals,*buf;
1890: MPI_Comm comm = ((PetscObject)viewer)->comm;
1891: MPI_Status status;
1892: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
1893: int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
1894: int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
1895: int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
1896: int dcount,kmax,k,nzcount,tmp;
1897:
1899: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1901: MPI_Comm_size(comm,&size);
1902: MPI_Comm_rank(comm,&rank);
1903: if (!rank) {
1904: PetscViewerBinaryGetDescriptor(viewer,&fd);
1905: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1906: if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1907: if (header[3] < 0) {
1908: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
1909: }
1910: }
1912: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1913: M = header[1]; N = header[2];
1915: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1917: /*
1918: This code adds extra rows to make sure the number of rows is
1919: divisible by the blocksize
1920: */
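  /* For example (numbers purely illustrative): M=10 with bs=3 gives Mbs=3 and
     extra_rows = 3 - 10 + 3*3 = 2; since extra_rows != bs, Mbs becomes 4 and the
     matrix is padded to 12 rows, the padding rows receiving a unit diagonal below. */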
1921: Mbs = M/bs;
1922: extra_rows = bs - M + bs*(Mbs);
1923: if (extra_rows == bs) extra_rows = 0;
1924: else Mbs++;
1925: if (extra_rows && !rank) {
1926: PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
1927: }
1929: /* determine ownership of all rows */
1930: mbs = Mbs/size + ((Mbs % size) > rank);
1931: m = mbs*bs;
1932: ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);
1933: browners = rowners + size + 1;
1934: ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1935: rowners[0] = 0;
1936: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
1937: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
1938: rstart = rowners[rank];
1939: rend = rowners[rank+1];
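  /* For example (numbers purely illustrative): Mbs=5 block rows on size=3 processes
     gives mbs = 2,2,1 on ranks 0,1,2, so rowners = {0,2,4,5} and, with bs=2,
     browners = {0,4,8,10}. */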
1940:
1941: /* distribute row lengths to all processors */
1942: PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);
1943: if (!rank) {
1944: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1945: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1946: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1947: PetscMalloc(size*sizeof(int),&sndcounts);
1948: for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
1949: MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
1950: PetscFree(sndcounts);
1951: } else {
1952: MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
1953: }
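  /* At this point locrowlens[] holds the point-row lengths of this process's
     (rend-rstart)*bs local rows, scattered from process 0 with browners[] as the
     scatter displacements. */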
1954:
1955: if (!rank) { /* procs[0] */
1956: /* calculate the number of nonzeros on each processor */
1957: PetscMalloc(size*sizeof(int),&procsnz);
1958: PetscMemzero(procsnz,size*sizeof(int));
1959: for (i=0; i<size; i++) {
1960: for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
1961: procsnz[i] += rowlengths[j];
1962: }
1963: }
1964: PetscFree(rowlengths);
1965:
1966: /* determine max buffer needed and allocate it */
1967: maxnz = 0;
1968: for (i=0; i<size; i++) {
1969: maxnz = PetscMax(maxnz,procsnz[i]);
1970: }
1971: PetscMalloc(maxnz*sizeof(int),&cols);
1973: /* read in my part of the matrix column indices */
1974: nz = procsnz[0];
1975: ierr = PetscMalloc(nz*sizeof(int),&ibuf);
1976: mycols = ibuf;
1977: if (size == 1) nz -= extra_rows;
1978: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1979: if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1981: /* read in the column indices for every other processor (except the last) and ship them off */
1982: for (i=1; i<size-1; i++) {
1983: nz = procsnz[i];
1984: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1985: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1986: }
1987: /* read in the column indices for the last processor */
1988: if (size != 1) {
1989: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
1990: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1991: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1992: MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1993: }
1994: PetscFree(cols);
1995: } else { /* procs[i], i>0 */
1996: /* determine buffer space needed for message */
1997: nz = 0;
1998: for (i=0; i<m; i++) {
1999: nz += locrowlens[i];
2000: }
2001: ierr = PetscMalloc(nz*sizeof(int),&ibuf);
2002: mycols = ibuf;
2003: /* receive message of column indices */
2004: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
2005: MPI_Get_count(&status,MPI_INT,&maxnz);
2006: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2007: }
2009: /* loop over local rows, determining number of off diagonal entries */
2010: ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);
2011: odlens = dlens + (rend-rstart);
2012: ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);
2013: ierr = PetscMemzero(mask,3*Mbs*sizeof(int));
2014: masked1 = mask + Mbs;
2015: masked2 = masked1 + Mbs;
2016: rowcount = 0; nzcount = 0;
2017: for (i=0; i<mbs; i++) {
2018: dcount = 0;
2019: odcount = 0;
2020: for (j=0; j<bs; j++) {
2021: kmax = locrowlens[rowcount];
2022: for (k=0; k<kmax; k++) {
2023: tmp = mycols[nzcount++]/bs; /* block col. index */
2024: if (!mask[tmp]) {
2025: mask[tmp] = 1;
2026: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2027: else masked1[dcount++] = tmp; /* entry in diag portion */
2028: }
2029: }
2030: rowcount++;
2031: }
2032:
2033: dlens[i] = dcount; /* d_nnz[i] */
2034: odlens[i] = odcount; /* o_nnz[i] */
2036: /* zero out the mask elements we set */
2037: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2038: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2039: }
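  /* dlens[i] and odlens[i] now count, for local block row i, the distinct block columns
     falling inside and outside the local block-column range [rstart,rend); they are
     passed below to MatCreateMPISBAIJ() as the d_nnz and o_nnz preallocation arrays. */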
2040:
2041: /* create our matrix */
2042: MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
2043:
2044: A = *newmat;
2045: MatSetOption(A,MAT_COLUMNS_SORTED);
2046:
2047: if (!rank) {
2048: PetscMalloc(maxnz*sizeof(Scalar),&buf);
2049: /* read in my part of the matrix numerical values */
2050: nz = procsnz[0];
2051: vals = buf;
2052: mycols = ibuf;
2053: if (size == 1) nz -= extra_rows;
2054: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2055: if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2057: /* insert into matrix */
2058: jj = rstart*bs;
2059: for (i=0; i<m; i++) {
2060: MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2061: mycols += locrowlens[i];
2062: vals += locrowlens[i];
2063: jj++;
2064: }
2066: /* read in other processors (except the last one) and ship out */
2067: for (i=1; i<size-1; i++) {
2068: nz = procsnz[i];
2069: vals = buf;
2070: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2071: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2072: }
2073: /* the last proc */
2074: if (size != 1){
2075: nz = procsnz[i] - extra_rows;
2076: vals = buf;
2077: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2078: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2079: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
2080: }
2081: PetscFree(procsnz);
2083: } else {
2084: /* receive numeric values */
2085: PetscMalloc(nz*sizeof(Scalar),&buf);
2087: /* receive message of values */
2088: vals = buf;
2089: mycols = ibuf;
2090: ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2091: ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2092: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2094: /* insert into matrix */
2095: jj = rstart*bs;
2096: for (i=0; i<m; i++) {
2097: ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2098: mycols += locrowlens[i];
2099: vals += locrowlens[i];
2100: jj++;
2101: }
2102: }
2104: PetscFree(locrowlens);
2105: PetscFree(buf);
2106: PetscFree(ibuf);
2107: PetscFree(rowners);
2108: PetscFree(dlens);
2109: PetscFree(mask);
2110: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2111: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2112: return(0);
2113: }
2114: EXTERN_C_END
2116: /*@
2117: MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2119: Input Parameters:
2120: . mat - the matrix
2121: . fact - factor
2123: Collective on Mat
2125: Level: advanced
2127: Notes:
2128: This can also be set by the command line option: -mat_use_hash_table fact
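
   For example (the value 1.6 is only illustrative), the factor can be set either in code,
.vb
   ierr = MatMPISBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
.ve
   or at runtime with -mat_use_hash_table 1.6.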
2130: .keywords: matrix, hashtable, factor, HT
2132: .seealso: MatSetOption()
2133: @*/
2134: int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2135: {
2137: SETERRQ(1,"Function not yet written for SBAIJ format");
2138: /* return(0); */
2139: }
2141: int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2142: {
2143: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2144: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data;
2145: PetscReal atmp;
2146: double *work,*svalues,*rvalues;
2147: int ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2148: int rank,size,*rowners_bs,dest,count,source;
2149: Scalar *va;
2150: MatScalar *ba;
2151: MPI_Status stat;
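  /* Descriptive sketch of the approach: only the upper triangle of the symmetric matrix
     is stored, so an entry of the off-diagonal block B at local row i and global column j
     contributes both to the maximum of row i (owned here) and to the maximum of row j,
     which is owned by a higher-ranked process.  Each process therefore takes the row
     maxima of its diagonal block A directly, accumulates the absolute values of B
     column-wise in work[], and sends the slices of work[] owned by higher-ranked
     processes to them; every receiving process merges the incoming slices into v. */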
2154: MatGetRowMax(a->A,v);
2155: VecGetArray(v,&va);
2157: MPI_Comm_size(PETSC_COMM_WORLD,&size);
2158: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
2160: bs = a->bs;
2161: mbs = a->mbs;
2162: Mbs = a->Mbs;
2163: ba = b->a;
2164: bi = b->i;
2165: bj = b->j;
2166: /*
2167: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d\n",rank,bs*Mbs,bs,mbs);
2168: PetscSynchronizedFlush(PETSC_COMM_WORLD);
2169: */
2171: /* find ownerships */
2172: rowners_bs = a->rowners_bs;
2173: /*
2174: if (!rank){
2175: for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2176: }
2177: */
2179: /* each proc creates an array to be distributed */
2180: PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);
2181: PetscMemzero(work,bs*Mbs*sizeof(PetscReal));
2183: /* row_max for B */
2184: if (rank != size-1){
2185: for (i=0; i<mbs; i++) {
2186: ncols = bi[1] - bi[0]; bi++;
2187: brow = bs*i;
2188: for (j=0; j<ncols; j++){
2189: bcol = bs*(*bj);
2190: for (kcol=0; kcol<bs; kcol++){
2191: col = bcol + kcol; /* local col index */
2192: col += rowners_bs[rank+1]; /* global col index */
2193: /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2194: for (krow=0; krow<bs; krow++){
2195: atmp = PetscAbsScalar(*ba); ba++;
2196: row = brow + krow; /* local row index */
2197: /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2198: if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2199: if (work[col] < atmp) work[col] = atmp;
2200: }
2201: }
2202: bj++;
2203: }
2204: }
2205: /*
2206: PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2207: for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2208: PetscPrintf(PETSC_COMM_SELF,"[%d]: \n",rank);
2209: */
2211: /* send values to its owners */
2212: for (dest=rank+1; dest<size; dest++){
2213: svalues = work + rowners_bs[dest];
2214: count = rowners_bs[dest+1]-rowners_bs[dest];
2215: MPI_Send(svalues,count,MPI_DOUBLE,dest,rank,PETSC_COMM_WORLD);
2216: /*
2217: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2218: PetscSynchronizedFlush(PETSC_COMM_WORLD);
2219: */
2220: }
2221: }
2222:
2223: /* receive values */
2224: if (rank){
2225: rvalues = work;
2226: count = rowners_bs[rank+1]-rowners_bs[rank];
2227: for (source=0; source<rank; source++){
2228: MPI_Recv(rvalues,count,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);
2229: /* process values */
2230: for (i=0; i<count; i++){
2231: if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2232: }
2233: /*
2234: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g\n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2235: PetscSynchronizedFlush(PETSC_COMM_WORLD);
2236: */
2237: }
2238: }
2240: VecRestoreArray(v,&va);
2241: PetscFree(work);
2242: return(0);
2243: }