Actual source code: mpiaij.c
1: /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/vec/vecimpl.h"
5: #include "src/inline/spops.h"
7: /*
8: Local utility routine that creates a mapping from the global column
9: number to the local number in the off-diagonal part of the local
10: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
11: a slightly higher hash table cost; without it, it is not scalable (each processor
12: has an order N integer array) but is fast to access.
13: */
16: int CreateColmap_MPIAIJ_Private(Mat mat)
17: {
18: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
19: int n = aij->B->n,i,ierr;
22: #if defined (PETSC_USE_CTABLE)
23: PetscTableCreate(n,&aij->colmap);
24: for (i=0; i<n; i++){
25: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
26: }
27: #else
28: PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
29: PetscLogObjectMemory(mat,mat->N*sizeof(int));
30: PetscMemzero(aij->colmap,mat->N*sizeof(int));
31: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32: #endif
33: return(0);
34: }
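/*
   Illustrative sketch (added for clarity, not part of the original mpiaij.c): how the colmap
   built above is typically consulted to translate a global column number into a local column
   index of the off-diagonal block B.  The names gcol and lcol are hypothetical.  In both
   configurations the map stores "local index + 1", so a result of -1 after the decrement
   means the column does not occur in B on this processor.

   #if defined (PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/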
36: #define CHUNKSIZE 15
37: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
38: { \
39: \
40: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
41: rmax = aimax[row]; nrow = ailen[row]; \
42: col1 = col - shift; \
43: \
44: low = 0; high = nrow; \
45: while (high-low > 5) { \
46: t = (low+high)/2; \
47: if (rp[t] > col) high = t; \
48: else low = t; \
49: } \
50: for (_i=low; _i<high; _i++) { \
51: if (rp[_i] > col1) break; \
52: if (rp[_i] == col1) { \
53: if (addv == ADD_VALUES) ap[_i] += value; \
54: else ap[_i] = value; \
55: goto a_noinsert; \
56: } \
57: } \
58: if (nonew == 1) goto a_noinsert; \
59: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
60: if (nrow >= rmax) { \
61: /* there is no extra room in row, therefore enlarge */ \
62: int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63: PetscScalar *new_a; \
64: \
65: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
66: \
67: /* malloc new storage space */ \
68: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
69: PetscMalloc(len,&new_a); \
70: new_j = (int*)(new_a + new_nz); \
71: new_i = new_j + new_nz; \
72: \
73: /* copy over old data into new slots */ \
74: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75: for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76: PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
77: len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78: PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79: len*sizeof(int)); \
80: PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar)); \
81: PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82: len*sizeof(PetscScalar)); \
83: /* free up old matrix storage */ \
84: \
85: PetscFree(a->a); \
86: if (!a->singlemalloc) { \
87: PetscFree(a->i); \
88: PetscFree(a->j); \
89: } \
90: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
91: a->singlemalloc = PETSC_TRUE; \
92: \
93: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94: rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
96: a->maxnz += CHUNKSIZE; \
97: a->reallocs++; \
98: } \
99: N = nrow++ - 1; a->nz++; \
100: /* shift up all the later entries in this row */ \
101: for (ii=N; ii>=_i; ii--) { \
102: rp[ii+1] = rp[ii]; \
103: ap[ii+1] = ap[ii]; \
104: } \
105: rp[_i] = col1; \
106: ap[_i] = value; \
107: a_noinsert: ; \
108: ailen[row] = nrow; \
109: }
111: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112: { \
113: \
114: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115: rmax = bimax[row]; nrow = bilen[row]; \
116: col1 = col - shift; \
117: \
118: low = 0; high = nrow; \
119: while (high-low > 5) { \
120: t = (low+high)/2; \
121: if (rp[t] > col) high = t; \
122: else low = t; \
123: } \
124: for (_i=low; _i<high; _i++) { \
125: if (rp[_i] > col1) break; \
126: if (rp[_i] == col1) { \
127: if (addv == ADD_VALUES) ap[_i] += value; \
128: else ap[_i] = value; \
129: goto b_noinsert; \
130: } \
131: } \
132: if (nonew == 1) goto b_noinsert; \
133: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
134: if (nrow >= rmax) { \
135: /* there is no extra room in row, therefore enlarge */ \
136: int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
137: PetscScalar *new_a; \
138: \
139: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
140: \
141: /* malloc new storage space */ \
142: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
143: PetscMalloc(len,&new_a); \
144: new_j = (int*)(new_a + new_nz); \
145: new_i = new_j + new_nz; \
146: \
147: /* copy over old data into new slots */ \
148: for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
149: for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
150: PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
151: len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
152: PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
153: len*sizeof(int)); \
154: PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar)); \
155: PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
156: len*sizeof(PetscScalar)); \
157: /* free up old matrix storage */ \
158: \
159: PetscFree(b->a); \
160: if (!b->singlemalloc) { \
161: PetscFree(b->i); \
162: PetscFree(b->j); \
163: } \
164: ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
165: b->singlemalloc = PETSC_TRUE; \
166: \
167: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
168: rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
169: PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
170: b->maxnz += CHUNKSIZE; \
171: b->reallocs++; \
172: } \
173: N = nrow++ - 1; b->nz++; \
174: /* shift up all the later entries in this row */ \
175: for (ii=N; ii>=_i; ii--) { \
176: rp[ii+1] = rp[ii]; \
177: ap[ii+1] = ap[ii]; \
178: } \
179: rp[_i] = col1; \
180: ap[_i] = value; \
181: b_noinsert: ; \
182: bilen[row] = nrow; \
183: }
187: int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
188: {
189: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
190: PetscScalar value;
191: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
192: int cstart = aij->cstart,cend = aij->cend,row,col;
193: PetscTruth roworiented = aij->roworiented;
195: /* Some variables required in the macros */
196: Mat A = aij->A;
197: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
198: int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199: PetscScalar *aa = a->a;
200: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201: Mat B = aij->B;
202: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
203: int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204: PetscScalar *ba = b->a;
206: int *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207: int nonew = a->nonew,shift=0;
208: PetscScalar *ap;
211: for (i=0; i<m; i++) {
212: if (im[i] < 0) continue;
213: #if defined(PETSC_USE_BOPT_g)
214: if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215: #endif
216: if (im[i] >= rstart && im[i] < rend) {
217: row = im[i] - rstart;
218: for (j=0; j<n; j++) {
219: if (in[j] >= cstart && in[j] < cend){
220: col = in[j] - cstart;
221: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222: if (ignorezeroentries && value == 0.0) continue;
223: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224: /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
225: } else if (in[j] < 0) continue;
226: #if defined(PETSC_USE_BOPT_g)
227: else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228: #endif
229: else {
230: if (mat->was_assembled) {
231: if (!aij->colmap) {
232: CreateColmap_MPIAIJ_Private(mat);
233: }
234: #if defined (PETSC_USE_CTABLE)
235: PetscTableFind(aij->colmap,in[j]+1,&col);
236: col--;
237: #else
238: col = aij->colmap[in[j]] - 1;
239: #endif
240: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241: DisAssemble_MPIAIJ(mat);
242: col = in[j];
243: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244: B = aij->B;
245: b = (Mat_SeqAIJ*)B->data;
246: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247: ba = b->a;
248: }
249: } else col = in[j];
250: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251: if (ignorezeroentries && value == 0.0) continue;
252: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253: /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
254: }
255: }
256: } else {
257: if (!aij->donotstash) {
258: if (roworiented) {
259: if (ignorezeroentries && v[i*n] == 0.0) continue;
260: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
261: } else {
262: if (ignorezeroentries && v[i] == 0.0) continue;
263: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
264: }
265: }
266: }
267: }
268: return(0);
269: }
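/*
   Caller-level sketch (added for clarity, not part of the original source; assumes a matrix
   "mat" of type MATMPIAIJ that has already been created and preallocated): values whose rows
   are owned locally are routed above into the diagonal block A or the off-diagonal block B,
   while rows owned by other processes are stashed until assembly ships them to their owners.
   The indices and values below are hypothetical.

      int         row = 0, cols[2] = {0,1};
      PetscScalar vals[2] = {2.0,-1.0};

      MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);
      MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/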
273: int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274: {
275: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
277: int cstart = aij->cstart,cend = aij->cend,row,col;
280: for (i=0; i<m; i++) {
281: if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
282: if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
283: if (idxm[i] >= rstart && idxm[i] < rend) {
284: row = idxm[i] - rstart;
285: for (j=0; j<n; j++) {
286: if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
287: if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
288: if (idxn[j] >= cstart && idxn[j] < cend){
289: col = idxn[j] - cstart;
290: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
291: } else {
292: if (!aij->colmap) {
293: CreateColmap_MPIAIJ_Private(mat);
294: }
295: #if defined (PETSC_USE_CTABLE)
296: PetscTableFind(aij->colmap,idxn[j]+1,&col);
297: col --;
298: #else
299: col = aij->colmap[idxn[j]] - 1;
300: #endif
301: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
302: else {
303: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
304: }
305: }
306: }
307: } else {
308: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
309: }
310: }
311: return(0);
312: }
316: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
317: {
318: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
319: int ierr,nstash,reallocs;
320: InsertMode addv;
323: if (aij->donotstash) {
324: return(0);
325: }
327: /* make sure all processors are using the same mode, either INSERT_VALUES or ADD_VALUES */
328: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
329: if (addv == (ADD_VALUES|INSERT_VALUES)) {
330: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
331: }
332: mat->insertmode = addv; /* in case this processor had no cache */
334: MatStashScatterBegin_Private(&mat->stash,aij->rowners);
335: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
336: PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
337: return(0);
338: }
343: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
344: {
345: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
346: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
347: int i,j,rstart,ncols,n,ierr,flg;
348: int *row,*col,other_disassembled;
349: PetscScalar *val;
350: InsertMode addv = mat->insertmode;
353: if (!aij->donotstash) {
354: while (1) {
355: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
356: if (!flg) break;
358: for (i=0; i<n;) {
359: /* Now identify the consecutive vals belonging to the same row */
360: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
361: if (j < n) ncols = j-i;
362: else ncols = n-i;
363: /* Now assemble all these values with a single function call */
364: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
365: i = j;
366: }
367: }
368: MatStashScatterEnd_Private(&mat->stash);
369: }
370:
371: MatAssemblyBegin(aij->A,mode);
372: MatAssemblyEnd(aij->A,mode);
374: /* determine if any processor has disassembled, if so we must
375: also disassemble ourselves, in order that we may reassemble. */
376: /*
377: if nonzero structure of submatrix B cannot change then we know that
378: no processor disassembled thus we can skip this stuff
379: */
380: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
381: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
382: if (mat->was_assembled && !other_disassembled) {
383: DisAssemble_MPIAIJ(mat);
384: }
385: }
387: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
388: MatSetUpMultiply_MPIAIJ(mat);
389: }
390: MatAssemblyBegin(aij->B,mode);
391: MatAssemblyEnd(aij->B,mode);
393: if (aij->rowvalues) {
394: PetscFree(aij->rowvalues);
395: aij->rowvalues = 0;
396: }
398: /* used by MatAXPY() */
399: a->xtoy = 0; b->xtoy = 0;
400: a->XtoY = 0; b->XtoY = 0;
402: return(0);
403: }
407: int MatZeroEntries_MPIAIJ(Mat A)
408: {
409: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
410: int ierr;
413: MatZeroEntries(l->A);
414: MatZeroEntries(l->B);
415: return(0);
416: }
420: int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
421: {
422: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
423: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
424: int *nprocs,j,idx,nsends,row;
425: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
426: int *rvalues,tag = A->tag,count,base,slen,n,*source;
427: int *lens,imdex,*lrows,*values,rstart=l->rstart;
428: MPI_Comm comm = A->comm;
429: MPI_Request *send_waits,*recv_waits;
430: MPI_Status recv_status,*send_status;
431: IS istmp;
432: PetscTruth found;
435: ISGetLocalSize(is,&N);
436: ISGetIndices(is,&rows);
438: /* first count number of contributors to each processor */
439: PetscMalloc(2*size*sizeof(int),&nprocs);
440: PetscMemzero(nprocs,2*size*sizeof(int));
441: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
442: for (i=0; i<N; i++) {
443: idx = rows[i];
444: found = PETSC_FALSE;
445: for (j=0; j<size; j++) {
446: if (idx >= owners[j] && idx < owners[j+1]) {
447: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
448: }
449: }
450: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
451: }
452: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
454: /* inform other processors of number of messages and max length*/
455: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
457: /* post receives: */
458: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
459: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
460: for (i=0; i<nrecvs; i++) {
461: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
462: }
464: /* do sends:
465: 1) starts[i] gives the starting index in svalues for stuff going to
466: the ith processor
467: */
468: PetscMalloc((N+1)*sizeof(int),&svalues);
469: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
470: PetscMalloc((size+1)*sizeof(int),&starts);
471: starts[0] = 0;
472: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
473: for (i=0; i<N; i++) {
474: svalues[starts[owner[i]]++] = rows[i];
475: }
476: ISRestoreIndices(is,&rows);
478: starts[0] = 0;
479: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
480: count = 0;
481: for (i=0; i<size; i++) {
482: if (nprocs[2*i+1]) {
483: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
484: }
485: }
486: PetscFree(starts);
488: base = owners[rank];
490: /* wait on receives */
491: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
492: source = lens + nrecvs;
493: count = nrecvs; slen = 0;
494: while (count) {
495: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
496: /* unpack receives into our local space */
497: MPI_Get_count(&recv_status,MPI_INT,&n);
498: source[imdex] = recv_status.MPI_SOURCE;
499: lens[imdex] = n;
500: slen += n;
501: count--;
502: }
503: PetscFree(recv_waits);
504:
505: /* move the data into the send scatter */
506: PetscMalloc((slen+1)*sizeof(int),&lrows);
507: count = 0;
508: for (i=0; i<nrecvs; i++) {
509: values = rvalues + i*nmax;
510: for (j=0; j<lens[i]; j++) {
511: lrows[count++] = values[j] - base;
512: }
513: }
514: PetscFree(rvalues);
515: PetscFree(lens);
516: PetscFree(owner);
517: PetscFree(nprocs);
518:
519: /* actually zap the local rows */
520: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
521: PetscLogObjectParent(A,istmp);
523: /*
524: Zero the required rows. If the "diagonal block" of the matrix
525: is square and the user wishes to set the diagonal we use separate
526: code so that MatSetValues() is not called for each diagonal entry, allocating
527: new memory and thus causing lots of mallocs and slowing things down.
529: Contributed by: Mathew Knepley
530: */
531: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
532: MatZeroRows(l->B,istmp,0);
533: if (diag && (l->A->M == l->A->N)) {
534: MatZeroRows(l->A,istmp,diag);
535: } else if (diag) {
536: MatZeroRows(l->A,istmp,0);
537: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
538: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
539: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
540: }
541: for (i = 0; i < slen; i++) {
542: row = lrows[i] + rstart;
543: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
544: }
545: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
546: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
547: } else {
548: MatZeroRows(l->A,istmp,0);
549: }
550: ISDestroy(istmp);
551: PetscFree(lrows);
553: /* wait on sends */
554: if (nsends) {
555: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
556: MPI_Waitall(nsends,send_waits,send_status);
557: PetscFree(send_status);
558: }
559: PetscFree(send_waits);
560: PetscFree(svalues);
562: return(0);
563: }
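/*
   Caller-level sketch (added for clarity, not part of the original source): a typical use of
   MatZeroRows() on an assembled parallel matrix A.  The names nlocal and rows[] are
   hypothetical; each process may list any global rows, and the routine above forwards them to
   their owners before zeroing.  Passing &one places 1.0 on the diagonal of each zeroed row;
   passing PETSC_NULL leaves the rows entirely zero.

      IS          is;
      PetscScalar one = 1.0;

      ISCreateGeneral(PETSC_COMM_WORLD,nlocal,rows,&is);
      MatZeroRows(A,is,&one);
      ISDestroy(is);
*/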
567: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
568: {
569: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
570: int ierr,nt;
573: VecGetLocalSize(xx,&nt);
574: if (nt != A->n) {
575: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
576: }
577: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
578: (*a->A->ops->mult)(a->A,xx,yy);
579: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
580: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
581: return(0);
582: }
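/*
   Note (added for clarity, not in the original source): with the row distribution used by
   MATMPIAIJ the product decomposes on each process as

      y_local = A_diag * x_local + B_off * x_ghost

   where x_ghost (a->lvec) holds the off-process entries of x gathered by the VecScatter
   a->Mvctx.  The scatter is begun before and ended after the local multiply so that
   communication overlaps with computation; MatMultAdd and the transpose routines below
   follow the same pattern in the appropriate direction.
*/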
586: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
587: {
588: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
589: int ierr;
592: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
593: (*a->A->ops->multadd)(a->A,xx,yy,zz);
594: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
595: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
596: return(0);
597: }
601: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
602: {
603: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
604: int ierr;
607: /* do nondiagonal part */
608: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
609: /* send it on its way */
610: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
611: /* do local part */
612: (*a->A->ops->multtranspose)(a->A,xx,yy);
613: /* receive remote parts: note this assumes the values are not actually */
614: /* inserted in yy until the next line, which is true for my implementation */
615: /* but perhaps is not always true. */
616: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
617: return(0);
618: }
620: EXTERN_C_BEGIN
623: int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
624: {
625: MPI_Comm comm;
626: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
627: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
628: IS Me,Notme;
629: int M,N,first,last,*notme,ntids,i, ierr;
633: /* Easy test: symmetric diagonal block */
634: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
635: MatIsSymmetric(Adia,Bdia,f);
636: if (!*f) return(0);
637: PetscObjectGetComm((PetscObject)Amat,&comm);
638: MPI_Comm_size(comm,&ntids);
639: if (ntids==1) return(0);
641: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
642: MatGetSize(Amat,&M,&N);
643: MatGetOwnershipRange(Amat,&first,&last);
644: PetscMalloc((N-last+first)*sizeof(int),&notme);
645: for (i=0; i<first; i++) notme[i] = i;
646: for (i=last; i<M; i++) notme[i-last+first] = i;
647: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
648: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
649: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
650: Aoff = Aoffs[0];
651: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
652: Boff = Boffs[0];
653: MatIsSymmetric(Aoff,Boff,f);
654: MatDestroyMatrices(1,&Aoffs);
655: MatDestroyMatrices(1,&Boffs);
656: ISDestroy(Me);
657: ISDestroy(Notme);
659: return(0);
660: }
661: EXTERN_C_END
665: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
666: {
667: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
668: int ierr;
671: /* do nondiagonal part */
672: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
673: /* send it on its way */
674: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
675: /* do local part */
676: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
677: /* receive remote parts: note this assumes the values are not actually */
678: /* inserted in zz until the next line, which is true for my implementation */
679: /* but perhaps is not always true. */
680: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
681: return(0);
682: }
684: /*
685: This only works correctly for square matrices where the subblock A->A is the
686: diagonal block
687: */
690: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
691: {
692: int ierr;
693: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
696: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
697: if (a->rstart != a->cstart || a->rend != a->cend) {
698: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
699: }
700: MatGetDiagonal(a->A,v);
701: return(0);
702: }
706: int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
707: {
708: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
709: int ierr;
712: MatScale(aa,a->A);
713: MatScale(aa,a->B);
714: return(0);
715: }
719: int MatDestroy_MPIAIJ(Mat mat)
720: {
721: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
722: int ierr;
725: #if defined(PETSC_USE_LOG)
726: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
727: #endif
728: MatStashDestroy_Private(&mat->stash);
729: PetscFree(aij->rowners);
730: MatDestroy(aij->A);
731: MatDestroy(aij->B);
732: #if defined (PETSC_USE_CTABLE)
733: if (aij->colmap) {PetscTableDelete(aij->colmap);}
734: #else
735: if (aij->colmap) {PetscFree(aij->colmap);}
736: #endif
737: if (aij->garray) {PetscFree(aij->garray);}
738: if (aij->lvec) {VecDestroy(aij->lvec);}
739: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
740: if (aij->rowvalues) {PetscFree(aij->rowvalues);}
741: PetscFree(aij);
742: return(0);
743: }
747: int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
748: {
749: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
750: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
751: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
752: int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
753: int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
754: PetscScalar *column_values;
757: MPI_Comm_rank(mat->comm,&rank);
758: MPI_Comm_size(mat->comm,&size);
759: nz = A->nz + B->nz;
760: if (rank == 0) {
761: header[0] = MAT_FILE_COOKIE;
762: header[1] = mat->M;
763: header[2] = mat->N;
764: MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);
765: PetscViewerBinaryGetDescriptor(viewer,&fd);
766: PetscBinaryWrite(fd,header,4,PETSC_INT,1);
767: /* get largest number of rows any processor has */
768: rlen = mat->m;
769: PetscMapGetGlobalRange(mat->rmap,&range);
770: for (i=1; i<size; i++) {
771: rlen = PetscMax(rlen,range[i+1] - range[i]);
772: }
773: } else {
774: MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);
775: rlen = mat->m;
776: }
778: /* load up the local row counts */
779: PetscMalloc((rlen+1)*sizeof(int),&row_lengths);
780: for (i=0; i<mat->m; i++) {
781: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
782: }
784: /* store the row lengths to the file */
785: if (rank == 0) {
786: MPI_Status status;
787: PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);
788: for (i=1; i<size; i++) {
789: rlen = range[i+1] - range[i];
790: MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);
791: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);
792: }
793: } else {
794: MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);
795: }
796: PetscFree(row_lengths);
798: /* load up the local column indices */
799: nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
800: MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);
801: PetscMalloc((nzmax+1)*sizeof(int),&column_indices);
802: cnt = 0;
803: for (i=0; i<mat->m; i++) {
804: for (j=B->i[i]; j<B->i[i+1]; j++) {
805: if ( (col = garray[B->j[j]]) > cstart) break;
806: column_indices[cnt++] = col;
807: }
808: for (k=A->i[i]; k<A->i[i+1]; k++) {
809: column_indices[cnt++] = A->j[k] + cstart;
810: }
811: for (; j<B->i[i+1]; j++) {
812: column_indices[cnt++] = garray[B->j[j]];
813: }
814: }
815: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
817: /* store the column indices to the file */
818: if (rank == 0) {
819: MPI_Status status;
820: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);
821: for (i=1; i<size; i++) {
822: MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
823: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
824: MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);
825: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);
826: }
827: } else {
828: MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
829: MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);
830: }
831: PetscFree(column_indices);
833: /* load up the local column values */
834: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
835: cnt = 0;
836: for (i=0; i<mat->m; i++) {
837: for (j=B->i[i]; j<B->i[i+1]; j++) {
838: if ( garray[B->j[j]] > cstart) break;
839: column_values[cnt++] = B->a[j];
840: }
841: for (k=A->i[i]; k<A->i[i+1]; k++) {
842: column_values[cnt++] = A->a[k];
843: }
844: for (; j<B->i[i+1]; j++) {
845: column_values[cnt++] = B->a[j];
846: }
847: }
848: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
850: /* store the column values to the file */
851: if (rank == 0) {
852: MPI_Status status;
853: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);
854: for (i=1; i<size; i++) {
855: MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
856: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
857: MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
858: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);
859: }
860: } else {
861: MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
862: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
863: }
864: PetscFree(column_values);
865: return(0);
866: }
870: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
871: {
872: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
873: int ierr,rank = aij->rank,size = aij->size;
874: PetscTruth isdraw,isascii,flg,isbinary;
875: PetscViewer sviewer;
876: PetscViewerFormat format;
879: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
880: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
881: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
882: if (isascii) {
883: PetscViewerGetFormat(viewer,&format);
884: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885: MatInfo info;
886: MPI_Comm_rank(mat->comm,&rank);
887: MatGetInfo(mat,MAT_LOCAL,&info);
888: PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
889: if (flg) {
890: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
891: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
892: } else {
893: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
894: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
895: }
896: MatGetInfo(aij->A,MAT_LOCAL,&info);
897: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
898: MatGetInfo(aij->B,MAT_LOCAL,&info);
899: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
900: PetscViewerFlush(viewer);
901: VecScatterView(aij->Mvctx,viewer);
902: return(0);
903: } else if (format == PETSC_VIEWER_ASCII_INFO) {
904: return(0);
905: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
906: return(0);
907: }
908: } else if (isbinary) {
909: if (size == 1) {
910: PetscObjectSetName((PetscObject)aij->A,mat->name);
911: MatView(aij->A,viewer);
912: } else {
913: MatView_MPIAIJ_Binary(mat,viewer);
914: }
915: return(0);
916: } else if (isdraw) {
917: PetscDraw draw;
918: PetscTruth isnull;
919: PetscViewerDrawGetDraw(viewer,0,&draw);
920: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
921: }
923: if (size == 1) {
924: PetscObjectSetName((PetscObject)aij->A,mat->name);
925: MatView(aij->A,viewer);
926: } else {
927: /* assemble the entire matrix onto first processor. */
928: Mat A;
929: Mat_SeqAIJ *Aloc;
930: int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
931: PetscScalar *a;
933: if (!rank) {
934: MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
935: } else {
936: MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
937: }
938: PetscLogObjectParent(mat,A);
940: /* copy over the A part */
941: Aloc = (Mat_SeqAIJ*)aij->A->data;
942: m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943: row = aij->rstart;
944: for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
945: for (i=0; i<m; i++) {
946: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
947: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
948: }
949: aj = Aloc->j;
950: for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
952: /* copy over the B part */
953: Aloc = (Mat_SeqAIJ*)aij->B->data;
954: m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955: row = aij->rstart;
956: PetscMalloc((ai[m]+1)*sizeof(int),&cols);
957: ct = cols;
958: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
959: for (i=0; i<m; i++) {
960: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
961: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
962: }
963: PetscFree(ct);
964: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
965: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
966: /*
967: Everyone has to call to draw the matrix since the graphics waits are
968: synchronized across all processors that share the PetscDraw object
969: */
970: PetscViewerGetSingleton(viewer,&sviewer);
971: if (!rank) {
972: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
973: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
974: }
975: PetscViewerRestoreSingleton(viewer,&sviewer);
976: MatDestroy(A);
977: }
978: return(0);
979: }
983: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
984: {
985: int ierr;
986: PetscTruth isascii,isdraw,issocket,isbinary;
987:
989: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
990: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
991: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
992: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
993: if (isascii || isdraw || isbinary || issocket) {
994: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
995: } else {
996: SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
997: }
998: return(0);
999: }
1005: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1006: {
1007: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1008: int ierr;
1009: Vec bb1;
1010: PetscScalar mone=-1.0;
1013: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1015: VecDuplicate(bb,&bb1);
1017: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1018: if (flag & SOR_ZERO_INITIAL_GUESS) {
1019: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1020: its--;
1021: }
1022:
1023: while (its--) {
1024: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1025: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1027: /* update rhs: bb1 = bb - B*x */
1028: VecScale(&mone,mat->lvec);
1029: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1031: /* local sweep */
1032: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1033:
1034: }
1035: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1036: if (flag & SOR_ZERO_INITIAL_GUESS) {
1037: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1038: its--;
1039: }
1040: while (its--) {
1041: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1042: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1044: /* update rhs: bb1 = bb - B*x */
1045: VecScale(&mone,mat->lvec);
1046: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1048: /* local sweep */
1049: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1050:
1051: }
1052: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1053: if (flag & SOR_ZERO_INITIAL_GUESS) {
1054: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1055: its--;
1056: }
1057: while (its--) {
1058: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1059: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1061: /* update rhs: bb1 = bb - B*x */
1062: VecScale(&mone,mat->lvec);
1063: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1065: /* local sweep */
1066: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1067:
1068: }
1069: } else {
1070: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1071: }
1073: VecDestroy(bb1);
1074: return(0);
1075: }
1079: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1080: {
1081: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1082: Mat A = mat->A,B = mat->B;
1083: int ierr;
1084: PetscReal isend[5],irecv[5];
1087: info->block_size = 1.0;
1088: MatGetInfo(A,MAT_LOCAL,info);
1089: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1090: isend[3] = info->memory; isend[4] = info->mallocs;
1091: MatGetInfo(B,MAT_LOCAL,info);
1092: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1093: isend[3] += info->memory; isend[4] += info->mallocs;
1094: if (flag == MAT_LOCAL) {
1095: info->nz_used = isend[0];
1096: info->nz_allocated = isend[1];
1097: info->nz_unneeded = isend[2];
1098: info->memory = isend[3];
1099: info->mallocs = isend[4];
1100: } else if (flag == MAT_GLOBAL_MAX) {
1101: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1102: info->nz_used = irecv[0];
1103: info->nz_allocated = irecv[1];
1104: info->nz_unneeded = irecv[2];
1105: info->memory = irecv[3];
1106: info->mallocs = irecv[4];
1107: } else if (flag == MAT_GLOBAL_SUM) {
1108: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1109: info->nz_used = irecv[0];
1110: info->nz_allocated = irecv[1];
1111: info->nz_unneeded = irecv[2];
1112: info->memory = irecv[3];
1113: info->mallocs = irecv[4];
1114: }
1115: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1116: info->fill_ratio_needed = 0;
1117: info->factor_mallocs = 0;
1118: info->rows_global = (double)matin->M;
1119: info->columns_global = (double)matin->N;
1120: info->rows_local = (double)matin->m;
1121: info->columns_local = (double)matin->N;
1123: return(0);
1124: }
1128: int MatSetOption_MPIAIJ(Mat A,MatOption op)
1129: {
1130: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1131: int ierr;
1134: switch (op) {
1135: case MAT_NO_NEW_NONZERO_LOCATIONS:
1136: case MAT_YES_NEW_NONZERO_LOCATIONS:
1137: case MAT_COLUMNS_UNSORTED:
1138: case MAT_COLUMNS_SORTED:
1139: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1140: case MAT_KEEP_ZEROED_ROWS:
1141: case MAT_NEW_NONZERO_LOCATION_ERR:
1142: case MAT_USE_INODES:
1143: case MAT_DO_NOT_USE_INODES:
1144: case MAT_IGNORE_ZERO_ENTRIES:
1145: MatSetOption(a->A,op);
1146: MatSetOption(a->B,op);
1147: break;
1148: case MAT_ROW_ORIENTED:
1149: a->roworiented = PETSC_TRUE;
1150: MatSetOption(a->A,op);
1151: MatSetOption(a->B,op);
1152: break;
1153: case MAT_ROWS_SORTED:
1154: case MAT_ROWS_UNSORTED:
1155: case MAT_YES_NEW_DIAGONALS:
1156: PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1157: break;
1158: case MAT_COLUMN_ORIENTED:
1159: a->roworiented = PETSC_FALSE;
1160: MatSetOption(a->A,op);
1161: MatSetOption(a->B,op);
1162: break;
1163: case MAT_IGNORE_OFF_PROC_ENTRIES:
1164: a->donotstash = PETSC_TRUE;
1165: break;
1166: case MAT_NO_NEW_DIAGONALS:
1167: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1168: default:
1169: SETERRQ(PETSC_ERR_SUP,"unknown option");
1170: }
1171: return(0);
1172: }
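/*
   Caller-level sketch (added for clarity, not part of the original source; A is a hypothetical
   MATMPIAIJ matrix): most options are simply forwarded to the two sequential blocks, while a
   few (such as MAT_IGNORE_OFF_PROC_ENTRIES) only affect the parallel layer, e.g.

      MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);
      MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);
*/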
1176: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1177: {
1178: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1179: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1180: int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1181: int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1182: int *cmap,*idx_p;
1185: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1186: mat->getrowactive = PETSC_TRUE;
1188: if (!mat->rowvalues && (idx || v)) {
1189: /*
1190: allocate enough space to hold information from the longest row.
1191: */
1192: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1193: int max = 1,tmp;
1194: for (i=0; i<matin->m; i++) {
1195: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1196: if (max < tmp) { max = tmp; }
1197: }
1198: PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
1199: mat->rowindices = (int*)(mat->rowvalues + max);
1200: }
1202: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1203: lrow = row - rstart;
1205: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1206: if (!v) {pvA = 0; pvB = 0;}
1207: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1208: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1209: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1210: nztot = nzA + nzB;
1212: cmap = mat->garray;
1213: if (v || idx) {
1214: if (nztot) {
1215: /* Sort by increasing column numbers, assuming A and B already sorted */
1216: int imark = -1;
1217: if (v) {
1218: *v = v_p = mat->rowvalues;
1219: for (i=0; i<nzB; i++) {
1220: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1221: else break;
1222: }
1223: imark = i;
1224: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1225: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1226: }
1227: if (idx) {
1228: *idx = idx_p = mat->rowindices;
1229: if (imark > -1) {
1230: for (i=0; i<imark; i++) {
1231: idx_p[i] = cmap[cworkB[i]];
1232: }
1233: } else {
1234: for (i=0; i<nzB; i++) {
1235: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1236: else break;
1237: }
1238: imark = i;
1239: }
1240: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1241: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1242: }
1243: } else {
1244: if (idx) *idx = 0;
1245: if (v) *v = 0;
1246: }
1247: }
1248: *nz = nztot;
1249: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1250: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1251: return(0);
1252: }
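/*
   Caller-level sketch (added for clarity, not part of the original source; A is a hypothetical
   assembled MATMPIAIJ matrix): the usual MatGetRow()/MatRestoreRow() pairing; only locally
   owned rows may be requested, as enforced above.

      int         row,rstart,rend,ncols,*cols;
      PetscScalar *vals;

      MatGetOwnershipRange(A,&rstart,&rend);
      for (row=rstart; row<rend; row++) {
        MatGetRow(A,row,&ncols,&cols,&vals);
        ...  use ncols, cols[], vals[]  ...
        MatRestoreRow(A,row,&ncols,&cols,&vals);
      }
*/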
1256: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1257: {
1258: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1261: if (aij->getrowactive == PETSC_FALSE) {
1262: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1263: }
1264: aij->getrowactive = PETSC_FALSE;
1265: return(0);
1266: }
1270: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1271: {
1272: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1273: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1274: int ierr,i,j,cstart = aij->cstart;
1275: PetscReal sum = 0.0;
1276: PetscScalar *v;
1279: if (aij->size == 1) {
1280: MatNorm(aij->A,type,norm);
1281: } else {
1282: if (type == NORM_FROBENIUS) {
1283: v = amat->a;
1284: for (i=0; i<amat->nz; i++) {
1285: #if defined(PETSC_USE_COMPLEX)
1286: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1287: #else
1288: sum += (*v)*(*v); v++;
1289: #endif
1290: }
1291: v = bmat->a;
1292: for (i=0; i<bmat->nz; i++) {
1293: #if defined(PETSC_USE_COMPLEX)
1294: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1295: #else
1296: sum += (*v)*(*v); v++;
1297: #endif
1298: }
1299: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1300: *norm = sqrt(*norm);
1301: } else if (type == NORM_1) { /* max column norm */
1302: PetscReal *tmp,*tmp2;
1303: int *jj,*garray = aij->garray;
1304: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1305: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1306: PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1307: *norm = 0.0;
1308: v = amat->a; jj = amat->j;
1309: for (j=0; j<amat->nz; j++) {
1310: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1311: }
1312: v = bmat->a; jj = bmat->j;
1313: for (j=0; j<bmat->nz; j++) {
1314: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1315: }
1316: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1317: for (j=0; j<mat->N; j++) {
1318: if (tmp2[j] > *norm) *norm = tmp2[j];
1319: }
1320: PetscFree(tmp);
1321: PetscFree(tmp2);
1322: } else if (type == NORM_INFINITY) { /* max row norm */
1323: PetscReal ntemp = 0.0;
1324: for (j=0; j<aij->A->m; j++) {
1325: v = amat->a + amat->i[j];
1326: sum = 0.0;
1327: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1328: sum += PetscAbsScalar(*v); v++;
1329: }
1330: v = bmat->a + bmat->i[j];
1331: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1332: sum += PetscAbsScalar(*v); v++;
1333: }
1334: if (sum > ntemp) ntemp = sum;
1335: }
1336: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1337: } else {
1338: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1339: }
1340: }
1341: return(0);
1342: }
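/*
   Note (added for clarity, not in the original source): the three cases above compute

      NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )   local sums of squares, MPI_SUM, then sqrt
      NORM_1:          max_j sum_i |a_ij|        per-column sums, MPI_SUM, then maximum
      NORM_INFINITY:   max_i sum_j |a_ij|        per-row sums locally, then MPI_MAX

   using the entries stored in the diagonal block A and the off-diagonal block B of each
   process.
*/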
1346: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1347: {
1348: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1349: Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1350: int ierr;
1351: int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1352: Mat B;
1353: PetscScalar *array;
1356: if (!matout && M != N) {
1357: SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1358: }
1360: MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1362: /* copy over the A part */
1363: Aloc = (Mat_SeqAIJ*)a->A->data;
1364: m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1365: row = a->rstart;
1366: for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1367: for (i=0; i<m; i++) {
1368: MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1369: row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1370: }
1371: aj = Aloc->j;
1372: for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1374: /* copy over the B part */
1375: Aloc = (Mat_SeqAIJ*)a->B->data;
1376: m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1377: row = a->rstart;
1378: PetscMalloc((1+ai[m])*sizeof(int),&cols);
1379: ct = cols;
1380: for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1381: for (i=0; i<m; i++) {
1382: MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1383: row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1384: }
1385: PetscFree(ct);
1386: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1387: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1388: if (matout) {
1389: *matout = B;
1390: } else {
1391: MatHeaderCopy(A,B);
1392: }
1393: return(0);
1394: }
1398: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1399: {
1400: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1401: Mat a = aij->A,b = aij->B;
1402: int ierr,s1,s2,s3;
1405: MatGetLocalSize(mat,&s2,&s3);
1406: if (rr) {
1407: VecGetLocalSize(rr,&s1);
1408: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1409: /* Overlap communication with computation. */
1410: VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1411: }
1412: if (ll) {
1413: VecGetLocalSize(ll,&s1);
1414: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1415: (*b->ops->diagonalscale)(b,ll,0);
1416: }
1417: /* scale the diagonal block */
1418: (*a->ops->diagonalscale)(a,ll,rr);
1420: if (rr) {
1421: /* Do a scatter end and then right scale the off-diagonal block */
1422: VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1423: (*b->ops->diagonalscale)(b,0,aij->lvec);
1424: }
1425:
1426: return(0);
1427: }
1432: int MatPrintHelp_MPIAIJ(Mat A)
1433: {
1434: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1435: int ierr;
1438: if (!a->rank) {
1439: MatPrintHelp_SeqAIJ(a->A);
1440: }
1441: return(0);
1442: }
1446: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1447: {
1449: *bs = 1;
1450: return(0);
1451: }
1454: int MatSetUnfactored_MPIAIJ(Mat A)
1455: {
1456: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1457: int ierr;
1460: MatSetUnfactored(a->A);
1461: return(0);
1462: }
1466: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1467: {
1468: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1469: Mat a,b,c,d;
1470: PetscTruth flg;
1471: int ierr;
1474: a = matA->A; b = matA->B;
1475: c = matB->A; d = matB->B;
1477: MatEqual(a,c,&flg);
1478: if (flg == PETSC_TRUE) {
1479: MatEqual(b,d,&flg);
1480: }
1481: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1482: return(0);
1483: }
1487: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1488: {
1489: int ierr;
1490: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1491: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1494: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1495: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1496: /* because of the column compression in the off-processor part of the matrix a->B,
1497: the number of columns in a->B and b->B may be different, hence we cannot call
1498: the MatCopy() directly on the two parts. If need be, we can provide a more
1499: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1500: then copying the submatrices */
1501: MatCopy_Basic(A,B,str);
1502: } else {
1503: MatCopy(a->A,b->A,str);
1504: MatCopy(a->B,b->B,str);
1505: }
1506: return(0);
1507: }
1511: int MatSetUpPreallocation_MPIAIJ(Mat A)
1512: {
1513: int ierr;
1516: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1517: return(0);
1518: }
1520: #include "petscblaslapack.h"
1523: int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1524: {
1525: int ierr,one=1,i;
1526: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1527: Mat_SeqAIJ *x,*y;
1530: if (str == SAME_NONZERO_PATTERN) {
1531: x = (Mat_SeqAIJ *)xx->A->data;
1532: y = (Mat_SeqAIJ *)yy->A->data;
1533: BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1534: x = (Mat_SeqAIJ *)xx->B->data;
1535: y = (Mat_SeqAIJ *)yy->B->data;
1536: BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1537: } else if (str == SUBSET_NONZERO_PATTERN) {
1538: MatAXPY_SeqAIJ(a,xx->A,yy->A,str);
1540: x = (Mat_SeqAIJ *)xx->B->data;
1541: y = (Mat_SeqAIJ *)yy->B->data;
1542: if (y->xtoy && y->XtoY != xx->B) {
1543: PetscFree(y->xtoy);
1544: MatDestroy(y->XtoY);
1545: }
1546: if (!y->xtoy) { /* get xtoy */
1547: MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
1548: y->XtoY = xx->B;
1549: }
1550: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1551: } else {
1552: MatAXPY_Basic(a,X,Y,str);
1553: }
1554: return(0);
1555: }
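/*
   Note (added for clarity, not in the original source): MatAXPY computes Y = a*X + Y.  When
   the nonzero patterns match, the update is a direct BLAS axpy on the stored values of the
   diagonal and off-diagonal blocks; for SUBSET_NONZERO_PATTERN a mapping xtoy from X's
   nonzeros into Y's nonzeros is built once and reused on later calls.
*/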
1557: /* -------------------------------------------------------------------*/
1558: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1559: MatGetRow_MPIAIJ,
1560: MatRestoreRow_MPIAIJ,
1561: MatMult_MPIAIJ,
1562: /* 4*/ MatMultAdd_MPIAIJ,
1563: MatMultTranspose_MPIAIJ,
1564: MatMultTransposeAdd_MPIAIJ,
1565: 0,
1566: 0,
1567: 0,
1568: /*10*/ 0,
1569: 0,
1570: 0,
1571: MatRelax_MPIAIJ,
1572: MatTranspose_MPIAIJ,
1573: /*15*/ MatGetInfo_MPIAIJ,
1574: MatEqual_MPIAIJ,
1575: MatGetDiagonal_MPIAIJ,
1576: MatDiagonalScale_MPIAIJ,
1577: MatNorm_MPIAIJ,
1578: /*20*/ MatAssemblyBegin_MPIAIJ,
1579: MatAssemblyEnd_MPIAIJ,
1580: 0,
1581: MatSetOption_MPIAIJ,
1582: MatZeroEntries_MPIAIJ,
1583: /*25*/ MatZeroRows_MPIAIJ,
1584: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1585: MatLUFactorSymbolic_MPIAIJ_TFS,
1586: #else
1587: 0,
1588: #endif
1589: 0,
1590: 0,
1591: 0,
1592: /*30*/ MatSetUpPreallocation_MPIAIJ,
1593: 0,
1594: 0,
1595: 0,
1596: 0,
1597: /*35*/ MatDuplicate_MPIAIJ,
1598: 0,
1599: 0,
1600: 0,
1601: 0,
1602: /*40*/ MatAXPY_MPIAIJ,
1603: MatGetSubMatrices_MPIAIJ,
1604: MatIncreaseOverlap_MPIAIJ,
1605: MatGetValues_MPIAIJ,
1606: MatCopy_MPIAIJ,
1607: /*45*/ MatPrintHelp_MPIAIJ,
1608: MatScale_MPIAIJ,
1609: 0,
1610: 0,
1611: 0,
1612: /*50*/ MatGetBlockSize_MPIAIJ,
1613: 0,
1614: 0,
1615: 0,
1616: 0,
1617: /*55*/ MatFDColoringCreate_MPIAIJ,
1618: 0,
1619: MatSetUnfactored_MPIAIJ,
1620: 0,
1621: 0,
1622: /*60*/ MatGetSubMatrix_MPIAIJ,
1623: MatDestroy_MPIAIJ,
1624: MatView_MPIAIJ,
1625: MatGetPetscMaps_Petsc,
1626: 0,
1627: /*65*/ 0,
1628: 0,
1629: 0,
1630: 0,
1631: 0,
1632: /*70*/ 0,
1633: 0,
1634: MatSetColoring_MPIAIJ,
1635: MatSetValuesAdic_MPIAIJ,
1636: MatSetValuesAdifor_MPIAIJ,
1637: /*75*/ 0,
1638: 0,
1639: 0,
1640: 0,
1641: 0,
1642: /*80*/ 0,
1643: 0,
1644: 0,
1645: 0,
1646: 0,
1647: /*85*/ MatLoad_MPIAIJ};
1649: /* ----------------------------------------------------------------------------------------*/
1651: EXTERN_C_BEGIN
1654: int MatStoreValues_MPIAIJ(Mat mat)
1655: {
1656: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1657: int ierr;
1660: MatStoreValues(aij->A);
1661: MatStoreValues(aij->B);
1662: return(0);
1663: }
1664: EXTERN_C_END
1666: EXTERN_C_BEGIN
1669: int MatRetrieveValues_MPIAIJ(Mat mat)
1670: {
1671: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1672: int ierr;
1675: MatRetrieveValues(aij->A);
1676: MatRetrieveValues(aij->B);
1677: return(0);
1678: }
1679: EXTERN_C_END
1681: #include "petscpc.h"
1682: EXTERN_C_BEGIN
1685: int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1686: {
1687: Mat_MPIAIJ *b;
1688: int ierr,i;
1691: B->preallocated = PETSC_TRUE;
1692: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1693: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1694: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1695: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1696: if (d_nnz) {
1697: for (i=0; i<B->m; i++) {
1698: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1699: }
1700: }
1701: if (o_nnz) {
1702: for (i=0; i<B->m; i++) {
1703: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1704: }
1705: }
1706: b = (Mat_MPIAIJ*)B->data;
1708: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);
1709: PetscLogObjectParent(B,b->A);
1710: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);
1711: PetscLogObjectParent(B,b->B);
1713: return(0);
1714: }
1715: EXTERN_C_END
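/*
   Caller-level sketch (added for clarity, not part of the original source): typical
   preallocation of a MATMPIAIJ matrix.  d_nz and o_nz give a single per-row estimate for the
   diagonal and off-diagonal blocks; exact per-row counts may instead be supplied in
   d_nnz[]/o_nnz[].  The sizes m,n,M,N are hypothetical.

      Mat A;
      MatCreate(PETSC_COMM_WORLD,m,n,M,N,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
*/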
1717: /*MC
1718: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1720: Options Database Keys:
1721: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1723: Level: beginner
1725: .seealso: MatCreateMPIAIJ
1726: M*/
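/*
   Illustrative usage (added for clarity, not part of the original source): creating a parallel
   AIJ matrix in one call with MatCreateMPIAIJ().  The sizes m,n,M,N and the preallocation
   estimates are hypothetical placeholders.

      Mat A;
      MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
      ...  MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd()  ...
      MatDestroy(A);
*/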
1728: EXTERN_C_BEGIN
1731: int MatCreate_MPIAIJ(Mat B)
1732: {
1733: Mat_MPIAIJ *b;
1734: int ierr,i,size;
1737: MPI_Comm_size(B->comm,&size);
1739: PetscNew(Mat_MPIAIJ,&b);
1740: B->data = (void*)b;
1741: PetscMemzero(b,sizeof(Mat_MPIAIJ));
1742: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1743: B->factor = 0;
1744: B->assembled = PETSC_FALSE;
1745: B->mapping = 0;
1747: B->insertmode = NOT_SET_VALUES;
1748: b->size = size;
1749: MPI_Comm_rank(B->comm,&b->rank);
1751: PetscSplitOwnership(B->comm,&B->m,&B->M);
1752: PetscSplitOwnership(B->comm,&B->n,&B->N);
1754: /* the information in the maps duplicates the information computed below, eventually
1755: we should remove the duplicate information that is not contained in the maps */
1756: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1757: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
1759: /* build local table of row and column ownerships */
1760: PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1761: PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1762: b->cowners = b->rowners + b->size + 2;
1763: MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1764: b->rowners[0] = 0;
1765: for (i=2; i<=b->size; i++) {
1766: b->rowners[i] += b->rowners[i-1];
1767: }
1768: b->rstart = b->rowners[b->rank];
1769: b->rend = b->rowners[b->rank+1];
1770: MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1771: b->cowners[0] = 0;
1772: for (i=2; i<=b->size; i++) {
1773: b->cowners[i] += b->cowners[i-1];
1774: }
1775: b->cstart = b->cowners[b->rank];
1776: b->cend = b->cowners[b->rank+1];
1778: /* build cache for off-processor entries formed during MatSetValues() */
1779: MatStashCreate_Private(B->comm,1,&B->stash);
1780: b->donotstash = PETSC_FALSE;
1781: b->colmap = 0;
1782: b->garray = 0;
1783: b->roworiented = PETSC_TRUE;
1785: /* stuff used for matrix vector multiply */
1786: b->lvec = PETSC_NULL;
1787: b->Mvctx = PETSC_NULL;
1789: /* stuff for MatGetRow() */
1790: b->rowindices = 0;
1791: b->rowvalues = 0;
1792: b->getrowactive = PETSC_FALSE;
1794: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1795: "MatStoreValues_MPIAIJ",
1796: MatStoreValues_MPIAIJ);
1797: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1798: "MatRetrieveValues_MPIAIJ",
1799: MatRetrieveValues_MPIAIJ);
1800: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1801: "MatGetDiagonalBlock_MPIAIJ",
1802: MatGetDiagonalBlock_MPIAIJ);
1803: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1804: "MatIsSymmetric_MPIAIJ",
1805: MatIsSymmetric_MPIAIJ);
1806: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1807: "MatMPIAIJSetPreallocation_MPIAIJ",
1808: MatMPIAIJSetPreallocation_MPIAIJ);
1809: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1810: "MatDiagonalScaleLocal_MPIAIJ",
1811: MatDiagonalScaleLocal_MPIAIJ);
1812: return(0);
1813: }
1814: EXTERN_C_END
1816: /*MC
1817: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1819: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1820: and MATMPIAIJ otherwise.
1822: Options Database Keys:
1823: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1825: Level: beginner
1827: .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1828: M*/
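/*
   Illustrative usage (added for clarity, not part of the original source): selecting the type
   at run time so that the same code yields MATSEQAIJ on one process and MATMPIAIJ in parallel,
   e.g. together with -mat_type aij on the command line.  M and N are hypothetical global sizes.

      Mat A;
      MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
      MatSetFromOptions(A);
*/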
1830: EXTERN_C_BEGIN
1833: int MatCreate_AIJ(Mat A) {
1834: int ierr,size;
1837: PetscObjectChangeTypeName((PetscObject)A,MATAIJ);
1838: MPI_Comm_size(A->comm,&size);
1839: if (size == 1) {
1840: MatSetType(A,MATSEQAIJ);
1841: } else {
1842: MatSetType(A,MATMPIAIJ);
1843: }
1844: return(0);
1845: }
1846: EXTERN_C_END
1850: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1851: {
1852: Mat mat;
1853: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1854: int ierr;
1857: *newmat = 0;
1858: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1859: MatSetType(mat,MATMPIAIJ);
1860: a = (Mat_MPIAIJ*)mat->data;
1861: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1862: mat->factor = matin->factor;
1863: mat->assembled = PETSC_TRUE;
1864: mat->insertmode = NOT_SET_VALUES;
1865: mat->preallocated = PETSC_TRUE;
1867: a->rstart = oldmat->rstart;
1868: a->rend = oldmat->rend;
1869: a->cstart = oldmat->cstart;
1870: a->cend = oldmat->cend;
1871: a->size = oldmat->size;
1872: a->rank = oldmat->rank;
1873: a->donotstash = oldmat->donotstash;
1874: a->roworiented = oldmat->roworiented;
1875: a->rowindices = 0;
1876: a->rowvalues = 0;
1877: a->getrowactive = PETSC_FALSE;
1879: PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1880: MatStashCreate_Private(matin->comm,1,&mat->stash);
1881: if (oldmat->colmap) {
1882: #if defined (PETSC_USE_CTABLE)
1883: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1884: #else
1885: PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1886: PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1887: PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1888: #endif
1889: } else a->colmap = 0;
1890: if (oldmat->garray) {
1891: int len;
1892: len = oldmat->B->n;
1893: PetscMalloc((len+1)*sizeof(int),&a->garray);
1894: PetscLogObjectMemory(mat,len*sizeof(int));
1895: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1896: } else a->garray = 0;
1897:
1898: VecDuplicate(oldmat->lvec,&a->lvec);
1899: PetscLogObjectParent(mat,a->lvec);
1900: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1901: PetscLogObjectParent(mat,a->Mvctx);
1902: MatDuplicate(oldmat->A,cpvalues,&a->A);
1903: PetscLogObjectParent(mat,a->A);
1904: MatDuplicate(oldmat->B,cpvalues,&a->B);
1905: PetscLogObjectParent(mat,a->B);
1906: PetscFListDuplicate(matin->qlist,&mat->qlist);
1907: *newmat = mat;
1908: return(0);
1909: }
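/*
   Usage sketch: duplicating an assembled MPIAIJ matrix through the generic interface,
   which ends up in MatDuplicate_MPIAIJ() above; MAT_COPY_VALUES is assumed to be the
   MatDuplicateOption that copies the numerical values along with the nonzero structure.

      Mat B;
      MatDuplicate(A,MAT_COPY_VALUES,&B);
      ... use B independently of A ...
      MatDestroy(B);
*/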
1911: #include petscsys.h
1915: int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1916: {
1917: Mat A;
1918: PetscScalar *vals,*svals;
1919: MPI_Comm comm = ((PetscObject)viewer)->comm;
1920: MPI_Status status;
1921: int i,nz,ierr,j,rstart,rend,fd;
1922: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1923: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1924: int tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1927: MPI_Comm_size(comm,&size);
1928: MPI_Comm_rank(comm,&rank);
1929: if (!rank) {
1930: PetscViewerBinaryGetDescriptor(viewer,&fd);
1931: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1932: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1933: if (header[3] < 0) {
1934: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1935: }
1936: }
1938: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1939: M = header[1]; N = header[2];
1940: /* determine ownership of all rows */
1941: m = M/size + ((M % size) > rank);
1942: PetscMalloc((size+2)*sizeof(int),&rowners);
1943: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1944: rowners[0] = 0;
1945: for (i=2; i<=size; i++) {
1946: rowners[i] += rowners[i-1];
1947: }
1948: rstart = rowners[rank];
1949: rend = rowners[rank+1];
1951: /* distribute row lengths to all processors */
1952: PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1953: offlens = ourlens + (rend-rstart);
1954: if (!rank) {
1955: PetscMalloc(M*sizeof(int),&rowlengths);
1956: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1957: PetscMalloc(size*sizeof(int),&sndcounts);
1958: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1959: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1960: PetscFree(sndcounts);
1961: } else {
1962: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1963: }
1965: if (!rank) {
1966: /* calculate the number of nonzeros on each processor */
1967: PetscMalloc(size*sizeof(int),&procsnz);
1968: PetscMemzero(procsnz,size*sizeof(int));
1969: for (i=0; i<size; i++) {
1970: for (j=rowners[i]; j< rowners[i+1]; j++) {
1971: procsnz[i] += rowlengths[j];
1972: }
1973: }
1974: PetscFree(rowlengths);
1976: /* determine max buffer needed and allocate it */
1977: maxnz = 0;
1978: for (i=0; i<size; i++) {
1979: maxnz = PetscMax(maxnz,procsnz[i]);
1980: }
1981: PetscMalloc(maxnz*sizeof(int),&cols);
1983: /* read in my part of the matrix column indices */
1984: nz = procsnz[0];
1985: PetscMalloc(nz*sizeof(int),&mycols);
1986: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1988: /* read in everyone else's parts and ship them off */
1989: for (i=1; i<size; i++) {
1990: nz = procsnz[i];
1991: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1992: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1993: }
1994: PetscFree(cols);
1995: } else {
1996: /* determine buffer space needed for message */
1997: nz = 0;
1998: for (i=0; i<m; i++) {
1999: nz += ourlens[i];
2000: }
2001: PetscMalloc((nz+1)*sizeof(int),&mycols);
2003: /* receive message of column indices */
2004: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
2005: MPI_Get_count(&status,MPI_INT,&maxnz);
2006: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2007: }
2009: /* determine column ownership if matrix is not square */
2010: if (N != M) {
2011: n = N/size + ((N % size) > rank);
2012: MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
2013: cstart = cend - n;
2014: } else {
2015: cstart = rstart;
2016: cend = rend;
2017: n = cend - cstart;
2018: }
2020: /* loop over local rows, determining the number of off-diagonal entries */
2021: PetscMemzero(offlens,m*sizeof(int));
2022: jj = 0;
2023: for (i=0; i<m; i++) {
2024: for (j=0; j<ourlens[i]; j++) {
2025: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2026: jj++;
2027: }
2028: }
2030: /* create our matrix */
2031: for (i=0; i<m; i++) {
2032: ourlens[i] -= offlens[i];
2033: }
2034: MatCreate(comm,m,n,M,N,&A);
2035: MatSetType(A,type);
2036: MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);
2038: MatSetOption(A,MAT_COLUMNS_SORTED);
2039: for (i=0; i<m; i++) {
2040: ourlens[i] += offlens[i];
2041: }
2043: if (!rank) {
2044: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
2046: /* read in my part of the matrix numerical values */
2047: nz = procsnz[0];
2048: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2049:
2050: /* insert into matrix */
2051: jj = rstart;
2052: smycols = mycols;
2053: svals = vals;
2054: for (i=0; i<m; i++) {
2055: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2056: smycols += ourlens[i];
2057: svals += ourlens[i];
2058: jj++;
2059: }
2061: /* read in other processors and ship out */
2062: for (i=1; i<size; i++) {
2063: nz = procsnz[i];
2064: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2065: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2066: }
2067: PetscFree(procsnz);
2068: } else {
2069: /* receive numeric values */
2070: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2072: /* receive message of values */
2073: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2074: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2075: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2077: /* insert into matrix */
2078: jj = rstart;
2079: smycols = mycols;
2080: svals = vals;
2081: for (i=0; i<m; i++) {
2082: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2083: smycols += ourlens[i];
2084: svals += ourlens[i];
2085: jj++;
2086: }
2087: }
2088: PetscFree(ourlens);
2089: PetscFree(vals);
2090: PetscFree(mycols);
2091: PetscFree(rowners);
2093: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2094: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2095: *newmat = A;
2096: return(0);
2097: }
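/*
   Usage sketch: loading a parallel AIJ matrix from a PETSc binary file through the
   generic MatLoad() interface, which dispatches to MatLoad_MPIAIJ() above. The file
   name and the PETSC_BINARY_RDONLY open mode are illustrative assumptions.

      PetscViewer viewer;
      Mat         A;
      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);
      MatLoad(viewer,MATMPIAIJ,&A);
      PetscViewerDestroy(viewer);
*/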
2101: /*
2102: Not great since it makes two copies of the submatrix: first a SeqAIJ
2103: on each process, and then the end result formed by concatenating the local matrices.
2104: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2105: */
2106: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2107: {
2108: int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2109: int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2110: Mat *local,M,Mreuse;
2111: PetscScalar *vwork,*aa;
2112: MPI_Comm comm = mat->comm;
2113: Mat_SeqAIJ *aij;
2117: MPI_Comm_rank(comm,&rank);
2118: MPI_Comm_size(comm,&size);
2120: if (call == MAT_REUSE_MATRIX) {
2121: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2122: if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2123: local = &Mreuse;
2124: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2125: } else {
2126: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2127: Mreuse = *local;
2128: PetscFree(local);
2129: }
2131: /*
2132: m - number of local rows
2133: n - number of columns (same on all processors)
2134: rstart - first row in new global matrix generated
2135: */
2136: MatGetSize(Mreuse,&m,&n);
2137: if (call == MAT_INITIAL_MATRIX) {
2138: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2139: ii = aij->i;
2140: jj = aij->j;
2142: /*
2143: Determine the number of non-zeros in the diagonal and off-diagonal
2144: portions of the matrix in order to do correct preallocation
2145: */
2147: /* first get start and end of "diagonal" columns */
2148: if (csize == PETSC_DECIDE) {
2149: ISGetSize(isrow,&mglobal);
2150: if (mglobal == n) { /* square matrix */
2151: nlocal = m;
2152: } else {
2153: nlocal = n/size + ((n % size) > rank);
2154: }
2155: } else {
2156: nlocal = csize;
2157: }
2158: MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
2159: rstart = rend - nlocal;
2160: if (rank == size - 1 && rend != n) {
2161: SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2162: }
2164: /* next, compute all the lengths */
2165: PetscMalloc((2*m+1)*sizeof(int),&dlens);
2166: olens = dlens + m;
2167: for (i=0; i<m; i++) {
2168: jend = ii[i+1] - ii[i];
2169: olen = 0;
2170: dlen = 0;
2171: for (j=0; j<jend; j++) {
2172: if (*jj < rstart || *jj >= rend) olen++;
2173: else dlen++;
2174: jj++;
2175: }
2176: olens[i] = olen;
2177: dlens[i] = dlen;
2178: }
2179: MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);
2180: PetscFree(dlens);
2181: } else {
2182: int ml,nl;
2184: M = *newmat;
2185: MatGetLocalSize(M,&ml,&nl);
2186: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2187: MatZeroEntries(M);
2188: /*
2189: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2190: rather than the slower MatSetValues().
2191: */
2192: M->was_assembled = PETSC_TRUE;
2193: M->assembled = PETSC_FALSE;
2194: }
2195: MatGetOwnershipRange(M,&rstart,&rend);
2196: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2197: ii = aij->i;
2198: jj = aij->j;
2199: aa = aij->a;
2200: for (i=0; i<m; i++) {
2201: row = rstart + i;
2202: nz = ii[i+1] - ii[i];
2203: cwork = jj; jj += nz;
2204: vwork = aa; aa += nz;
2205: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2206: }
2208: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2209: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2210: *newmat = M;
2212: /* save submatrix used in processor for next request */
2213: if (call == MAT_INITIAL_MATRIX) {
2214: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2215: PetscObjectDereference((PetscObject)Mreuse);
2216: }
2218: return(0);
2219: }
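/*
   Usage sketch: extracting a parallel submatrix through the generic MatGetSubMatrix()
   interface, which calls MatGetSubMatrix_MPIAIJ() above. The stride index sets are
   illustrative; ISCreateStride() is assumed to take (comm,local length,first,step,IS*).

      IS  isrow,iscol;
      Mat sub;
      ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&isrow);
      ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&iscol);
      MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
      ... on later calls the same storage can be reused ...
      MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);
*/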
2223: /*@C
2224: MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2225: (the default parallel PETSc format). For good matrix assembly performance
2226: the user should preallocate the matrix storage by setting the parameters
2227: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2228: performance can be increased by more than a factor of 50.
2230: Collective on MPI_Comm
2232: Input Parameters:
2233: + A - the matrix
2234: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2235: (same value is used for all local rows)
2236: . d_nnz - array containing the number of nonzeros in the various rows of the
2237: DIAGONAL portion of the local submatrix (possibly different for each row)
2238: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2239: The size of this array is equal to the number of local rows, i.e., 'm'.
2240: You must leave room for the diagonal entry even if it is zero.
2241: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2242: submatrix (same value is used for all local rows).
2243: - o_nnz - array containing the number of nonzeros in the various rows of the
2244: OFF-DIAGONAL portion of the local submatrix (possibly different for
2245: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2246: structure. The size of this array is equal to the number
2247: of local rows, i.e., 'm'.
2249: The AIJ format (also called the Yale sparse matrix format or
2250: compressed row storage) is fully compatible with standard Fortran 77
2251: storage. That is, the stored row and column indices can begin at
2252: either one (as in Fortran) or zero. See the users manual for details.
2254: The user MUST specify either the local or global matrix dimensions
2255: (possibly both).
2257: The parallel matrix is partitioned such that the first m0 rows belong to
2258: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2259: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2261: The DIAGONAL portion of the local submatrix of a processor can be defined
2262: as the submatrix which is obtained by extracting the part corresponding
2263: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2264: first row that belongs to the processor, and r2 is the last row belonging
2265: to this processor. This is a square mxm matrix. The remaining portion
2266: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2268: If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2270: By default, this format uses inodes (identical nodes) when possible.
2271: We search for consecutive rows with the same nonzero structure, thereby
2272: reusing matrix information to achieve increased efficiency.
2274: Options Database Keys:
2275: + -mat_aij_no_inode - Do not use inodes
2276: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2277: - -mat_aij_oneindex - Internally use indexing starting at 1
2278: rather than 0. Note that when calling MatSetValues(),
2279: the user still MUST index entries starting at 0!
2281: Example usage:
2282:
2283: Consider the following 8x8 matrix with 34 non-zero values, which is
2284: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2285: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2286: as follows:
2288: .vb
2289: 1 2 0 | 0 3 0 | 0 4
2290: Proc0 0 5 6 | 7 0 0 | 8 0
2291: 9 0 10 | 11 0 0 | 12 0
2292: -------------------------------------
2293: 13 0 14 | 15 16 17 | 0 0
2294: Proc1 0 18 0 | 19 20 21 | 0 0
2295: 0 0 0 | 22 23 0 | 24 0
2296: -------------------------------------
2297: Proc2 25 26 27 | 0 0 28 | 29 0
2298: 30 0 0 | 31 32 33 | 0 34
2299: .ve
2301: This can be represented as a collection of submatrices as:
2303: .vb
2304: A B C
2305: D E F
2306: G H I
2307: .ve
2309: Here the submatrices A,B,C are owned by proc0, D,E,F are
2310: owned by proc1, and G,H,I are owned by proc2.
2312: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2313: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2314: The 'M','N' parameters are 8,8, and have the same values on all procs.
2316: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2317: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2318: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2319: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2320: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2321: matrix, and [DF] as another SeqAIJ matrix.
2323: When d_nz, o_nz parameters are specified, d_nz storage elements are
2324: allocated for every row of the local diagonal submatrix, and o_nz
2325: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2326: One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2327: the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2328: In this case, the values of d_nz,o_nz are:
2329: .vb
2330: proc0 : d_nz = 2, o_nz = 2
2331: proc1 : d_nz = 3, o_nz = 2
2332: proc2 : d_nz = 1, o_nz = 4
2333: .ve
2334: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2335: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2336: for proc2, i.e., we are using 12+15+10=37 storage locations to store
2337: 34 values.
2339: When d_nnz, o_nnz parameters are specified, the storage is specified
2340: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2341: In the above case the values for d_nnz,o_nnz are:
2342: .vb
2343: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2344: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2345: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2346: .ve
2347: Here the space allocated is the sum of all the above values, i.e., 34, and
2348: hence the preallocation is perfect.
2350: Level: intermediate
2352: .keywords: matrix, aij, compressed row, sparse, parallel
2354: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2355: @*/
2356: int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2357: {
2358: int ierr,(*f)(Mat,int,const int[],int,const int[]);
2361: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);
2362: if (f) {
2363: (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
2364: }
2365: return(0);
2366: }
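/*
   Usage sketch: the construction mechanism the man page above refers to, applied to the
   8x8 example with its per-process d_nz/o_nz values (2,2 on proc0; 3,2 on proc1; 1,4 on
   proc2). With PETSC_DECIDE local sizes and three processes the 8 rows split 3/3/2 as in
   the example.

      Mat A;
      MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,8,8,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/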
2370: /*@C
2371: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2372: (the default parallel PETSc format). For good matrix assembly performance
2373: the user should preallocate the matrix storage by setting the parameters
2374: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2375: performance can be increased by more than a factor of 50.
2377: Collective on MPI_Comm
2379: Input Parameters:
2380: + comm - MPI communicator
2381: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2382: This value should be the same as the local size used in creating the
2383: y vector for the matrix-vector product y = Ax.
2384: . n - This value should be the same as the local size used in creating the
2385: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2386: calculated if N is given) For square matrices n is almost always m.
2387: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2388: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2389: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2390: (same value is used for all local rows)
2391: . d_nnz - array containing the number of nonzeros in the various rows of the
2392: DIAGONAL portion of the local submatrix (possibly different for each row)
2393: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2394: The size of this array is equal to the number of local rows, i.e., 'm'.
2395: You must leave room for the diagonal entry even if it is zero.
2396: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2397: submatrix (same value is used for all local rows).
2398: - o_nnz - array containing the number of nonzeros in the various rows of the
2399: OFF-DIAGONAL portion of the local submatrix (possibly different for
2400: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2401: structure. The size of this array is equal to the number
2402: of local rows, i.e., 'm'.
2404: Output Parameter:
2405: . A - the matrix
2407: Notes:
2408: m,n,M,N parameters specify the size of the matrix, and its partitioning across
2409: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2410: storage requirements for this matrix.
2412: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2413: processor then it must be used on all processors that share the object for
2414: that argument.
2416: The AIJ format (also called the Yale sparse matrix format or
2417: compressed row storage) is fully compatible with standard Fortran 77
2418: storage. That is, the stored row and column indices can begin at
2419: either one (as in Fortran) or zero. See the users manual for details.
2421: The user MUST specify either the local or global matrix dimensions
2422: (possibly both).
2424: The parallel matrix is partitioned such that the first m0 rows belong to
2425: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2426: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2428: The DIAGONAL portion of the local submatrix of a processor can be defined
2429: as the submatrix which is obtained by extracting the part corresponding
2430: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2431: first row that belongs to the processor, and r2 is the last row belonging
2432: to this processor. This is a square mxm matrix. The remaining portion
2433: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2435: If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2437: When calling this routine with a single process communicator, a matrix of
2438: type MATSEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this
2439: type of communicator, use the construction mechanism:
2440: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2442: By default, this format uses inodes (identical nodes) when possible.
2443: We search for consecutive rows with the same nonzero structure, thereby
2444: reusing matrix information to achieve increased efficiency.
2446: Options Database Keys:
2447: + -mat_aij_no_inode - Do not use inodes
2448: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2449: - -mat_aij_oneindex - Internally use indexing starting at 1
2450: rather than 0. Note that when calling MatSetValues(),
2451: the user still MUST index entries starting at 0!
2454: Example usage:
2455:
2456: Consider the following 8x8 matrix with 34 non-zero values, which is
2457: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2458: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2459: as follows:
2461: .vb
2462: 1 2 0 | 0 3 0 | 0 4
2463: Proc0 0 5 6 | 7 0 0 | 8 0
2464: 9 0 10 | 11 0 0 | 12 0
2465: -------------------------------------
2466: 13 0 14 | 15 16 17 | 0 0
2467: Proc1 0 18 0 | 19 20 21 | 0 0
2468: 0 0 0 | 22 23 0 | 24 0
2469: -------------------------------------
2470: Proc2 25 26 27 | 0 0 28 | 29 0
2471: 30 0 0 | 31 32 33 | 0 34
2472: .ve
2474: This can be represented as a collection of submatrices as:
2476: .vb
2477: A B C
2478: D E F
2479: G H I
2480: .ve
2482: Here the submatrices A,B,C are owned by proc0, D,E,F are
2483: owned by proc1, and G,H,I are owned by proc2.
2485: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2486: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2487: The 'M','N' parameters are 8,8, and have the same values on all procs.
2489: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2490: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2491: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2492: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2493: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2494: matrix, and [DF] as another SeqAIJ matrix.
2496: When d_nz, o_nz parameters are specified, d_nz storage elements are
2497: allocated for every row of the local diagonal submatrix, and o_nz
2498: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2499: One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2500: the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2501: In this case, the values of d_nz,o_nz are:
2502: .vb
2503: proc0 : d_nz = 2, o_nz = 2
2504: proc1 : d_nz = 3, o_nz = 2
2505: proc2 : d_nz = 1, o_nz = 4
2506: .ve
2507: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2508: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2509: for proc2, i.e., we are using 12+15+10=37 storage locations to store
2510: 34 values.
2512: When d_nnz, o_nnz parameters are specified, the storage is specified
2513: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2514: In the above case the values for d_nnz,o_nnz are:
2515: .vb
2516: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2517: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2518: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2519: .ve
2520: Here the space allocated is the sum of all the above values, i.e., 34, and
2521: hence the preallocation is perfect.
2523: Level: intermediate
2525: .keywords: matrix, aij, compressed row, sparse, parallel
2527: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2528: @*/
2529: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2530: {
2531: int ierr,size;
2534: MatCreate(comm,m,n,M,N,A);
2535: MPI_Comm_size(comm,&size);
2536: if (size > 1) {
2537: MatSetType(*A,MATMPIAIJ);
2538: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2539: } else {
2540: MatSetType(*A,MATSEQAIJ);
2541: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2542: }
2543: return(0);
2544: }
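/*
   Usage sketch: creating the 8x8 example matrix from the man page above with exact
   per-row preallocation; each process passes its own local sizes and d_nnz/o_nnz
   arrays (the values shown are those for proc0).

      int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      Mat A;
      MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
*/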
2548: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2549: {
2550: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2552: *Ad = a->A;
2553: *Ao = a->B;
2554: *colmap = a->garray;
2555: return(0);
2556: }
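/*
   Usage sketch: accessing the two SeqAIJ pieces stored by an MPIAIJ matrix. Ad is the
   local DIAGONAL block, Ao the local OFF-DIAGONAL block, and colmap[j] is the global
   column number of local column j of Ao (the garray field returned above).

      Mat Ad,Ao;
      int *colmap;
      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
*/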
2560: int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2561: {
2562: int ierr,i;
2563: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2566: if (coloring->ctype == IS_COLORING_LOCAL) {
2567: ISColoringValue *allcolors,*colors;
2568: ISColoring ocoloring;
2570: /* set coloring for diagonal portion */
2571: MatSetColoring_SeqAIJ(a->A,coloring);
2573: /* set coloring for off-diagonal portion */
2574: ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2575: PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2576: for (i=0; i<a->B->n; i++) {
2577: colors[i] = allcolors[a->garray[i]];
2578: }
2579: PetscFree(allcolors);
2580: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2581: MatSetColoring_SeqAIJ(a->B,ocoloring);
2582: ISColoringDestroy(ocoloring);
2583: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2584: ISColoringValue *colors;
2585: int *larray;
2586: ISColoring ocoloring;
2588: /* set coloring for diagonal portion */
2589: PetscMalloc((a->A->n+1)*sizeof(int),&larray);
2590: for (i=0; i<a->A->n; i++) {
2591: larray[i] = i + a->cstart;
2592: }
2593: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2594: PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);
2595: for (i=0; i<a->A->n; i++) {
2596: colors[i] = coloring->colors[larray[i]];
2597: }
2598: PetscFree(larray);
2599: ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2600: MatSetColoring_SeqAIJ(a->A,ocoloring);
2601: ISColoringDestroy(ocoloring);
2603: /* set coloring for off-diagonal portion */
2604: PetscMalloc((a->B->n+1)*sizeof(int),&larray);
2605: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2606: PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2607: for (i=0; i<a->B->n; i++) {
2608: colors[i] = coloring->colors[larray[i]];
2609: }
2610: PetscFree(larray);
2611: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2612: MatSetColoring_SeqAIJ(a->B,ocoloring);
2613: ISColoringDestroy(ocoloring);
2614: } else {
2615: SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2616: }
2618: return(0);
2619: }
2623: int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2624: {
2625: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2626: int ierr;
2629: MatSetValuesAdic_SeqAIJ(a->A,advalues);
2630: MatSetValuesAdic_SeqAIJ(a->B,advalues);
2631: return(0);
2632: }
2636: int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2637: {
2638: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2639: int ierr;
2642: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2643: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2644: return(0);
2645: }
2649: /*@C
2650: MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2651: matrices from each processor
2653: Collective on MPI_Comm
2655: Input Parameters:
2656: + comm - the communicator the parallel matrix will live on
2657: - inmat - the input sequential matrix (one per process)
2659: Output Parameter:
2660: . outmat - the parallel matrix generated
2662: Level: advanced
2664: Notes: The number of columns MUST be the same in EACH of the separate
2665: input matrices.
2667: @*/
2668: int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2669: {
2670: int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2671: PetscScalar *values;
2672: PetscMap columnmap,rowmap;
2675:
2676: MatGetSize(inmat,&m,&n);
2678: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2679: PetscMapCreate(comm,&columnmap);
2680: PetscMapSetSize(columnmap,n);
2681: PetscMapSetType(columnmap,MAP_MPI);
2682: PetscMapGetLocalSize(columnmap,&n);
2683: PetscMapDestroy(columnmap);
2685: PetscMapCreate(comm,&rowmap);
2686: PetscMapSetLocalSize(rowmap,m);
2687: PetscMapSetType(rowmap,MAP_MPI);
2688: PetscMapGetLocalRange(rowmap,&rstart,0);
2689: PetscMapDestroy(rowmap);
2691: MatPreallocateInitialize(comm,m,n,dnz,onz);
2692: for (i=0;i<m;i++) {
2693: MatGetRow(inmat,i,&nnz,&indx,&values);
2694: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
2695: MatRestoreRow(inmat,i,&nnz,&indx,&values);
2696: }
2697: MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);
2698: MatPreallocateFinalize(dnz,onz);
2700: for (i=0;i<m;i++) {
2701: MatGetRow(inmat,i,&nnz,&indx,&values);
2702: I = i + rstart;
2703: MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);
2704: MatRestoreRow(inmat,i,&nnz,&indx,&values);
2705: }
2706: MatDestroy(inmat);
2707: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
2708: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
2710: return(0);
2711: }
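/*
   Usage sketch: each process builds its own sequential piece (all pieces must have the
   same number of columns) and MatMerge() stacks them into one parallel matrix. Note
   that MatMerge() destroys the input matrix, as the MatDestroy(inmat) call above shows.

      Mat seqpiece,par;
      MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,0,&seqpiece);
      ... fill and assemble seqpiece ...
      MatMerge(PETSC_COMM_WORLD,seqpiece,&par);
*/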
2715: int MatFileSplit(Mat A,char *outfile)
2716: {
2717: int ierr,rank,len,m,N,i,rstart,*indx,nnz;
2718: PetscViewer out;
2719: char *name;
2720: Mat B;
2721: PetscScalar *values;
2724:
2725: MatGetLocalSize(A,&m,0);
2726: MatGetSize(A,0,&N);
2727: MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);
2728: MatGetOwnershipRange(A,&rstart,0);
2729: for (i=0;i<m;i++) {
2730: MatGetRow(A,i+rstart,&nnz,&indx,&values);
2731: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
2732: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
2733: }
2734: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2735: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2737: MPI_Comm_rank(A->comm,&rank);
2738: PetscStrlen(outfile,&len);
2739: PetscMalloc((len+5)*sizeof(char),&name);
2740: sprintf(name,"%s.%d",outfile,rank);
2741: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);
2742: PetscFree(name);
2743: MatView(B,out);
2744: PetscViewerDestroy(out);
2745: MatDestroy(B);
2746: return(0);
2747: }
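/*
   Usage sketch: MatFileSplit() writes one sequential binary file per process; with
   outfile "mymat" on two processes it produces mymat.0 and mymat.1, following the
   sprintf("%s.%d",...) naming above.

      MatFileSplit(A,"mymat");
*/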