Actual source code: mpiaij.c
1: /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/vec/vecimpl.h"
5: #include "src/inline/spops.h"
7: EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8: EXTERN int DisAssemble_MPIAIJ(Mat);
9: EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10: EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11: EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12: EXTERN int MatPrintHelp_SeqAIJ(Mat);
13: EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
15: /*
16: Local utility routine that creates a mapping from the global column
17: number to the local number in the off-diagonal part of the local
18: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
19: a slightly higher hash-table cost; without it, it is not scalable (each processor
20: holds an order-N integer array) but access is fast.
21: */
22: int CreateColmap_MPIAIJ_Private(Mat mat)
23: {
24: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
25: int n = aij->B->n,i,ierr;
28: #if defined (PETSC_USE_CTABLE)
29: PetscTableCreate(n,&aij->colmap);
30: for (i=0; i<n; i++){
31: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
32: }
33: #else
34: PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
35: PetscLogObjectMemory(mat,mat->N*sizeof(int));
36: PetscMemzero(aij->colmap,mat->N*sizeof(int));
37: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
38: #endif
39: return(0);
40: }
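/*
   A minimal usage sketch (not part of PETSc; the names gcol and lcol are hypothetical):
   once the colmap exists, a global column number gcol is translated into a local
   column index lcol of the off-diagonal block B with the same lookup pattern used in
   MatSetValues_MPIAIJ() below; a result of -1 means the column is not (yet) present in B.
*/
#if defined (PETSC_USE_CTABLE)
   PetscTableFind(aij->colmap,gcol+1,&lcol);
   lcol--;                          /* the table stores indices shifted by one */
#else
   lcol = aij->colmap[gcol] - 1;    /* a stored 0 means "not present", hence the -1 */
#endif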
42: #define CHUNKSIZE 15
43: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv)
44: {
45:
46: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift;
47: rmax = aimax[row]; nrow = ailen[row];
48: col1 = col - shift;
49:
50: low = 0; high = nrow;
51: while (high-low > 5) {
52: t = (low+high)/2;
53: if (rp[t] > col) high = t;
54: else low = t;
55: }
56: for (_i=low; _i<high; _i++) {
57: if (rp[_i] > col1) break;
58: if (rp[_i] == col1) {
59: if (addv == ADD_VALUES) ap[_i] += value;
60: else ap[_i] = value;
61: goto a_noinsert;
62: }
63: }
64: if (nonew == 1) goto a_noinsert;
65: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
66: if (nrow >= rmax) {
67: /* there is no extra room in row, therefore enlarge */
68: int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j;
69: PetscScalar *new_a;
70:
71: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
72:
73: /* malloc new storage space */
74: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int);
75: ierr = PetscMalloc(len,&new_a);
76: new_j = (int*)(new_a + new_nz);
77: new_i = new_j + new_nz;
78:
79: /* copy over old data into new slots */
80: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
81: for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
82: PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));
83: len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift);
84: PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,
85: len*sizeof(int));
86: PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));
87: PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,
88: len*sizeof(PetscScalar));
89: /* free up old matrix storage */
90:
91: PetscFree(a->a);
92: if (!a->singlemalloc) {
93: PetscFree(a->i);
94: PetscFree(a->j);
95: }
96: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
97: a->singlemalloc = PETSC_TRUE;
98:
99: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift;
100: rmax = aimax[row] = aimax[row] + CHUNKSIZE;
101: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar)));
102: a->maxnz += CHUNKSIZE;
103: a->reallocs++;
104: }
105: N = nrow++ - 1; a->nz++;
106: /* shift up all the later entries in this row */
107: for (ii=N; ii>=_i; ii--) {
108: rp[ii+1] = rp[ii];
109: ap[ii+1] = ap[ii];
110: }
111: rp[_i] = col1;
112: ap[_i] = value;
113: a_noinsert: ;
114: ailen[row] = nrow;
115: }
117: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv)
118: {
119:
120: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift;
121: rmax = bimax[row]; nrow = bilen[row];
122: col1 = col - shift;
123:
124: low = 0; high = nrow;
125: while (high-low > 5) {
126: t = (low+high)/2;
127: if (rp[t] > col) high = t;
128: else low = t;
129: }
130: for (_i=low; _i<high; _i++) {
131: if (rp[_i] > col1) break;
132: if (rp[_i] == col1) {
133: if (addv == ADD_VALUES) ap[_i] += value;
134: else ap[_i] = value;
135: goto b_noinsert;
136: }
137: }
138: if (nonew == 1) goto b_noinsert;
139: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
140: if (nrow >= rmax) {
141: /* there is no extra room in row, therefore enlarge */
142: int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j;
143: PetscScalar *new_a;
144:
145: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
146:
147: /* malloc new storage space */
148: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int);
149: ierr = PetscMalloc(len,&new_a);
150: new_j = (int*)(new_a + new_nz);
151: new_i = new_j + new_nz;
152:
153: /* copy over old data into new slots */
154: for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];}
155: for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;}
156: PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));
157: len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift);
158: PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow,
159: len*sizeof(int));
160: PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));
161: PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow,
162: len*sizeof(PetscScalar));
163: /* free up old matrix storage */
164:
165: PetscFree(b->a);
166: if (!b->singlemalloc) {
167: PetscFree(b->i);
168: PetscFree(b->j);
169: }
170: ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;
171: b->singlemalloc = PETSC_TRUE;
172:
173: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift;
174: rmax = bimax[row] = bimax[row] + CHUNKSIZE;
175: PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar)));
176: b->maxnz += CHUNKSIZE;
177: b->reallocs++;
178: }
179: N = nrow++ - 1; b->nz++;
180: /* shift up all the later entries in this row */
181: for (ii=N; ii>=_i; ii--) {
182: rp[ii+1] = rp[ii];
183: ap[ii+1] = ap[ii];
184: }
185: rp[_i] = col1;
186: ap[_i] = value;
187: b_noinsert: ;
188: bilen[row] = nrow;
189: }
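/*
   Both macros above implement the same pattern: a short binary search to bracket the
   insertion point, a linear scan over the remaining few entries, and, if the column is
   new, a shift of the later entries in the row. The following standalone sketch (not
   PETSc code) shows that search-and-shift step for one CSR row whose caller has already
   guaranteed enough room; growing the row by CHUNKSIZE is omitted.
*/
static void InsertIntoSortedRow(int *rp,double *ap,int *nrow,int col,double value,int add)
{
  int low = 0,high = *nrow,i,ii;
  while (high-low > 5) {                 /* binary search until the interval is small */
    int t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;              /* insertion point found */
    if (rp[i] == col) {                  /* entry already present */
      if (add) ap[i] += value; else ap[i] = value;
      return;
    }
  }
  for (ii=(*nrow)++ - 1; ii>=i; ii--) {  /* shift later entries up one slot */
    rp[ii+1] = rp[ii];
    ap[ii+1] = ap[ii];
  }
  rp[i] = col;
  ap[i] = value;
}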
191: int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
192: {
193: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
194: PetscScalar value;
195: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
196: int cstart = aij->cstart,cend = aij->cend,row,col;
197: PetscTruth roworiented = aij->roworiented;
199: /* Some Variables required in the macro */
200: Mat A = aij->A;
201: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
202: int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
203: PetscScalar *aa = a->a;
204: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
205: Mat B = aij->B;
206: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
207: int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
208: PetscScalar *ba = b->a;
210: int *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
211: int nonew = a->nonew,shift = a->indexshift;
212: PetscScalar *ap;
215: for (i=0; i<m; i++) {
216: if (im[i] < 0) continue;
217: #if defined(PETSC_USE_BOPT_g)
218: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
219: #endif
220: if (im[i] >= rstart && im[i] < rend) {
221: row = im[i] - rstart;
222: for (j=0; j<n; j++) {
223: if (in[j] >= cstart && in[j] < cend){
224: col = in[j] - cstart;
225: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226: if (ignorezeroentries && value == 0.0) continue;
227: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
228: /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
229: } else if (in[j] < 0) continue;
230: #if defined(PETSC_USE_BOPT_g)
231: else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
232: #endif
233: else {
234: if (mat->was_assembled) {
235: if (!aij->colmap) {
236: CreateColmap_MPIAIJ_Private(mat);
237: }
238: #if defined (PETSC_USE_CTABLE)
239: PetscTableFind(aij->colmap,in[j]+1,&col);
240: col--;
241: #else
242: col = aij->colmap[in[j]] - 1;
243: #endif
244: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
245: DisAssemble_MPIAIJ(mat);
246: col = in[j];
247: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
248: B = aij->B;
249: b = (Mat_SeqAIJ*)B->data;
250: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
251: ba = b->a;
252: }
253: } else col = in[j];
254: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
255: if (ignorezeroentries && value == 0.0) continue;
256: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
257: /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
258: }
259: }
260: } else {
261: if (!aij->donotstash) {
262: if (roworiented) {
263: if (ignorezeroentries && v[i*n] == 0.0) continue;
264: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
265: } else {
266: if (ignorezeroentries && v[i] == 0.0) continue;
267: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
268: }
269: }
270: }
271: }
272: return(0);
273: }
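/*
   From the caller's point of view the routine above is reached through the generic
   MatSetValues() interface. A typical (hypothetical) usage on an MPIAIJ matrix A:
   values for locally owned rows go straight into the A or B blocks, while rows owned
   by other processes are stashed until assembly.
*/
   PetscScalar v   = 1.0;
   int         row = 0,col = 0,ierr;
   ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   /* sends stashed off-process entries */
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     /* receives and inserts them */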
275: int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
276: {
277: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
278: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
279: int cstart = aij->cstart,cend = aij->cend,row,col;
282: for (i=0; i<m; i++) {
283: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
284: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
285: if (idxm[i] >= rstart && idxm[i] < rend) {
286: row = idxm[i] - rstart;
287: for (j=0; j<n; j++) {
288: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
289: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
290: if (idxn[j] >= cstart && idxn[j] < cend){
291: col = idxn[j] - cstart;
292: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
293: } else {
294: if (!aij->colmap) {
295: CreateColmap_MPIAIJ_Private(mat);
296: }
297: #if defined (PETSC_USE_CTABLE)
298: PetscTableFind(aij->colmap,idxn[j]+1,&col);
299: col --;
300: #else
301: col = aij->colmap[idxn[j]] - 1;
302: #endif
303: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
304: else {
305: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
306: }
307: }
308: }
309: } else {
310: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
311: }
312: }
313: return(0);
314: }
316: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
317: {
318: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
319: int ierr,nstash,reallocs;
320: InsertMode addv;
323: if (aij->donotstash) {
324: return(0);
325: }
327: /* make sure all processors are either in INSERTMODE or ADDMODE */
328: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
329: if (addv == (ADD_VALUES|INSERT_VALUES)) {
330: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
331: }
332: mat->insertmode = addv; /* in case this processor had no cache */
334: MatStashScatterBegin_Private(&mat->stash,aij->rowners);
335: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
336: PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
337: return(0);
338: }
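/*
   Note on the MPI_BOR reduction above: assuming the usual InsertMode enumeration
   (NOT_SET_VALUES = 0, INSERT_VALUES = 1, ADD_VALUES = 2), the bitwise OR of all
   processors' modes equals ADD_VALUES|INSERT_VALUES (= 3) exactly when some processors
   inserted while others added, which is the mixed-mode error being detected; a
   processor that set no values contributes 0 and is harmless.
*/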
341: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
342: {
343: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
344: int i,j,rstart,ncols,n,ierr,flg;
345: int *row,*col,other_disassembled;
346: PetscScalar *val;
347: InsertMode addv = mat->insertmode;
348: #if defined(PETSC_HAVE_SUPERLUDIST)
349: PetscTruth flag;
350: #endif
353: if (!aij->donotstash) {
354: while (1) {
355: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
356: if (!flg) break;
358: for (i=0; i<n;) {
359: /* Now identify the consecutive vals belonging to the same row */
360: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
361: if (j < n) ncols = j-i;
362: else ncols = n-i;
363: /* Now assemble all these values with a single function call */
364: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
365: i = j;
366: }
367: }
368: MatStashScatterEnd_Private(&mat->stash);
369: }
370:
371: MatAssemblyBegin(aij->A,mode);
372: MatAssemblyEnd(aij->A,mode);
374: /* determine if any processor has disassembled; if so we must
375: also disassemble ourselves, in order that we may reassemble. */
376: /*
377: if the nonzero structure of submatrix B cannot change then we know that
378: no processor disassembled, thus we can skip this step
379: */
380: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
381: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
382: if (mat->was_assembled && !other_disassembled) {
383: DisAssemble_MPIAIJ(mat);
384: }
385: }
387: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
388: MatSetUpMultiply_MPIAIJ(mat);
389: }
390: MatAssemblyBegin(aij->B,mode);
391: MatAssemblyEnd(aij->B,mode);
393: if (aij->rowvalues) {
394: PetscFree(aij->rowvalues);
395: aij->rowvalues = 0;
396: }
397: #if defined(PETSC_HAVE_SUPERLUDIST)
398: PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);
399: if (flag) { MatUseSuperLU_DIST_MPIAIJ(mat); }
400: #endif
401: return(0);
402: }
404: int MatZeroEntries_MPIAIJ(Mat A)
405: {
406: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
407: int ierr;
410: MatZeroEntries(l->A);
411: MatZeroEntries(l->B);
412: return(0);
413: }
415: int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
416: {
417: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
418: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
419: int *procs,*nprocs,j,idx,nsends,*work,row;
420: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
421: int *rvalues,tag = A->tag,count,base,slen,n,*source;
422: int *lens,imdex,*lrows,*values,rstart=l->rstart;
423: MPI_Comm comm = A->comm;
424: MPI_Request *send_waits,*recv_waits;
425: MPI_Status recv_status,*send_status;
426: IS istmp;
427: PetscTruth found;
430: ISGetLocalSize(is,&N);
431: ISGetIndices(is,&rows);
433: /* first count number of contributors to each processor */
434: PetscMalloc(2*size*sizeof(int),&nprocs);
435: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
436: procs = nprocs + size;
437: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
438: for (i=0; i<N; i++) {
439: idx = rows[i];
440: found = PETSC_FALSE;
441: for (j=0; j<size; j++) {
442: if (idx >= owners[j] && idx < owners[j+1]) {
443: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
444: }
445: }
446: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
447: }
448: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
450: /* inform other processors of number of messages and max length*/
451: PetscMalloc(2*size*sizeof(int),&work);
452: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
453: nrecvs = work[size+rank];
454: nmax = work[rank];
455: ierr = PetscFree(work);
457: /* post receives: */
458: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
459: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
460: for (i=0; i<nrecvs; i++) {
461: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
462: }
464: /* do sends:
465: 1) starts[i] gives the starting index in svalues for stuff going to
466: the ith processor
467: */
468: PetscMalloc((N+1)*sizeof(int),&svalues);
469: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
470: PetscMalloc((size+1)*sizeof(int),&starts);
471: starts[0] = 0;
472: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
473: for (i=0; i<N; i++) {
474: svalues[starts[owner[i]]++] = rows[i];
475: }
476: ISRestoreIndices(is,&rows);
478: starts[0] = 0;
479: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
480: count = 0;
481: for (i=0; i<size; i++) {
482: if (procs[i]) {
483: MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
484: }
485: }
486: PetscFree(starts);
488: base = owners[rank];
490: /* wait on receives */
491: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
492: source = lens + nrecvs;
493: count = nrecvs; slen = 0;
494: while (count) {
495: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
496: /* unpack receives into our local space */
497: MPI_Get_count(&recv_status,MPI_INT,&n);
498: source[imdex] = recv_status.MPI_SOURCE;
499: lens[imdex] = n;
500: slen += n;
501: count--;
502: }
503: PetscFree(recv_waits);
504:
505: /* move the data into the send scatter */
506: PetscMalloc((slen+1)*sizeof(int),&lrows);
507: count = 0;
508: for (i=0; i<nrecvs; i++) {
509: values = rvalues + i*nmax;
510: for (j=0; j<lens[i]; j++) {
511: lrows[count++] = values[j] - base;
512: }
513: }
514: PetscFree(rvalues);
515: PetscFree(lens);
516: PetscFree(owner);
517: PetscFree(nprocs);
518:
519: /* actually zap the local rows */
520: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
521: PetscLogObjectParent(A,istmp);
523: /*
524: Zero the required rows. If the "diagonal block" of the matrix
525: is square and the user wishes to set the diagonal we use separate
526: code so that MatSetValues() is not called for each diagonal entry,
527: which would allocate new memory with many mallocs and slow things down.
529: Contributed by: Mathew Knepley
530: */
531: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
532: MatZeroRows(l->B,istmp,0);
533: if (diag && (l->A->M == l->A->N)) {
534: ierr = MatZeroRows(l->A,istmp,diag);
535: } else if (diag) {
536: MatZeroRows(l->A,istmp,0);
537: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
538: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
539: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
540: }
541: for (i = 0; i < slen; i++) {
542: row = lrows[i] + rstart;
543: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
544: }
545: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
546: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
547: } else {
548: MatZeroRows(l->A,istmp,0);
549: }
550: ISDestroy(istmp);
551: PetscFree(lrows);
553: /* wait on sends */
554: if (nsends) {
555: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
556: MPI_Waitall(nsends,send_waits,send_status);
557: PetscFree(send_status);
558: }
559: PetscFree(send_waits);
560: PetscFree(svalues);
562: return(0);
563: }
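/*
   A typical (hypothetical) call sequence for the routine above. Each process may list
   any global rows, including rows it does not own; they are routed to their owners as
   described, and here the retained diagonal entries are set to 1.0.
*/
   IS          is;
   int         rows[] = {0,5,7},ierr;
   PetscScalar one = 1.0;
   ierr = ISCreateGeneral(PETSC_COMM_WORLD,3,rows,&is);CHKERRQ(ierr);
   ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
   ierr = ISDestroy(is);CHKERRQ(ierr);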
565: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
566: {
567: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
568: int ierr,nt;
571: VecGetLocalSize(xx,&nt);
572: if (nt != A->n) {
573: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
574: }
575: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
576: (*a->A->ops->mult)(a->A,xx,yy);
577: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
578: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
579: return(0);
580: }
582: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
583: {
584: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
585: int ierr;
588: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
589: (*a->A->ops->multadd)(a->A,xx,yy,zz);
590: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
591: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
592: return(0);
593: }
595: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
596: {
597: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
598: int ierr;
601: /* do nondiagonal part */
602: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
603: /* send it on its way */
604: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
605: /* do local part */
606: (*a->A->ops->multtranspose)(a->A,xx,yy);
607: /* receive remote parts: note this assumes the values are not actually */
608: /* inserted in yy until the next line, which is true for this implementation */
609: /* but may not always be true. */
610: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
611: return(0);
612: }
614: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
615: {
616: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
617: int ierr;
620: /* do nondiagonal part */
621: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
622: /* send it on its way */
623: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
624: /* do local part */
625: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
626: /* receive remote parts: note this assumes the values are not actually */
627: /* inserted in zz until the next line, which is true for this implementation */
628: /* but may not always be true. */
629: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
630: return(0);
631: }
633: /*
634: This only works correctly for square matrices where the subblock A->A is the
635: diagonal block
636: */
637: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
638: {
639: int ierr;
640: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
643: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
644: if (a->rstart != a->cstart || a->rend != a->cend) {
645: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
646: }
647: MatGetDiagonal(a->A,v);
648: return(0);
649: }
651: int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
652: {
653: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
654: int ierr;
657: MatScale(aa,a->A);
658: MatScale(aa,a->B);
659: return(0);
660: }
662: int MatDestroy_MPIAIJ(Mat mat)
663: {
664: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
665: int ierr;
668: #if defined(PETSC_USE_LOG)
669: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
670: #endif
671: MatStashDestroy_Private(&mat->stash);
672: PetscFree(aij->rowners);
673: MatDestroy(aij->A);
674: MatDestroy(aij->B);
675: #if defined (PETSC_USE_CTABLE)
676: if (aij->colmap) {PetscTableDelete(aij->colmap);}
677: #else
678: if (aij->colmap) {PetscFree(aij->colmap);}
679: #endif
680: if (aij->garray) {PetscFree(aij->garray);}
681: if (aij->lvec) {VecDestroy(aij->lvec);}
682: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
683: if (aij->rowvalues) {PetscFree(aij->rowvalues);}
684: PetscFree(aij);
685: return(0);
686: }
688: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
689: {
690: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
691: Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
692: int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
693: PetscTruth isdraw,isascii,flg;
694: PetscViewer sviewer;
695: PetscViewerFormat format;
698: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
699: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
700: if (isascii) {
701: PetscViewerGetFormat(viewer,&format);
702: if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
703: MatInfo info;
704: MPI_Comm_rank(mat->comm,&rank);
705: MatGetInfo(mat,MAT_LOCAL,&info);
706: PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
707: if (flg) {
708: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
709: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
710: } else {
711: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
712: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
713: }
714: MatGetInfo(aij->A,MAT_LOCAL,&info);
715: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
716: MatGetInfo(aij->B,MAT_LOCAL,&info);
717: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
718: PetscViewerFlush(viewer);
719: VecScatterView(aij->Mvctx,viewer);
720: return(0);
721: } else if (format == PETSC_VIEWER_ASCII_INFO) {
722: return(0);
723: }
724: } else if (isdraw) {
725: PetscDraw draw;
726: PetscTruth isnull;
727: PetscViewerDrawGetDraw(viewer,0,&draw);
728: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
729: }
731: if (size == 1) {
732: PetscObjectSetName((PetscObject)aij->A,mat->name);
733: MatView(aij->A,viewer);
734: } else {
735: /* assemble the entire matrix onto first processor. */
736: Mat A;
737: Mat_SeqAIJ *Aloc;
738: int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
739: PetscScalar *a;
741: if (!rank) {
742: MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
743: } else {
744: MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
745: }
746: PetscLogObjectParent(mat,A);
748: /* copy over the A part */
749: Aloc = (Mat_SeqAIJ*)aij->A->data;
750: m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
751: row = aij->rstart;
752: for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
753: for (i=0; i<m; i++) {
754: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
755: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
756: }
757: aj = Aloc->j;
758: for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
760: /* copy over the B part */
761: Aloc = (Mat_SeqAIJ*)aij->B->data;
762: m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
763: row = aij->rstart;
764: PetscMalloc((ai[m]+1)*sizeof(int),&cols);
765: ct = cols;
766: for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
767: for (i=0; i<m; i++) {
768: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
769: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
770: }
771: PetscFree(ct);
772: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
773: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
774: /*
775: Everyone has to call to draw the matrix since the graphics waits are
776: synchronized across all processors that share the PetscDraw object
777: */
778: PetscViewerGetSingleton(viewer,&sviewer);
779: if (!rank) {
780: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
781: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
782: }
783: PetscViewerRestoreSingleton(viewer,&sviewer);
784: MatDestroy(A);
785: }
786: return(0);
787: }
789: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
790: {
791: int ierr;
792: PetscTruth isascii,isdraw,issocket,isbinary;
793:
795: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
796: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
797: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
798: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
799: if (isascii || isdraw || isbinary || issocket) {
800: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
801: } else {
802: SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
803: }
804: return(0);
805: }
809: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
810: {
811: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
812: int ierr;
813: Vec bb1;
814: PetscScalar mone=-1.0;
817: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
819: VecDuplicate(bb,&bb1);
821: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
822: if (flag & SOR_ZERO_INITIAL_GUESS) {
823: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
824: its--;
825: }
826:
827: while (its--) {
828: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
829: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
831: /* update rhs: bb1 = bb - B*x */
832: VecScale(&mone,mat->lvec);
833: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
835: /* local sweep */
836: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);
837:
838: }
839: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
840: if (flag & SOR_ZERO_INITIAL_GUESS) {
841: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
842: its--;
843: }
844: while (its--) {
845: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
846: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
848: /* update rhs: bb1 = bb - B*x */
849: VecScale(&mone,mat->lvec);
850: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
852: /* local sweep */
853: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
854:
855: }
856: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
857: if (flag & SOR_ZERO_INITIAL_GUESS) {
858: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
859: its--;
860: }
861: while (its--) {
862: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
863: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
865: /* update rhs: bb1 = bb - B*x */
866: VecScale(&mone,mat->lvec);
867: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
869: /* local sweep */
870: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
871:
872: }
873: } else {
874: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
875: }
877: VecDestroy(bb1);
878: return(0);
879: }
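/*
   Each "local" sweep above applies SOR to the diagonal block only. The off-diagonal
   coupling is frozen at the current iterate: the scatter brings the needed off-process
   entries of x into lvec, the right-hand side is updated as bb1 = bb - B*x_off
   (implemented as scaling lvec by -1 followed by a MatMultAdd), and the sequential
   relaxation is then run on A with bb1. Globally this amounts to a processor-block
   Jacobi iteration with SOR used as the block solver.
*/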
881: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
882: {
883: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
884: Mat A = mat->A,B = mat->B;
885: int ierr;
886: PetscReal isend[5],irecv[5];
889: info->block_size = 1.0;
890: MatGetInfo(A,MAT_LOCAL,info);
891: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
892: isend[3] = info->memory; isend[4] = info->mallocs;
893: MatGetInfo(B,MAT_LOCAL,info);
894: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
895: isend[3] += info->memory; isend[4] += info->mallocs;
896: if (flag == MAT_LOCAL) {
897: info->nz_used = isend[0];
898: info->nz_allocated = isend[1];
899: info->nz_unneeded = isend[2];
900: info->memory = isend[3];
901: info->mallocs = isend[4];
902: } else if (flag == MAT_GLOBAL_MAX) {
903: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
904: info->nz_used = irecv[0];
905: info->nz_allocated = irecv[1];
906: info->nz_unneeded = irecv[2];
907: info->memory = irecv[3];
908: info->mallocs = irecv[4];
909: } else if (flag == MAT_GLOBAL_SUM) {
910: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
911: info->nz_used = irecv[0];
912: info->nz_allocated = irecv[1];
913: info->nz_unneeded = irecv[2];
914: info->memory = irecv[3];
915: info->mallocs = irecv[4];
916: }
917: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
918: info->fill_ratio_needed = 0;
919: info->factor_mallocs = 0;
920: info->rows_global = (double)matin->M;
921: info->columns_global = (double)matin->N;
922: info->rows_local = (double)matin->m;
923: info->columns_local = (double)matin->N;
925: return(0);
926: }
928: int MatSetOption_MPIAIJ(Mat A,MatOption op)
929: {
930: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
931: int ierr;
934: switch (op) {
935: case MAT_NO_NEW_NONZERO_LOCATIONS:
936: case MAT_YES_NEW_NONZERO_LOCATIONS:
937: case MAT_COLUMNS_UNSORTED:
938: case MAT_COLUMNS_SORTED:
939: case MAT_NEW_NONZERO_ALLOCATION_ERR:
940: case MAT_KEEP_ZEROED_ROWS:
941: case MAT_NEW_NONZERO_LOCATION_ERR:
942: case MAT_USE_INODES:
943: case MAT_DO_NOT_USE_INODES:
944: case MAT_IGNORE_ZERO_ENTRIES:
945: MatSetOption(a->A,op);
946: MatSetOption(a->B,op);
947: break;
948: case MAT_ROW_ORIENTED:
949: a->roworiented = PETSC_TRUE;
950: MatSetOption(a->A,op);
951: MatSetOption(a->B,op);
952: break;
953: case MAT_ROWS_SORTED:
954: case MAT_ROWS_UNSORTED:
955: case MAT_YES_NEW_DIAGONALS:
956: case MAT_USE_SINGLE_PRECISION_SOLVES:
957: PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
958: break;
959: case MAT_COLUMN_ORIENTED:
960: a->roworiented = PETSC_FALSE;
961: MatSetOption(a->A,op);
962: MatSetOption(a->B,op);
963: break;
964: case MAT_IGNORE_OFF_PROC_ENTRIES:
965: a->donotstash = PETSC_TRUE;
966: break;
967: case MAT_NO_NEW_DIAGONALS:
968: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
969: default:
970: SETERRQ(PETSC_ERR_SUP,"unknown option");
971: }
972: return(0);
973: }
975: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
976: {
977: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
978: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
979: int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
980: int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
981: int *cmap,*idx_p;
984: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
985: mat->getrowactive = PETSC_TRUE;
987: if (!mat->rowvalues && (idx || v)) {
988: /*
989: allocate enough space to hold information from the longest row.
990: */
991: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
992: int max = 1,tmp;
993: for (i=0; i<matin->m; i++) {
994: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
995: if (max < tmp) { max = tmp; }
996: }
997: PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
998: mat->rowindices = (int*)(mat->rowvalues + max);
999: }
1001: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1002: lrow = row - rstart;
1004: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1005: if (!v) {pvA = 0; pvB = 0;}
1006: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1007: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1008: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1009: nztot = nzA + nzB;
1011: cmap = mat->garray;
1012: if (v || idx) {
1013: if (nztot) {
1014: /* Sort by increasing column numbers, assuming A and B already sorted */
1015: int imark = -1;
1016: if (v) {
1017: *v = v_p = mat->rowvalues;
1018: for (i=0; i<nzB; i++) {
1019: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1020: else break;
1021: }
1022: imark = i;
1023: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1024: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1025: }
1026: if (idx) {
1027: *idx = idx_p = mat->rowindices;
1028: if (imark > -1) {
1029: for (i=0; i<imark; i++) {
1030: idx_p[i] = cmap[cworkB[i]];
1031: }
1032: } else {
1033: for (i=0; i<nzB; i++) {
1034: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1035: else break;
1036: }
1037: imark = i;
1038: }
1039: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1040: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1041: }
1042: } else {
1043: if (idx) *idx = 0;
1044: if (v) *v = 0;
1045: }
1046: }
1047: *nz = nztot;
1048: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1049: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1050: return(0);
1051: }
1053: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1054: {
1055: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1058: if (aij->getrowactive == PETSC_FALSE) {
1059: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1060: }
1061: aij->getrowactive = PETSC_FALSE;
1062: return(0);
1063: }
1065: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1066: {
1067: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1068: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1069: int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1070: PetscReal sum = 0.0;
1071: PetscScalar *v;
1074: if (aij->size == 1) {
1075: MatNorm(aij->A,type,norm);
1076: } else {
1077: if (type == NORM_FROBENIUS) {
1078: v = amat->a;
1079: for (i=0; i<amat->nz; i++) {
1080: #if defined(PETSC_USE_COMPLEX)
1081: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1082: #else
1083: sum += (*v)*(*v); v++;
1084: #endif
1085: }
1086: v = bmat->a;
1087: for (i=0; i<bmat->nz; i++) {
1088: #if defined(PETSC_USE_COMPLEX)
1089: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1090: #else
1091: sum += (*v)*(*v); v++;
1092: #endif
1093: }
1094: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1095: *norm = sqrt(*norm);
1096: } else if (type == NORM_1) { /* max column norm */
1097: PetscReal *tmp,*tmp2;
1098: int *jj,*garray = aij->garray;
1099: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1100: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1101: PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1102: *norm = 0.0;
1103: v = amat->a; jj = amat->j;
1104: for (j=0; j<amat->nz; j++) {
1105: tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
1106: }
1107: v = bmat->a; jj = bmat->j;
1108: for (j=0; j<bmat->nz; j++) {
1109: tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1110: }
1111: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1112: for (j=0; j<mat->N; j++) {
1113: if (tmp2[j] > *norm) *norm = tmp2[j];
1114: }
1115: PetscFree(tmp);
1116: PetscFree(tmp2);
1117: } else if (type == NORM_INFINITY) { /* max row norm */
1118: PetscReal ntemp = 0.0;
1119: for (j=0; j<aij->A->m; j++) {
1120: v = amat->a + amat->i[j] + shift;
1121: sum = 0.0;
1122: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1123: sum += PetscAbsScalar(*v); v++;
1124: }
1125: v = bmat->a + bmat->i[j] + shift;
1126: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1127: sum += PetscAbsScalar(*v); v++;
1128: }
1129: if (sum > ntemp) ntemp = sum;
1130: }
1131: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1132: } else {
1133: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1134: }
1135: }
1136: return(0);
1137: }
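/*
   Note on the reductions above: the Frobenius norm is computed as
   sqrt( sum_ij |a_ij|^2 ) from per-process partial sums combined with MPI_SUM;
   the 1-norm (maximum absolute column sum) accumulates per-column sums locally,
   adds them across processes with MPI_SUM, and then takes the maximum; the
   infinity norm (maximum absolute row sum) is a local maximum over the rows each
   process owns, combined with MPI_MAX.
*/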
1139: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1140: {
1141: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1142: Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1143: int ierr,shift = Aloc->indexshift;
1144: int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1145: Mat B;
1146: PetscScalar *array;
1149: if (!matout && M != N) {
1150: SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1151: }
1153: MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1155: /* copy over the A part */
1156: Aloc = (Mat_SeqAIJ*)a->A->data;
1157: m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1158: row = a->rstart;
1159: for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1160: for (i=0; i<m; i++) {
1161: MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1162: row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1163: }
1164: aj = Aloc->j;
1165: for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1167: /* copy over the B part */
1168: Aloc = (Mat_SeqAIJ*)a->B->data;
1169: m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1170: row = a->rstart;
1171: PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);
1172: ct = cols;
1173: for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1174: for (i=0; i<m; i++) {
1175: MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1176: row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1177: }
1178: PetscFree(ct);
1179: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1180: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1181: if (matout) {
1182: *matout = B;
1183: } else {
1184: MatHeaderCopy(A,B);
1185: }
1186: return(0);
1187: }
1189: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1190: {
1191: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1192: Mat a = aij->A,b = aij->B;
1193: int ierr,s1,s2,s3;
1196: MatGetLocalSize(mat,&s2,&s3);
1197: if (rr) {
1198: VecGetLocalSize(rr,&s1);
1199: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1200: /* Overlap communication with computation. */
1201: VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1202: }
1203: if (ll) {
1204: VecGetLocalSize(ll,&s1);
1205: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1206: (*b->ops->diagonalscale)(b,ll,0);
1207: }
1208: /* scale the diagonal block */
1209: (*a->ops->diagonalscale)(a,ll,rr);
1211: if (rr) {
1212: /* Do a scatter end and then right scale the off-diagonal block */
1213: VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1214: (*b->ops->diagonalscale)(b,0,aij->lvec);
1215: }
1216:
1217: return(0);
1218: }
1221: int MatPrintHelp_MPIAIJ(Mat A)
1222: {
1223: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1224: int ierr;
1227: if (!a->rank) {
1228: MatPrintHelp_SeqAIJ(a->A);
1229: }
1230: return(0);
1231: }
1233: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1234: {
1236: *bs = 1;
1237: return(0);
1238: }
1239: int MatSetUnfactored_MPIAIJ(Mat A)
1240: {
1241: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1242: int ierr;
1245: MatSetUnfactored(a->A);
1246: return(0);
1247: }
1249: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1250: {
1251: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1252: Mat a,b,c,d;
1253: PetscTruth flg;
1254: int ierr;
1257: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1258: if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1259: a = matA->A; b = matA->B;
1260: c = matB->A; d = matB->B;
1262: MatEqual(a,c,&flg);
1263: if (flg == PETSC_TRUE) {
1264: MatEqual(b,d,&flg);
1265: }
1266: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1267: return(0);
1268: }
1270: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1271: {
1272: int ierr;
1273: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1274: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1275: PetscTruth flg;
1278: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1279: if (str != SAME_NONZERO_PATTERN || !flg) {
1280: /* because of the column compression in the off-processor part of the matrix a->B,
1281: the number of columns in a->B and b->B may be different, hence we cannot call
1282: the MatCopy() directly on the two parts. If need be, we can provide a more
1283: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1284: then copying the submatrices */
1285: MatCopy_Basic(A,B,str);
1286: } else {
1287: MatCopy(a->A,b->A,str);
1288: MatCopy(a->B,b->B,str);
1289: }
1290: return(0);
1291: }
1293: int MatSetUpPreallocation_MPIAIJ(Mat A)
1294: {
1295: int ierr;
1298: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1299: return(0);
1300: }
1302: EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1303: EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1304: EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1305: EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1306: EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1307: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1308: EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1309: #endif
1311: #include "petscblaslapack.h"
1313: int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1314: {
1315: int ierr,one = 1;
1316: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1317: Mat_SeqAIJ *x,*y;
1320: if (str == SAME_NONZERO_PATTERN) {
1321: x = (Mat_SeqAIJ *)xx->A->data;
1322: y = (Mat_SeqAIJ *)yy->A->data;
1323: BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1324: x = (Mat_SeqAIJ *)xx->B->data;
1325: y = (Mat_SeqAIJ *)yy->B->data;
1326: BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1327: } else {
1328: MatAXPY_Basic(a,X,Y,str);
1329: }
1330: return(0);
1331: }
1333: /* -------------------------------------------------------------------*/
1334: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1335: MatGetRow_MPIAIJ,
1336: MatRestoreRow_MPIAIJ,
1337: MatMult_MPIAIJ,
1338: MatMultAdd_MPIAIJ,
1339: MatMultTranspose_MPIAIJ,
1340: MatMultTransposeAdd_MPIAIJ,
1341: 0,
1342: 0,
1343: 0,
1344: 0,
1345: 0,
1346: 0,
1347: MatRelax_MPIAIJ,
1348: MatTranspose_MPIAIJ,
1349: MatGetInfo_MPIAIJ,
1350: MatEqual_MPIAIJ,
1351: MatGetDiagonal_MPIAIJ,
1352: MatDiagonalScale_MPIAIJ,
1353: MatNorm_MPIAIJ,
1354: MatAssemblyBegin_MPIAIJ,
1355: MatAssemblyEnd_MPIAIJ,
1356: 0,
1357: MatSetOption_MPIAIJ,
1358: MatZeroEntries_MPIAIJ,
1359: MatZeroRows_MPIAIJ,
1360: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1361: MatLUFactorSymbolic_MPIAIJ_TFS,
1362: #else
1363: 0,
1364: #endif
1365: 0,
1366: 0,
1367: 0,
1368: MatSetUpPreallocation_MPIAIJ,
1369: 0,
1370: 0,
1371: 0,
1372: 0,
1373: MatDuplicate_MPIAIJ,
1374: 0,
1375: 0,
1376: 0,
1377: 0,
1378: MatAXPY_MPIAIJ,
1379: MatGetSubMatrices_MPIAIJ,
1380: MatIncreaseOverlap_MPIAIJ,
1381: MatGetValues_MPIAIJ,
1382: MatCopy_MPIAIJ,
1383: MatPrintHelp_MPIAIJ,
1384: MatScale_MPIAIJ,
1385: 0,
1386: 0,
1387: 0,
1388: MatGetBlockSize_MPIAIJ,
1389: 0,
1390: 0,
1391: 0,
1392: 0,
1393: MatFDColoringCreate_MPIAIJ,
1394: 0,
1395: MatSetUnfactored_MPIAIJ,
1396: 0,
1397: 0,
1398: MatGetSubMatrix_MPIAIJ,
1399: MatDestroy_MPIAIJ,
1400: MatView_MPIAIJ,
1401: MatGetPetscMaps_Petsc,
1402: 0,
1403: 0,
1404: 0,
1405: 0,
1406: 0,
1407: 0,
1408: 0,
1409: 0,
1410: MatSetColoring_MPIAIJ,
1411: MatSetValuesAdic_MPIAIJ,
1412: MatSetValuesAdifor_MPIAIJ
1413: };
1415: /* ----------------------------------------------------------------------------------------*/
1417: EXTERN_C_BEGIN
1418: int MatStoreValues_MPIAIJ(Mat mat)
1419: {
1420: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1421: int ierr;
1424: MatStoreValues(aij->A);
1425: MatStoreValues(aij->B);
1426: return(0);
1427: }
1428: EXTERN_C_END
1430: EXTERN_C_BEGIN
1431: int MatRetrieveValues_MPIAIJ(Mat mat)
1432: {
1433: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1434: int ierr;
1437: MatRetrieveValues(aij->A);
1438: MatRetrieveValues(aij->B);
1439: return(0);
1440: }
1441: EXTERN_C_END
1443: #include "petscpc.h"
1444: EXTERN_C_BEGIN
1445: EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1446: EXTERN_C_END
1448: EXTERN_C_BEGIN
1449: int MatCreate_MPIAIJ(Mat B)
1450: {
1451: Mat_MPIAIJ *b;
1452: int ierr,i,size;
1453: #if defined(PETSC_HAVE_SUPERLUDIST)
1454: PetscTruth flg;
1455: #endif
1459: MPI_Comm_size(B->comm,&size);
1461: ierr = PetscNew(Mat_MPIAIJ,&b);
1462: B->data = (void*)b;
1463: ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));
1464: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1465: B->factor = 0;
1466: B->assembled = PETSC_FALSE;
1467: B->mapping = 0;
1469: B->insertmode = NOT_SET_VALUES;
1470: b->size = size;
1471: MPI_Comm_rank(B->comm,&b->rank);
1473: PetscSplitOwnership(B->comm,&B->m,&B->M);
1474: PetscSplitOwnership(B->comm,&B->n,&B->N);
1476: /* the information in the maps duplicates the information computed below, eventually
1477: we should remove the duplicate information that is not contained in the maps */
1478: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1479: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
1481: /* build local table of row and column ownerships */
1482: PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1483: PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1484: b->cowners = b->rowners + b->size + 2;
1485: MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1486: b->rowners[0] = 0;
1487: for (i=2; i<=b->size; i++) {
1488: b->rowners[i] += b->rowners[i-1];
1489: }
1490: b->rstart = b->rowners[b->rank];
1491: b->rend = b->rowners[b->rank+1];
1492: MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1493: b->cowners[0] = 0;
1494: for (i=2; i<=b->size; i++) {
1495: b->cowners[i] += b->cowners[i-1];
1496: }
1497: b->cstart = b->cowners[b->rank];
1498: b->cend = b->cowners[b->rank+1];
1500: /* build cache for off array entries formed */
1501: MatStashCreate_Private(B->comm,1,&B->stash);
1502: b->donotstash = PETSC_FALSE;
1503: b->colmap = 0;
1504: b->garray = 0;
1505: b->roworiented = PETSC_TRUE;
1507: /* stuff used for matrix vector multiply */
1508: b->lvec = PETSC_NULL;
1509: b->Mvctx = PETSC_NULL;
1511: /* stuff for MatGetRow() */
1512: b->rowindices = 0;
1513: b->rowvalues = 0;
1514: b->getrowactive = PETSC_FALSE;
1516: #if defined(PETSC_HAVE_SUPERLUDIST)
1517: PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);
1518: if (flg) { MatUseSuperLU_DIST_MPIAIJ(B); }
1519: #endif
1521: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1522: "MatStoreValues_MPIAIJ",
1523: MatStoreValues_MPIAIJ);
1524: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1525: "MatRetrieveValues_MPIAIJ",
1526: MatRetrieveValues_MPIAIJ);
1527: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1528: "MatGetDiagonalBlock_MPIAIJ",
1529: MatGetDiagonalBlock_MPIAIJ);
1531: return(0);
1532: }
1533: EXTERN_C_END
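/*
   Sketch (an illustration, not PETSc code; BuildOwners is a hypothetical name): the
   rowners/cowners tables above are built by gathering every process's local size and
   prefix-summing, so that owners[rank]..owners[rank+1] is that process's ownership range.
*/
static int BuildOwners(MPI_Comm comm,int nlocal,int size,int *owners /* length size+1 */)
{
  int i;
  owners[0] = 0;
  MPI_Allgather(&nlocal,1,MPI_INT,owners+1,1,MPI_INT,comm);  /* owners[i+1] = local size of process i */
  for (i=2; i<=size; i++) owners[i] += owners[i-1];          /* running sum gives the starting rows */
  return 0;
}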
1535: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1536: {
1537: Mat mat;
1538: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1539: int ierr;
1542: *newmat = 0;
1543: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1544: MatSetType(mat,MATMPIAIJ);
1545: a = (Mat_MPIAIJ*)mat->data;
1546: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1547: mat->factor = matin->factor;
1548: mat->assembled = PETSC_TRUE;
1549: mat->insertmode = NOT_SET_VALUES;
1550: mat->preallocated = PETSC_TRUE;
1552: a->rstart = oldmat->rstart;
1553: a->rend = oldmat->rend;
1554: a->cstart = oldmat->cstart;
1555: a->cend = oldmat->cend;
1556: a->size = oldmat->size;
1557: a->rank = oldmat->rank;
1558: a->donotstash = oldmat->donotstash;
1559: a->roworiented = oldmat->roworiented;
1560: a->rowindices = 0;
1561: a->rowvalues = 0;
1562: a->getrowactive = PETSC_FALSE;
1564: ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1565: ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);
1566: if (oldmat->colmap) {
1567: #if defined (PETSC_USE_CTABLE)
1568: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1569: #else
1570: PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1571: PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1572: ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1573: #endif
1574: } else a->colmap = 0;
1575: if (oldmat->garray) {
1576: int len;
1577: len = oldmat->B->n;
1578: PetscMalloc((len+1)*sizeof(int),&a->garray);
1579: PetscLogObjectMemory(mat,len*sizeof(int));
1580: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1581: } else a->garray = 0;
1582:
1583: VecDuplicate(oldmat->lvec,&a->lvec);
1584: PetscLogObjectParent(mat,a->lvec);
1585: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1586: PetscLogObjectParent(mat,a->Mvctx);
1587: MatDuplicate(oldmat->A,cpvalues,&a->A);
1588: PetscLogObjectParent(mat,a->A);
1589: MatDuplicate(oldmat->B,cpvalues,&a->B);
1590: PetscLogObjectParent(mat,a->B);
1591: PetscFListDuplicate(matin->qlist,&mat->qlist);
1592: *newmat = mat;
1593: return(0);
1594: }
1596: #include "petscsys.h"
1598: EXTERN_C_BEGIN
1599: int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1600: {
1601: Mat A;
1602: PetscScalar *vals,*svals;
1603: MPI_Comm comm = ((PetscObject)viewer)->comm;
1604: MPI_Status status;
1605: int i,nz,ierr,j,rstart,rend,fd;
1606: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1607: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1608: int tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1611: MPI_Comm_size(comm,&size);
1612: MPI_Comm_rank(comm,&rank);
1613: if (!rank) {
1614: PetscViewerBinaryGetDescriptor(viewer,&fd);
1615: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1616: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1617: if (header[3] < 0) {
1618: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1619: }
1620: }
1622: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1623: M = header[1]; N = header[2];
1624: /* determine ownership of all rows */
1625: m = M/size + ((M % size) > rank);
1626: PetscMalloc((size+2)*sizeof(int),&rowners);
1627: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1628: rowners[0] = 0;
1629: for (i=2; i<=size; i++) {
1630: rowners[i] += rowners[i-1];
1631: }
1632: rstart = rowners[rank];
1633: rend = rowners[rank+1];
1635: /* distribute row lengths to all processors */
1636: ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1637: offlens = ourlens + (rend-rstart);
1638: if (!rank) {
1639: PetscMalloc(M*sizeof(int),&rowlengths);
1640: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1641: PetscMalloc(size*sizeof(int),&sndcounts);
1642: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1643: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1644: PetscFree(sndcounts);
1645: } else {
1646: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1647: }
1649: if (!rank) {
1650: /* calculate the number of nonzeros on each processor */
1651: PetscMalloc(size*sizeof(int),&procsnz);
1652: PetscMemzero(procsnz,size*sizeof(int));
1653: for (i=0; i<size; i++) {
1654: for (j=rowners[i]; j< rowners[i+1]; j++) {
1655: procsnz[i] += rowlengths[j];
1656: }
1657: }
1658: PetscFree(rowlengths);
1660: /* determine max buffer needed and allocate it */
1661: maxnz = 0;
1662: for (i=0; i<size; i++) {
1663: maxnz = PetscMax(maxnz,procsnz[i]);
1664: }
1665: PetscMalloc(maxnz*sizeof(int),&cols);
1667: /* read in my part of the matrix column indices */
1668: nz = procsnz[0];
1669: PetscMalloc(nz*sizeof(int),&mycols);
1670: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1672: /* read in everyone else's part and ship it off */
1673: for (i=1; i<size; i++) {
1674: nz = procsnz[i];
1675: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1676: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1677: }
1678: PetscFree(cols);
1679: } else {
1680: /* determine buffer space needed for message */
1681: nz = 0;
1682: for (i=0; i<m; i++) {
1683: nz += ourlens[i];
1684: }
1685: PetscMalloc((nz+1)*sizeof(int),&mycols);
1687: /* receive message of column indices*/
1688: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1689: MPI_Get_count(&status,MPI_INT,&maxnz);
1690: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1691: }
1693: /* determine column ownership if matrix is not square */
1694: if (N != M) {
1695: n = N/size + ((N % size) > rank);
1696: ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
1697: cstart = cend - n;
1698: } else {
1699: cstart = rstart;
1700: cend = rend;
1701: n = cend - cstart;
1702: }
1704: /* loop over local rows, determining number of off diagonal entries */
1705: PetscMemzero(offlens,m*sizeof(int));
1706: jj = 0;
1707: for (i=0; i<m; i++) {
1708: for (j=0; j<ourlens[i]; j++) {
1709: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1710: jj++;
1711: }
1712: }
1714: /* create our matrix */
1715: for (i=0; i<m; i++) {
1716: ourlens[i] -= offlens[i];
1717: }
1718: MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);
1719: A = *newmat;
1720: MatSetOption(A,MAT_COLUMNS_SORTED);
1721: for (i=0; i<m; i++) {
1722: ourlens[i] += offlens[i];
1723: }
1725: if (!rank) {
1726: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1728: /* read in my part of the matrix numerical values */
1729: nz = procsnz[0];
1730: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1731:
1732: /* insert into matrix */
1733: jj = rstart;
1734: smycols = mycols;
1735: svals = vals;
1736: for (i=0; i<m; i++) {
1737: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1738: smycols += ourlens[i];
1739: svals += ourlens[i];
1740: jj++;
1741: }
1743: /* read in the other processors' parts and ship them out */
1744: for (i=1; i<size; i++) {
1745: nz = procsnz[i];
1746: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1747: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1748: }
1749: PetscFree(procsnz);
1750: } else {
1751: /* receive numeric values */
1752: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
1754: /* receive message of values*/
1755: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1756: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1757: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1759: /* insert into matrix */
1760: jj = rstart;
1761: smycols = mycols;
1762: svals = vals;
1763: for (i=0; i<m; i++) {
1764: ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1765: smycols += ourlens[i];
1766: svals += ourlens[i];
1767: jj++;
1768: }
1769: }
1770: PetscFree(ourlens);
1771: PetscFree(vals);
1772: PetscFree(mycols);
1773: PetscFree(rowners);
1775: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1776: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1777: return(0);
1778: }
1779: EXTERN_C_END
1781: /*
1782: Not great since it makes two copies of the submatrix: first a SeqAIJ
1783: copy on each process, and then the end result formed by concatenating the local matrices.
1784: Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1785: */
1786: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1787: {
1788: int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1789: int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1790: Mat *local,M,Mreuse;
1791: PetscScalar *vwork,*aa;
1792: MPI_Comm comm = mat->comm;
1793: Mat_SeqAIJ *aij;
1797: MPI_Comm_rank(comm,&rank);
1798: MPI_Comm_size(comm,&size);
1800: if (call == MAT_REUSE_MATRIX) {
1801: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
1802: if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1803: local = &Mreuse;
1804: ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
1805: } else {
1806: ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
1807: Mreuse = *local;
1808: ierr = PetscFree(local);
1809: }
1811: /*
1812: m - number of local rows
1813: n - number of columns (same on all processors)
1814: rstart - first row in new global matrix generated
1815: */
1816: MatGetSize(Mreuse,&m,&n);
1817: if (call == MAT_INITIAL_MATRIX) {
1818: aij = (Mat_SeqAIJ*)(Mreuse)->data;
1819: if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1820: ii = aij->i;
1821: jj = aij->j;
1823: /*
1824: Determine the number of non-zeros in the diagonal and off-diagonal
1825: portions of the matrix in order to do correct preallocation
1826: */
1828: /* first get start and end of "diagonal" columns */
1829: if (csize == PETSC_DECIDE) {
1830: nlocal = n/size + ((n % size) > rank);
1831: } else {
1832: nlocal = csize;
1833: }
1834: ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
1835: rstart = rend - nlocal;
1836: if (rank == size - 1 && rend != n) {
1837: SETERRQ(1,"Local column sizes do not add up to total number of columns");
1838: }
1840: /* next, compute all the lengths */
1841: ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);
1842: olens = dlens + m;
1843: for (i=0; i<m; i++) {
1844: jend = ii[i+1] - ii[i];
1845: olen = 0;
1846: dlen = 0;
1847: for (j=0; j<jend; j++) {
1848: if (*jj < rstart || *jj >= rend) olen++;
1849: else dlen++;
1850: jj++;
1851: }
1852: olens[i] = olen;
1853: dlens[i] = dlen;
1854: }
1855: MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);
1856: PetscFree(dlens);
1857: } else {
1858: int ml,nl;
1860: M = *newmat;
1861: MatGetLocalSize(M,&ml,&nl);
1862: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1863: MatZeroEntries(M);
1864: /*
1865: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1866: rather than the slower MatSetValues().
1867: */
1868: M->was_assembled = PETSC_TRUE;
1869: M->assembled = PETSC_FALSE;
1870: }
1871: MatGetOwnershipRange(M,&rstart,&rend);
1872: aij = (Mat_SeqAIJ*)(Mreuse)->data;
1873: if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1874: ii = aij->i;
1875: jj = aij->j;
1876: aa = aij->a;
1877: for (i=0; i<m; i++) {
1878: row = rstart + i;
1879: nz = ii[i+1] - ii[i];
1880: cwork = jj; jj += nz;
1881: vwork = aa; aa += nz;
1882: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
1883: }
1885: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
1886: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
1887: *newmat = M;
1889: /* save the submatrix used on this processor for the next request */
1890: if (call == MAT_INITIAL_MATRIX) {
1891: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
1892: PetscObjectDereference((PetscObject)Mreuse);
1893: }
1895: return(0);
1896: }
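/*
   Editor's note (not part of the original source): a minimal caller sketch for the routine
   above, reached through the public MatGetSubMatrix() interface. It is written against the
   PETSc 2.1-era API used in this file and assumes the public call takes the same
   (isrow,iscol,csize,call) arguments as MatGetSubMatrix_MPIAIJ(), and that A lives on
   PETSC_COMM_WORLD. The helper name ExtractUpperLeftBlock() and the block sizes are
   invented for illustration only.

   static int ExtractUpperLeftBlock(Mat A,Mat *B)
   {
     int ierr,rstart,rend,M,N;
     IS  isrow,iscol;

     ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     (rows: the locally owned rows that fall within the first M/2 global rows)
     ierr = ISCreateStride(PETSC_COMM_WORLD,PetscMax(0,PetscMin(rend,M/2)-rstart),rstart,1,&isrow);CHKERRQ(ierr);
     (columns: the first N/2 columns, the same index set on every process)
     ierr = ISCreateStride(PETSC_COMM_WORLD,N/2,0,1,&iscol);CHKERRQ(ierr);
     (csize = PETSC_DECIDE lets PETSc choose the local column layout, as in the code above)
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
     ierr = ISDestroy(isrow);CHKERRQ(ierr);
     ierr = ISDestroy(iscol);CHKERRQ(ierr);
     return(0);
   }
*/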
1898: /*@C
1899: MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1900: (the default parallel PETSc format). For good matrix assembly performance
1901: the user should preallocate the matrix storage by setting the parameters
1902: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1903: performance can be increased by more than a factor of 50.
1905: Collective on MPI_Comm
1907: Input Parameters:
1908: + B - the matrix
1909: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
1910: (same value is used for all local rows)
1911: . d_nnz - array containing the number of nonzeros in the various rows of the
1912: DIAGONAL portion of the local submatrix (possibly different for each row)
1913: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1914: The size of this array is equal to the number of local rows, i.e. 'm'.
1915: You must leave room for the diagonal entry even if it is zero.
1916: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
1917: submatrix (same value is used for all local rows).
1918: - o_nnz - array containing the number of nonzeros in the various rows of the
1919: OFF-DIAGONAL portion of the local submatrix (possibly different for
1920: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1921: structure. The size of this array is equal to the number
1922: of local rows, i.e. 'm'.
1924: The AIJ format (also called the Yale sparse matrix format or
1925: compressed row storage), is fully compatible with standard Fortran 77
1926: storage. That is, the stored row and column indices can begin at
1927: either one (as in Fortran) or zero. See the users manual for details.
1929: The user MUST specify either the local or global matrix dimensions
1930: (possibly both).
1932: The parallel matrix is partitioned such that the first m0 rows belong to
1933: process 0, the next m1 rows belong to process 1, the next m2 rows belong
1934: to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
1936: The DIAGONAL portion of the local submatrix of a processor can be defined
1937: as the submatrix obtained by extracting the part corresponding to the
1938: rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1939: first row that belongs to the processor and r2 is the last row belonging
1940: to this processor. This is a square mxm matrix. The remaining portion
1941: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1943: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1945: By default, this format uses inodes (identical nodes) when possible.
1946: We search for consecutive rows with the same nonzero structure, thereby
1947: reusing matrix information to achieve increased efficiency.
1949: Options Database Keys:
1950: + -mat_aij_no_inode - Do not use inodes
1951: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1952: - -mat_aij_oneindex - Internally use indexing starting at 1
1953: rather than 0. Note that when calling MatSetValues(),
1954: the user still MUST index entries starting at 0!
1956: Example usage:
1957:
1958: Consider the following 8x8 matrix with 34 non-zero values, that is
1959: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1960: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1961: as follows:
1963: .vb
1964: 1 2 0 | 0 3 0 | 0 4
1965: Proc0 0 5 6 | 7 0 0 | 8 0
1966: 9 0 10 | 11 0 0 | 12 0
1967: -------------------------------------
1968: 13 0 14 | 15 16 17 | 0 0
1969: Proc1 0 18 0 | 19 20 21 | 0 0
1970: 0 0 0 | 22 23 0 | 24 0
1971: -------------------------------------
1972: Proc2 25 26 27 | 0 0 28 | 29 0
1973: 30 0 0 | 31 32 33 | 0 34
1974: .ve
1976: This can be represented as a collection of submatrices as:
1978: .vb
1979: A B C
1980: D E F
1981: G H I
1982: .ve
1984: where the submatrices A,B,C are owned by proc0, D,E,F are
1985: owned by proc1, and G,H,I are owned by proc2.
1987: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1988: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1989: The 'M','N' parameters are 8,8, and have the same values on all procs.
1991: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1992: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1993: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1994: Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
1995: part as separate SeqAIJ matrices. For example, proc1 will store [E] as one
1996: SeqAIJ matrix and [DF] as another SeqAIJ matrix.
1998: When the d_nz, o_nz parameters are specified, d_nz storage elements are
1999: allocated for every row of the local DIAGONAL submatrix, and o_nz
2000: storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2001: One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2002: the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2003: In this case, the values of d_nz,o_nz are:
2004: .vb
2005: proc0 : d_nz = 2, o_nz = 2
2006: proc1 : d_nz = 3, o_nz = 2
2007: proc2 : d_nz = 1, o_nz = 4
2008: .ve
2009: We are allocating m*(d_nz+o_nz) storage locations on every process. This
2010: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2011: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2012: 34 values.
2014: When the d_nnz, o_nnz parameters are specified, the storage is specified
2015: for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2016: In the above case the values for d_nnz,o_nnz are:
2017: .vb
2018: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2019: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2020: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2021: .ve
2022: Here the space allocated is the sum of all the above values, i.e. 34, and
2023: hence the preallocation is exact.
2025: Level: intermediate
2027: .keywords: matrix, aij, compressed row, sparse, parallel
2029: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2030: @*/
2031: int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2032: {
2033: Mat_MPIAIJ *b;
2034: int ierr,i;
2035: PetscTruth flg2;
2038: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);
2039: if (!flg2) return(0);
2040: B->preallocated = PETSC_TRUE;
2041: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2042: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2043: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2044: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2045: if (d_nnz) {
2046: for (i=0; i<B->m; i++) {
2047: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2048: }
2049: }
2050: if (o_nnz) {
2051: for (i=0; i<B->m; i++) {
2052: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2053: }
2054: }
2055: b = (Mat_MPIAIJ*)B->data;
2057: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);
2058: PetscLogObjectParent(B,b->A);
2059: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);
2060: PetscLogObjectParent(B,b->B);
2062: return(0);
2063: }
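/*
   Editor's note (not part of the original source): a minimal sketch, assuming the matrix
   lives on PETSC_COMM_WORLD, of how the preallocation described in the manual page above
   could be supplied for the 3 rows owned by proc0 in the 8x8 example
   (d_nnz = [2,2,2], o_nnz = [2,2,2]); proc1 would pass [3,3,2]/[2,1,1] and proc2 [1,1]/[4,4].
   The variable names are invented for illustration and error checking is omitted.

   Mat A;
   int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

   MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);         (this process owns 3 rows and 3 "diagonal" columns)
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);   (d_nz and o_nz are ignored when the arrays are given)
   ... MatSetValues(A,...,INSERT_VALUES) for the locally owned rows ...
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/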
2065: /*@C
2066: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2067: (the default parallel PETSc format). For good matrix assembly performance
2068: the user should preallocate the matrix storage by setting the parameters
2069: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2070: performance can be increased by more than a factor of 50.
2072: Collective on MPI_Comm
2074: Input Parameters:
2075: + comm - MPI communicator
2076: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2077: This value should be the same as the local size used in creating the
2078: y vector for the matrix-vector product y = Ax.
2079: . n - This value should be the same as the local size used in creating the
2080: x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2081: calculated if N is given). For square matrices n is almost always m.
2082: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2083: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2084: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2085: (same value is used for all local rows)
2086: . d_nnz - array containing the number of nonzeros in the various rows of the
2087: DIAGONAL portion of the local submatrix (possibly different for each row)
2088: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2089: The size of this array is equal to the number of local rows, i.e. 'm'.
2090: You must leave room for the diagonal entry even if it is zero.
2091: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2092: submatrix (same value is used for all local rows).
2093: - o_nnz - array containing the number of nonzeros in the various rows of the
2094: OFF-DIAGONAL portion of the local submatrix (possibly different for
2095: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2096: structure. The size of this array is equal to the number
2097: of local rows, i.e. 'm'.
2099: Output Parameter:
2100: . A - the matrix
2102: Notes:
2103: m,n,M,N parameters specify the size of the matrix, and its partitioning across
2104: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2105: storage requirements for this matrix.
2107: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2108: processor then it must be used on all processors that share the object for
2109: that argument.
2111: The AIJ format (also called the Yale sparse matrix format or
2112: compressed row storage), is fully compatible with standard Fortran 77
2113: storage. That is, the stored row and column indices can begin at
2114: either one (as in Fortran) or zero. See the users manual for details.
2116: The user MUST specify either the local or global matrix dimensions
2117: (possibly both).
2119: The parallel matrix is partitioned such that the first m0 rows belong to
2120: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2121: to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
2123: The DIAGONAL portion of the local submatrix of a processor can be defined
2124: as the submatrix obtained by extracting the part corresponding to the
2125: rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2126: first row that belongs to the processor and r2 is the last row belonging
2127: to this processor. This is a square mxm matrix. The remaining portion
2128: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2130: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2132: By default, this format uses inodes (identical nodes) when possible.
2133: We search for consecutive rows with the same nonzero structure, thereby
2134: reusing matrix information to achieve increased efficiency.
2136: Options Database Keys:
2137: + -mat_aij_no_inode - Do not use inodes
2138: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2139: - -mat_aij_oneindex - Internally use indexing starting at 1
2140: rather than 0. Note that when calling MatSetValues(),
2141: the user still MUST index entries starting at 0!
2144: Example usage:
2145:
2146: Consider the following 8x8 matrix with 34 non-zero values, that is
2147: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2148: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2149: as follows:
2151: .vb
2152: 1 2 0 | 0 3 0 | 0 4
2153: Proc0 0 5 6 | 7 0 0 | 8 0
2154: 9 0 10 | 11 0 0 | 12 0
2155: -------------------------------------
2156: 13 0 14 | 15 16 17 | 0 0
2157: Proc1 0 18 0 | 19 20 21 | 0 0
2158: 0 0 0 | 22 23 0 | 24 0
2159: -------------------------------------
2160: Proc2 25 26 27 | 0 0 28 | 29 0
2161: 30 0 0 | 31 32 33 | 0 34
2162: .ve
2164: This can be represented as a collection of submatrices as:
2166: .vb
2167: A B C
2168: D E F
2169: G H I
2170: .ve
2172: where the submatrices A,B,C are owned by proc0, D,E,F are
2173: owned by proc1, and G,H,I are owned by proc2.
2175: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2176: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2177: The 'M','N' parameters are 8,8, and have the same values on all procs.
2179: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2180: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2181: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2182: Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2183: part as separate SeqAIJ matrices. For example, proc1 will store [E] as one
2184: SeqAIJ matrix and [DF] as another SeqAIJ matrix.
2186: When the d_nz, o_nz parameters are specified, d_nz storage elements are
2187: allocated for every row of the local DIAGONAL submatrix, and o_nz
2188: storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2189: One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2190: the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2191: In this case, the values of d_nz,o_nz are:
2192: .vb
2193: proc0 : d_nz = 2, o_nz = 2
2194: proc1 : d_nz = 3, o_nz = 2
2195: proc2 : d_nz = 1, o_nz = 4
2196: .ve
2197: We are allocating m*(d_nz+o_nz) storage locations on every process. This
2198: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2199: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2200: 34 values.
2202: When the d_nnz, o_nnz parameters are specified, the storage is specified
2203: for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2204: In the above case the values for d_nnz,o_nnz are:
2205: .vb
2206: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2207: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2208: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2209: .ve
2210: Here the space allocated is the sum of all the above values, i.e. 34, and
2211: hence the preallocation is exact.
2213: Level: intermediate
2215: .keywords: matrix, aij, compressed row, sparse, parallel
2217: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2218: @*/
2219: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2220: {
2221: int ierr,size;
2224: MatCreate(comm,m,n,M,N,A);
2225: MPI_Comm_size(comm,&size);
2226: if (size > 1) {
2227: MatSetType(*A,MATMPIAIJ);
2228: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2229: } else {
2230: MatSetType(*A,MATSEQAIJ);
2231: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2232: }
2233: return(0);
2234: }
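/*
   Editor's note (not part of the original source): a sketch of the more common usage of
   MatCreateMPIAIJ() where only rough per-row estimates are available. The d_nz=5 and o_nz=2
   values and the 1000x1000 size are illustrative guesses, not recommendations, and the local
   sizes are left to PETSC_DECIDE. Note from the routine above that on a single process this
   falls back to a SeqAIJ matrix, so only the d_nz/d_nnz arguments are used in that case.

   Mat A;
   int M = 1000,N = 1000,rstart,rend;

   MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
   MatGetOwnershipRange(A,&rstart,&rend);
   ... MatSetValues(A,...,INSERT_VALUES) for rows rstart..rend-1 ...
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/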
2236: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2237: {
2238: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2240: *Ad = a->A;
2241: *Ao = a->B;
2242: *colmap = a->garray;
2243: return(0);
2244: }
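/*
   Editor's note (not part of the original source): how the three pieces returned above fit
   together. Ad is the square "diagonal" SeqAIJ block, Ao is the off-diagonal SeqAIJ block
   whose columns are compacted during assembly, and colmap (the garray built by the assembly
   code) translates a local column of Ao into a global column. A minimal sketch with invented
   variable names:

   Mat Ad,Ao;
   int *colmap,localcol = 0,globalcol;

   MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
   globalcol = colmap[localcol];   (global column index of local column 0 of Ao)
*/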
2246: int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2247: {
2248: int ierr;
2249: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2252: if (coloring->ctype == IS_COLORING_LOCAL) {
2253: int *allcolors,*colors,i;
2254: ISColoring ocoloring;
2256: /* set coloring for diagonal portion */
2257: MatSetColoring_SeqAIJ(a->A,coloring);
2259: /* set coloring for off-diagonal portion */
2260: ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2261: PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2262: for (i=0; i<a->B->n; i++) {
2263: colors[i] = allcolors[a->garray[i]];
2264: }
2265: PetscFree(allcolors);
2266: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2267: MatSetColoring_SeqAIJ(a->B,ocoloring);
2268: ISColoringDestroy(ocoloring);
2269: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2270: int *colors,i,*larray;
2271: ISColoring ocoloring;
2273: /* set coloring for diagonal portion */
2274: PetscMalloc((a->A->n+1)*sizeof(int),&larray);
2275: for (i=0; i<a->A->n; i++) {
2276: larray[i] = i + a->cstart;
2277: }
2278: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2279: PetscMalloc((a->A->n+1)*sizeof(int),&colors);
2280: for (i=0; i<a->A->n; i++) {
2281: colors[i] = coloring->colors[larray[i]];
2282: }
2283: PetscFree(larray);
2284: ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);
2285: MatSetColoring_SeqAIJ(a->A,ocoloring);
2286: ISColoringDestroy(ocoloring);
2288: /* set coloring for off-diagonal portion */
2289: PetscMalloc((a->B->n+1)*sizeof(int),&larray);
2290: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2291: PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2292: for (i=0; i<a->B->n; i++) {
2293: colors[i] = coloring->colors[larray[i]];
2294: }
2295: PetscFree(larray);
2296: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2297: MatSetColoring_SeqAIJ(a->B,ocoloring);
2298: ISColoringDestroy(ocoloring);
2299: } else {
2300: SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2301: }
2303: return(0);
2304: }
2306: int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2307: {
2308: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2309: int ierr;
2312: MatSetValuesAdic_SeqAIJ(a->A,advalues);
2313: MatSetValuesAdic_SeqAIJ(a->B,advalues);
2314: return(0);
2315: }
2317: int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2318: {
2319: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2320: int ierr;
2323: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2324: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2325: return(0);
2326: }