Actual source code: mpiaij.c
1: /*$Id: mpiaij.c,v 1.334 2001/04/10 19:35:25 bsmith Exp $*/
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/vec/vecimpl.h"
5: #include "src/inline/spops.h"
7: EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8: EXTERN int DisAssemble_MPIAIJ(Mat);
9: EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
10: EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
11: EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
12: EXTERN int MatPrintHelp_SeqAIJ(Mat);
14: /*
15: Local utility routine that creates a mapping from the global column
16: number to the local number in the off-diagonal part of the local
17: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
18: a slightly higher hash table cost; without it it is not scalable (each processor
19: has an order N integer array but is fast to acess.
20: */
21: int CreateColmap_MPIAIJ_Private(Mat mat)
22: {
23: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
24: int n = aij->B->n,i,ierr;
27: #if defined (PETSC_USE_CTABLE)
28: PetscTableCreate(n,&aij->colmap);
29: for (i=0; i<n; i++){
30: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
31: }
32: #else
33: PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
34: PetscLogObjectMemory(mat,mat->N*sizeof(int));
35: PetscMemzero(aij->colmap,mat->N*sizeof(int));
36: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
37: #endif
38: return(0);
39: }
/* Number of extra nonzero slots added to a row each time it runs out of room */
#define CHUNKSIZE   15

/*
   MatSetValues_SeqAIJ_A_Private - Inlined insertion of one value into the
   "diagonal" sequential block A at local (row,col).

   The macro works on variables of the expanding function, which must provide:
     a, A                      - the Mat_SeqAIJ structure and its Mat
     aa, ai, aj, aimax, ailen  - cached a->a, a->i, a->j, a->imax, a->ilen
     am                        - number of rows of A
     shift, nonew              - index shift and new-nonzero policy
     rp, ap, rmax, nrow, col1, low, high, t, _i, ii, N, ierr - scratch variables

   Steps: narrow the search range of the row by bisection, scan the remaining
   entries linearly, update the entry if the column is already present, and
   otherwise insert it, growing the storage by CHUNKSIZE when the row is full.

   nonew ==  1 : a new nonzero is silently dropped
   nonew == -1 : a new nonzero is an error
   nonew == -2 : a new nonzero is an error only if it needs new storage

   Both the bisection and the linear scan compare against the shifted column
   col1; comparing the bisection against the unshifted column can skip an
   existing entry when shift is nonzero.

   The macro defines the label a_noinsert, so it can be expanded only once
   per function.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = (col) - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if ((addv) == ADD_VALUES) ap[_i] += (value); \
        else                      ap[_i] = (value); \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: values, then column indices, then row offsets */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots, leaving a CHUNKSIZE gap after this row */ \
      for (ii=0; ii<(row)+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=(row)+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = (value); \
    a_noinsert: ; \
    ailen[row] = nrow; \
}
/*
   MatSetValues_SeqAIJ_B_Private - Inlined insertion of one value into the
   "off-diagonal" sequential block B at local (row,col).

   The macro works on variables of the expanding function, which must provide:
     b, B                      - the Mat_SeqAIJ structure and its Mat
     ba, bi, bj, bimax, bilen  - cached b->a, b->i, b->j, b->imax, b->ilen
     bm                        - number of rows of B
     shift, nonew              - index shift and new-nonzero policy
     rp, ap, rmax, nrow, col1, low, high, t, _i, ii, N, ierr - scratch variables

   Steps: narrow the search range of the row by bisection, scan the remaining
   entries linearly, update the entry if the column is already present, and
   otherwise insert it, growing the storage by CHUNKSIZE when the row is full.

   nonew ==  1 : a new nonzero is silently dropped
   nonew == -1 : a new nonzero is an error
   nonew == -2 : a new nonzero is an error only if it needs new storage

   Both the bisection and the linear scan compare against the shifted column
   col1; comparing the bisection against the unshifted column can skip an
   existing entry when shift is nonzero.

   The macro defines the label b_noinsert, so it can be expanded only once
   per function.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = (col) - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if ((addv) == ADD_VALUES) ap[_i] += (value); \
        else                      ap[_i] = (value); \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: values, then column indices, then row offsets */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots, leaving a CHUNKSIZE gap after this row */ \
      for (ii=0; ii<(row)+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=(row)+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = (value); \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
190: int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
191: {
192: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
193: Scalar value;
194: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
195: int cstart = aij->cstart,cend = aij->cend,row,col;
196: PetscTruth roworiented = aij->roworiented;
198: /* Some Variables required in the macro */
199: Mat A = aij->A;
200: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
201: int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
202: Scalar *aa = a->a;
203: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
204: Mat B = aij->B;
205: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
206: int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
207: Scalar *ba = b->a;
209: int *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
210: int nonew = a->nonew,shift = a->indexshift;
211: Scalar *ap;
214: for (i=0; i<m; i++) {
215: if (im[i] < 0) continue;
216: #if defined(PETSC_USE_BOPT_g)
217: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
218: #endif
219: if (im[i] >= rstart && im[i] < rend) {
220: row = im[i] - rstart;
221: for (j=0; j<n; j++) {
222: if (in[j] >= cstart && in[j] < cend){
223: col = in[j] - cstart;
224: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
225: if (ignorezeroentries && value == 0.0) continue;
226: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
227: /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
228: } else if (in[j] < 0) continue;
229: #if defined(PETSC_USE_BOPT_g)
230: else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
231: #endif
232: else {
233: if (mat->was_assembled) {
234: if (!aij->colmap) {
235: CreateColmap_MPIAIJ_Private(mat);
236: }
237: #if defined (PETSC_USE_CTABLE)
238: PetscTableFind(aij->colmap,in[j]+1,&col);
239: col--;
240: #else
241: col = aij->colmap[in[j]] - 1;
242: #endif
243: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
244: DisAssemble_MPIAIJ(mat);
245: col = in[j];
246: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
247: B = aij->B;
248: b = (Mat_SeqAIJ*)B->data;
249: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
250: ba = b->a;
251: }
252: } else col = in[j];
253: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254: if (ignorezeroentries && value == 0.0) continue;
255: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
256: /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
257: }
258: }
259: } else {
260: if (!aij->donotstash) {
261: if (roworiented) {
262: if (ignorezeroentries && v[i*n] == 0.0) continue;
263: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
264: } else {
265: if (ignorezeroentries && v[i] == 0.0) continue;
266: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
267: }
268: }
269: }
270: }
271: return(0);
272: }
274: int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
275: {
276: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
277: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
278: int cstart = aij->cstart,cend = aij->cend,row,col;
281: for (i=0; i<m; i++) {
282: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
283: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
284: if (idxm[i] >= rstart && idxm[i] < rend) {
285: row = idxm[i] - rstart;
286: for (j=0; j<n; j++) {
287: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
288: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
289: if (idxn[j] >= cstart && idxn[j] < cend){
290: col = idxn[j] - cstart;
291: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
292: } else {
293: if (!aij->colmap) {
294: CreateColmap_MPIAIJ_Private(mat);
295: }
296: #if defined (PETSC_USE_CTABLE)
297: PetscTableFind(aij->colmap,idxn[j]+1,&col);
298: col --;
299: #else
300: col = aij->colmap[idxn[j]] - 1;
301: #endif
302: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303: else {
304: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
305: }
306: }
307: }
308: } else {
309: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310: }
311: }
312: return(0);
313: }
315: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
316: {
317: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
318: int ierr,nstash,reallocs;
319: InsertMode addv;
322: if (aij->donotstash) {
323: return(0);
324: }
326: /* make sure all processors are either in INSERTMODE or ADDMODE */
327: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
328: if (addv == (ADD_VALUES|INSERT_VALUES)) {
329: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
330: }
331: mat->insertmode = addv; /* in case this processor had no cache */
333: MatStashScatterBegin_Private(&mat->stash,aij->rowners);
334: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
335: PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.n",nstash,reallocs);
336: return(0);
337: }
340: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
341: {
342: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
343: int i,j,rstart,ncols,n,ierr,flg;
344: int *row,*col,other_disassembled;
345: Scalar *val;
346: InsertMode addv = mat->insertmode;
349: if (!aij->donotstash) {
350: while (1) {
351: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
352: if (!flg) break;
354: for (i=0; i<n;) {
355: /* Now identify the consecutive vals belonging to the same row */
356: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
357: if (j < n) ncols = j-i;
358: else ncols = n-i;
359: /* Now assemble all these values with a single function call */
360: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
361: i = j;
362: }
363: }
364: MatStashScatterEnd_Private(&mat->stash);
365: }
366:
367: MatAssemblyBegin(aij->A,mode);
368: MatAssemblyEnd(aij->A,mode);
370: /* determine if any processor has disassembled, if so we must
371: also disassemble ourselfs, in order that we may reassemble. */
372: /*
373: if nonzero structure of submatrix B cannot change then we know that
374: no processor disassembled thus we can skip this stuff
375: */
376: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
377: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
378: if (mat->was_assembled && !other_disassembled) {
379: DisAssemble_MPIAIJ(mat);
380: }
381: }
383: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
384: MatSetUpMultiply_MPIAIJ(mat);
385: }
386: MatAssemblyBegin(aij->B,mode);
387: MatAssemblyEnd(aij->B,mode);
389: if (aij->rowvalues) {
390: PetscFree(aij->rowvalues);
391: aij->rowvalues = 0;
392: }
393: return(0);
394: }
396: int MatZeroEntries_MPIAIJ(Mat A)
397: {
398: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
399: int ierr;
402: MatZeroEntries(l->A);
403: MatZeroEntries(l->B);
404: return(0);
405: }
407: int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
408: {
409: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
410: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
411: int *procs,*nprocs,j,idx,nsends,*work,row;
412: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
413: int *rvalues,tag = A->tag,count,base,slen,n,*source;
414: int *lens,imdex,*lrows,*values,rstart=l->rstart;
415: MPI_Comm comm = A->comm;
416: MPI_Request *send_waits,*recv_waits;
417: MPI_Status recv_status,*send_status;
418: IS istmp;
419: PetscTruth found;
422: ISGetLocalSize(is,&N);
423: ISGetIndices(is,&rows);
425: /* first count number of contributors to each processor */
426: PetscMalloc(2*size*sizeof(int),&nprocs);
427: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
428: procs = nprocs + size;
429: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
430: for (i=0; i<N; i++) {
431: idx = rows[i];
432: found = PETSC_FALSE;
433: for (j=0; j<size; j++) {
434: if (idx >= owners[j] && idx < owners[j+1]) {
435: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
436: }
437: }
438: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
439: }
440: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
442: /* inform other processors of number of messages and max length*/
443: PetscMalloc(2*size*sizeof(int),&work);
444: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
445: nrecvs = work[size+rank];
446: nmax = work[rank];
447: ierr = PetscFree(work);
449: /* post receives: */
450: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
451: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
452: for (i=0; i<nrecvs; i++) {
453: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
454: }
456: /* do sends:
457: 1) starts[i] gives the starting index in svalues for stuff going to
458: the ith processor
459: */
460: PetscMalloc((N+1)*sizeof(int),&svalues);
461: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
462: PetscMalloc((size+1)*sizeof(int),&starts);
463: starts[0] = 0;
464: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
465: for (i=0; i<N; i++) {
466: svalues[starts[owner[i]]++] = rows[i];
467: }
468: ISRestoreIndices(is,&rows);
470: starts[0] = 0;
471: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
472: count = 0;
473: for (i=0; i<size; i++) {
474: if (procs[i]) {
475: MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
476: }
477: }
478: PetscFree(starts);
480: base = owners[rank];
482: /* wait on receives */
483: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
484: source = lens + nrecvs;
485: count = nrecvs; slen = 0;
486: while (count) {
487: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
488: /* unpack receives into our local space */
489: MPI_Get_count(&recv_status,MPI_INT,&n);
490: source[imdex] = recv_status.MPI_SOURCE;
491: lens[imdex] = n;
492: slen += n;
493: count--;
494: }
495: PetscFree(recv_waits);
496:
497: /* move the data into the send scatter */
498: PetscMalloc((slen+1)*sizeof(int),&lrows);
499: count = 0;
500: for (i=0; i<nrecvs; i++) {
501: values = rvalues + i*nmax;
502: for (j=0; j<lens[i]; j++) {
503: lrows[count++] = values[j] - base;
504: }
505: }
506: PetscFree(rvalues);
507: PetscFree(lens);
508: PetscFree(owner);
509: PetscFree(nprocs);
510:
511: /* actually zap the local rows */
512: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
513: PetscLogObjectParent(A,istmp);
515: /*
516: Zero the required rows. If the "diagonal block" of the matrix
517: is square and the user wishes to set the diagonal we use seperate
518: code so that MatSetValues() is not called for each diagonal allocating
519: new memory, thus calling lots of mallocs and slowing things down.
521: Contributed by: Mathew Knepley
522: */
523: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
524: MatZeroRows(l->B,istmp,0);
525: if (diag && (l->A->M == l->A->N)) {
526: ierr = MatZeroRows(l->A,istmp,diag);
527: } else if (diag) {
528: MatZeroRows(l->A,istmp,0);
529: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
530: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat optionsn
531: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
532: }
533: for (i = 0; i < slen; i++) {
534: row = lrows[i] + rstart;
535: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
536: }
537: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
538: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
539: } else {
540: MatZeroRows(l->A,istmp,0);
541: }
542: ISDestroy(istmp);
543: PetscFree(lrows);
545: /* wait on sends */
546: if (nsends) {
547: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
548: MPI_Waitall(nsends,send_waits,send_status);
549: PetscFree(send_status);
550: }
551: PetscFree(send_waits);
552: PetscFree(svalues);
554: return(0);
555: }
557: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
558: {
559: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
560: int ierr,nt;
563: VecGetLocalSize(xx,&nt);
564: if (nt != A->n) {
565: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
566: }
567: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
568: (*a->A->ops->mult)(a->A,xx,yy);
569: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
570: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
571: return(0);
572: }
574: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
575: {
576: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
577: int ierr;
580: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
581: (*a->A->ops->multadd)(a->A,xx,yy,zz);
582: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
583: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
584: return(0);
585: }
587: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
588: {
589: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
590: int ierr;
593: /* do nondiagonal part */
594: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
595: /* send it on its way */
596: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
597: /* do local part */
598: (*a->A->ops->multtranspose)(a->A,xx,yy);
599: /* receive remote parts: note this assumes the values are not actually */
600: /* inserted in yy until the next line, which is true for my implementation*/
601: /* but is not perhaps always true. */
602: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
603: return(0);
604: }
606: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
607: {
608: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609: int ierr;
612: /* do nondiagonal part */
613: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
614: /* send it on its way */
615: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
616: /* do local part */
617: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
618: /* receive remote parts: note this assumes the values are not actually */
619: /* inserted in yy until the next line, which is true for my implementation*/
620: /* but is not perhaps always true. */
621: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
622: return(0);
623: }
625: /*
626: This only works correctly for square matrices where the subblock A->A is the
627: diagonal block
628: */
629: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
630: {
631: int ierr;
632: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
635: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
636: if (a->rstart != a->cstart || a->rend != a->cend) {
637: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
638: }
639: MatGetDiagonal(a->A,v);
640: return(0);
641: }
643: int MatScale_MPIAIJ(Scalar *aa,Mat A)
644: {
645: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
646: int ierr;
649: MatScale(aa,a->A);
650: MatScale(aa,a->B);
651: return(0);
652: }
654: int MatDestroy_MPIAIJ(Mat mat)
655: {
656: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
657: int ierr;
660: #if defined(PETSC_USE_LOG)
661: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
662: #endif
663: MatStashDestroy_Private(&mat->stash);
664: PetscFree(aij->rowners);
665: MatDestroy(aij->A);
666: MatDestroy(aij->B);
667: #if defined (PETSC_USE_CTABLE)
668: if (aij->colmap) {PetscTableDelete(aij->colmap);}
669: #else
670: if (aij->colmap) {PetscFree(aij->colmap);}
671: #endif
672: if (aij->garray) {PetscFree(aij->garray);}
673: if (aij->lvec) {VecDestroy(aij->lvec);}
674: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
675: if (aij->rowvalues) {PetscFree(aij->rowvalues);}
676: PetscFree(aij);
677: return(0);
678: }
680: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
681: {
682: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
683: Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
684: int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
685: PetscTruth isdraw,isascii,flg;
686: PetscViewer sviewer;
687: PetscViewerFormat format;
690: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
691: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
692: if (isascii) {
693: PetscViewerGetFormat(viewer,&format);
694: if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
695: MatInfo info;
696: MPI_Comm_rank(mat->comm,&rank);
697: MatGetInfo(mat,MAT_LOCAL,&info);
698: PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
699: if (flg) {
700: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routinesn",
701: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
702: } else {
703: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routinesn",
704: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
705: }
706: MatGetInfo(aij->A,MAT_LOCAL,&info);
707: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d n",rank,(int)info.nz_used);
708: MatGetInfo(aij->B,MAT_LOCAL,&info);
709: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d n",rank,(int)info.nz_used);
710: PetscViewerFlush(viewer);
711: VecScatterView(aij->Mvctx,viewer);
712: return(0);
713: } else if (format == PETSC_VIEWER_ASCII_INFO) {
714: return(0);
715: }
716: } else if (isdraw) {
717: PetscDraw draw;
718: PetscTruth isnull;
719: PetscViewerDrawGetDraw(viewer,0,&draw);
720: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
721: }
723: if (size == 1) {
724: MatView(aij->A,viewer);
725: } else {
726: /* assemble the entire matrix onto first processor. */
727: Mat A;
728: Mat_SeqAIJ *Aloc;
729: int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
730: Scalar *a;
732: if (!rank) {
733: MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
734: } else {
735: MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
736: }
737: PetscLogObjectParent(mat,A);
739: /* copy over the A part */
740: Aloc = (Mat_SeqAIJ*)aij->A->data;
741: m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
742: row = aij->rstart;
743: for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
744: for (i=0; i<m; i++) {
745: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
746: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
747: }
748: aj = Aloc->j;
749: for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
751: /* copy over the B part */
752: Aloc = (Mat_SeqAIJ*)aij->B->data;
753: m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
754: row = aij->rstart;
755: PetscMalloc((ai[m]+1)*sizeof(int),&cols);
756: ct = cols;
757: for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
758: for (i=0; i<m; i++) {
759: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
760: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
761: }
762: PetscFree(ct);
763: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
764: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
765: /*
766: Everyone has to call to draw the matrix since the graphics waits are
767: synchronized across all processors that share the PetscDraw object
768: */
769: PetscViewerGetSingleton(viewer,&sviewer);
770: if (!rank) {
771: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
772: }
773: PetscViewerRestoreSingleton(viewer,&sviewer);
774: MatDestroy(A);
775: }
776: return(0);
777: }
779: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
780: {
781: int ierr;
782: PetscTruth isascii,isdraw,issocket,isbinary;
783:
785: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
786: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
787: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
788: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
789: if (isascii || isdraw || isbinary || issocket) {
790: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
791: } else {
792: SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
793: }
794: return(0);
795: }
797: /*
798: This has to provide several versions.
800: 2) a) use only local smoothing updating outer values only once.
801: b) local smoothing updating outer values each inner iteration
802: 3) color updating out values betwen colors.
803: */
804: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
805: {
806: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
807: Mat AA = mat->A,BB = mat->B;
808: Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
809: Scalar *b,*x,*xs,*ls,d,*v,sum;
810: int ierr,*idx,*diag;
811: int n = matin->n,m = matin->m,i,shift = A->indexshift;
814: if (!A->diag) {MatMarkDiagonal_SeqAIJ(AA);}
815: diag = A->diag;
816: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
817: if (flag & SOR_ZERO_INITIAL_GUESS) {
818: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);
819: return(0);
820: }
821: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
822: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
823: VecGetArray(xx,&x);
824: if (xx != bb) {
825: VecGetArray(bb,&b);
826: } else {
827: b = x;
828: }
829: VecGetArray(mat->lvec,&ls);
830: xs = x + shift; /* shift by one for index start of 1 */
831: ls = ls + shift;
832: while (its--) {
833: /* go down through the rows */
834: for (i=0; i<m; i++) {
835: n = A->i[i+1] - A->i[i];
836: PetscLogFlops(4*n+3);
837: idx = A->j + A->i[i] + shift;
838: v = A->a + A->i[i] + shift;
839: sum = b[i];
840: SPARSEDENSEMDOT(sum,xs,v,idx,n);
841: d = fshift + A->a[diag[i]+shift];
842: n = B->i[i+1] - B->i[i];
843: idx = B->j + B->i[i] + shift;
844: v = B->a + B->i[i] + shift;
845: SPARSEDENSEMDOT(sum,ls,v,idx,n);
846: x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
847: }
848: /* come up through the rows */
849: for (i=m-1; i>-1; i--) {
850: n = A->i[i+1] - A->i[i];
851: PetscLogFlops(4*n+3);
852: idx = A->j + A->i[i] + shift;
853: v = A->a + A->i[i] + shift;
854: sum = b[i];
855: SPARSEDENSEMDOT(sum,xs,v,idx,n);
856: d = fshift + A->a[diag[i]+shift];
857: n = B->i[i+1] - B->i[i];
858: idx = B->j + B->i[i] + shift;
859: v = B->a + B->i[i] + shift;
860: SPARSEDENSEMDOT(sum,ls,v,idx,n);
861: x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
862: }
863: }
864: VecRestoreArray(xx,&x);
865: if (bb != xx) {VecRestoreArray(bb,&b); }
866: VecRestoreArray(mat->lvec,&ls);
867: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
868: if (flag & SOR_ZERO_INITIAL_GUESS) {
869: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);
870: return(0);
871: }
872: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
873: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
874: VecGetArray(xx,&x);
875: if (xx != bb) {
876: VecGetArray(bb,&b);
877: } else {
878: b = x;
879: }
880: VecGetArray(mat->lvec,&ls);
881: xs = x + shift; /* shift by one for index start of 1 */
882: ls = ls + shift;
883: while (its--) {
884: for (i=0; i<m; i++) {
885: n = A->i[i+1] - A->i[i];
886: PetscLogFlops(4*n+3);
887: idx = A->j + A->i[i] + shift;
888: v = A->a + A->i[i] + shift;
889: sum = b[i];
890: SPARSEDENSEMDOT(sum,xs,v,idx,n);
891: d = fshift + A->a[diag[i]+shift];
892: n = B->i[i+1] - B->i[i];
893: idx = B->j + B->i[i] + shift;
894: v = B->a + B->i[i] + shift;
895: SPARSEDENSEMDOT(sum,ls,v,idx,n);
896: x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
897: }
898: }
899: VecRestoreArray(xx,&x);
900: if (bb != xx) {VecRestoreArray(bb,&b); }
901: VecRestoreArray(mat->lvec,&ls);
902: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
903: if (flag & SOR_ZERO_INITIAL_GUESS) {
904: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);
905: return(0);
906: }
907: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
908: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
909: VecGetArray(xx,&x);
910: if (xx != bb) {
911: VecGetArray(bb,&b);
912: } else {
913: b = x;
914: }
915: VecGetArray(mat->lvec,&ls);
916: xs = x + shift; /* shift by one for index start of 1 */
917: ls = ls + shift;
918: while (its--) {
919: for (i=m-1; i>-1; i--) {
920: n = A->i[i+1] - A->i[i];
921: PetscLogFlops(4*n+3);
922: idx = A->j + A->i[i] + shift;
923: v = A->a + A->i[i] + shift;
924: sum = b[i];
925: SPARSEDENSEMDOT(sum,xs,v,idx,n);
926: d = fshift + A->a[diag[i]+shift];
927: n = B->i[i+1] - B->i[i];
928: idx = B->j + B->i[i] + shift;
929: v = B->a + B->i[i] + shift;
930: SPARSEDENSEMDOT(sum,ls,v,idx,n);
931: x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
932: }
933: }
934: VecRestoreArray(xx,&x);
935: if (bb != xx) {VecRestoreArray(bb,&b); }
936: VecRestoreArray(mat->lvec,&ls);
937: } else {
938: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
939: }
940: return(0);
941: }
943: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
944: {
945: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
946: Mat A = mat->A,B = mat->B;
947: int ierr;
948: PetscReal isend[5],irecv[5];
951: info->block_size = 1.0;
952: MatGetInfo(A,MAT_LOCAL,info);
953: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
954: isend[3] = info->memory; isend[4] = info->mallocs;
955: MatGetInfo(B,MAT_LOCAL,info);
956: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
957: isend[3] += info->memory; isend[4] += info->mallocs;
958: if (flag == MAT_LOCAL) {
959: info->nz_used = isend[0];
960: info->nz_allocated = isend[1];
961: info->nz_unneeded = isend[2];
962: info->memory = isend[3];
963: info->mallocs = isend[4];
964: } else if (flag == MAT_GLOBAL_MAX) {
965: MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
966: info->nz_used = irecv[0];
967: info->nz_allocated = irecv[1];
968: info->nz_unneeded = irecv[2];
969: info->memory = irecv[3];
970: info->mallocs = irecv[4];
971: } else if (flag == MAT_GLOBAL_SUM) {
972: MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
973: info->nz_used = irecv[0];
974: info->nz_allocated = irecv[1];
975: info->nz_unneeded = irecv[2];
976: info->memory = irecv[3];
977: info->mallocs = irecv[4];
978: }
979: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
980: info->fill_ratio_needed = 0;
981: info->factor_mallocs = 0;
982: info->rows_global = (double)matin->M;
983: info->columns_global = (double)matin->N;
984: info->rows_local = (double)matin->m;
985: info->columns_local = (double)matin->N;
987: return(0);
988: }
990: int MatSetOption_MPIAIJ(Mat A,MatOption op)
991: {
992: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
993: int ierr;
996: if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
997: op == MAT_YES_NEW_NONZERO_LOCATIONS ||
998: op == MAT_COLUMNS_UNSORTED ||
999: op == MAT_COLUMNS_SORTED ||
1000: op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1001: op == MAT_KEEP_ZEROED_ROWS ||
1002: op == MAT_NEW_NONZERO_LOCATION_ERR ||
1003: op == MAT_USE_INODES ||
1004: op == MAT_DO_NOT_USE_INODES ||
1005: op == MAT_IGNORE_ZERO_ENTRIES) {
1006: MatSetOption(a->A,op);
1007: MatSetOption(a->B,op);
1008: } else if (op == MAT_ROW_ORIENTED) {
1009: a->roworiented = PETSC_TRUE;
1010: MatSetOption(a->A,op);
1011: MatSetOption(a->B,op);
1012: } else if (op == MAT_ROWS_SORTED ||
1013: op == MAT_ROWS_UNSORTED ||
1014: op == MAT_YES_NEW_DIAGONALS) {
1015: PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignoredn");
1016: } else if (op == MAT_COLUMN_ORIENTED) {
1017: a->roworiented = PETSC_FALSE;
1018: MatSetOption(a->A,op);
1019: MatSetOption(a->B,op);
1020: } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1021: a->donotstash = PETSC_TRUE;
1022: } else if (op == MAT_NO_NEW_DIAGONALS){
1023: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1024: } else {
1025: SETERRQ(PETSC_ERR_SUP,"unknown option");
1026: }
1027: return(0);
1028: }
1030: int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1031: {
1032: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1035: if (m) *m = mat->rstart;
1036: if (n) *n = mat->rend;
1037: return(0);
1038: }
1040: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1041: {
1042: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1043: Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1044: int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1045: int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1046: int *cmap,*idx_p;
1049: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1050: mat->getrowactive = PETSC_TRUE;
1052: if (!mat->rowvalues && (idx || v)) {
1053: /*
1054: allocate enough space to hold information from the longest row.
1055: */
1056: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1057: int max = 1,tmp;
1058: for (i=0; i<matin->m; i++) {
1059: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1060: if (max < tmp) { max = tmp; }
1061: }
1062: PetscMalloc(max*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);
1063: mat->rowindices = (int*)(mat->rowvalues + max);
1064: }
1066: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1067: lrow = row - rstart;
1069: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1070: if (!v) {pvA = 0; pvB = 0;}
1071: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1072: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1073: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1074: nztot = nzA + nzB;
1076: cmap = mat->garray;
1077: if (v || idx) {
1078: if (nztot) {
1079: /* Sort by increasing column numbers, assuming A and B already sorted */
1080: int imark = -1;
1081: if (v) {
1082: *v = v_p = mat->rowvalues;
1083: for (i=0; i<nzB; i++) {
1084: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1085: else break;
1086: }
1087: imark = i;
1088: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1089: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1090: }
1091: if (idx) {
1092: *idx = idx_p = mat->rowindices;
1093: if (imark > -1) {
1094: for (i=0; i<imark; i++) {
1095: idx_p[i] = cmap[cworkB[i]];
1096: }
1097: } else {
1098: for (i=0; i<nzB; i++) {
1099: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1100: else break;
1101: }
1102: imark = i;
1103: }
1104: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1105: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1106: }
1107: } else {
1108: if (idx) *idx = 0;
1109: if (v) *v = 0;
1110: }
1111: }
1112: *nz = nztot;
1113: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1114: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1115: return(0);
1116: }
1118: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1119: {
1120: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1123: if (aij->getrowactive == PETSC_FALSE) {
1124: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1125: }
1126: aij->getrowactive = PETSC_FALSE;
1127: return(0);
1128: }
1130: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1131: {
1132: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1133: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1134: int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1135: PetscReal sum = 0.0;
1136: Scalar *v;
1139: if (aij->size == 1) {
1140: MatNorm(aij->A,type,norm);
1141: } else {
1142: if (type == NORM_FROBENIUS) {
1143: v = amat->a;
1144: for (i=0; i<amat->nz; i++) {
1145: #if defined(PETSC_USE_COMPLEX)
1146: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1147: #else
1148: sum += (*v)*(*v); v++;
1149: #endif
1150: }
1151: v = bmat->a;
1152: for (i=0; i<bmat->nz; i++) {
1153: #if defined(PETSC_USE_COMPLEX)
1154: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1155: #else
1156: sum += (*v)*(*v); v++;
1157: #endif
1158: }
1159: MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1160: *norm = sqrt(*norm);
1161: } else if (type == NORM_1) { /* max column norm */
1162: PetscReal *tmp,*tmp2;
1163: int *jj,*garray = aij->garray;
1164: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1165: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1166: PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1167: *norm = 0.0;
1168: v = amat->a; jj = amat->j;
1169: for (j=0; j<amat->nz; j++) {
1170: tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
1171: }
1172: v = bmat->a; jj = bmat->j;
1173: for (j=0; j<bmat->nz; j++) {
1174: tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1175: }
1176: MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1177: for (j=0; j<mat->N; j++) {
1178: if (tmp2[j] > *norm) *norm = tmp2[j];
1179: }
1180: PetscFree(tmp);
1181: PetscFree(tmp2);
1182: } else if (type == NORM_INFINITY) { /* max row norm */
1183: PetscReal ntemp = 0.0;
1184: for (j=0; j<aij->A->m; j++) {
1185: v = amat->a + amat->i[j] + shift;
1186: sum = 0.0;
1187: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1188: sum += PetscAbsScalar(*v); v++;
1189: }
1190: v = bmat->a + bmat->i[j] + shift;
1191: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1192: sum += PetscAbsScalar(*v); v++;
1193: }
1194: if (sum > ntemp) ntemp = sum;
1195: }
1196: MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1197: } else {
1198: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1199: }
1200: }
1201: return(0);
1202: }
1204: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1205: {
1206: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1207: Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1208: int ierr,shift = Aloc->indexshift;
1209: int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1210: Mat B;
1211: Scalar *array;
1214: if (!matout && M != N) {
1215: SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1216: }
1218: MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1220: /* copy over the A part */
1221: Aloc = (Mat_SeqAIJ*)a->A->data;
1222: m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1223: row = a->rstart;
1224: for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1225: for (i=0; i<m; i++) {
1226: MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1227: row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1228: }
1229: aj = Aloc->j;
1230: for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1232: /* copy over the B part */
1233: Aloc = (Mat_SeqAIJ*)a->B->data;
1234: m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1235: row = a->rstart;
1236: PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);
1237: ct = cols;
1238: for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1239: for (i=0; i<m; i++) {
1240: MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1241: row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1242: }
1243: PetscFree(ct);
1244: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1245: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1246: if (matout) {
1247: *matout = B;
1248: } else {
1249: MatHeaderCopy(A,B);
1250: }
1251: return(0);
1252: }
1254: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1255: {
1256: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1257: Mat a = aij->A,b = aij->B;
1258: int ierr,s1,s2,s3;
1261: MatGetLocalSize(mat,&s2,&s3);
1262: if (rr) {
1263: VecGetLocalSize(rr,&s1);
1264: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1265: /* Overlap communication with computation. */
1266: VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1267: }
1268: if (ll) {
1269: VecGetLocalSize(ll,&s1);
1270: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1271: (*b->ops->diagonalscale)(b,ll,0);
1272: }
1273: /* scale the diagonal block */
1274: (*a->ops->diagonalscale)(a,ll,rr);
1276: if (rr) {
1277: /* Do a scatter end and then right scale the off-diagonal block */
1278: VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1279: (*b->ops->diagonalscale)(b,0,aij->lvec);
1280: }
1281:
1282: return(0);
1283: }
1286: int MatPrintHelp_MPIAIJ(Mat A)
1287: {
1288: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1289: int ierr;
1292: if (!a->rank) {
1293: MatPrintHelp_SeqAIJ(a->A);
1294: }
1295: return(0);
1296: }
1298: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1299: {
1301: *bs = 1;
1302: return(0);
1303: }
1304: int MatSetUnfactored_MPIAIJ(Mat A)
1305: {
1306: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1307: int ierr;
1310: MatSetUnfactored(a->A);
1311: return(0);
1312: }
1314: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1315: {
1316: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1317: Mat a,b,c,d;
1318: PetscTruth flg;
1319: int ierr;
1322: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1323: if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1324: a = matA->A; b = matA->B;
1325: c = matB->A; d = matB->B;
1327: MatEqual(a,c,&flg);
1328: if (flg == PETSC_TRUE) {
1329: MatEqual(b,d,&flg);
1330: }
1331: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1332: return(0);
1333: }
1335: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1336: {
1337: int ierr;
1338: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1339: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1340: PetscTruth flg;
1343: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1344: if (str != SAME_NONZERO_PATTERN || !flg) {
1345: /* because of the column compression in the off-processor part of the matrix a->B,
1346: the number of columns in a->B and b->B may be different, hence we cannot call
1347: the MatCopy() directly on the two parts. If need be, we can provide a more
1348: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1349: then copying the submatrices */
1350: MatCopy_Basic(A,B,str);
1351: } else {
1352: MatCopy(a->A,b->A,str);
1353: MatCopy(a->B,b->B,str);
1354: }
1355: return(0);
1356: }
1358: int MatSetUpPreallocation_MPIAIJ(Mat A)
1359: {
1360: int ierr;
1363: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1364: return(0);
1365: }
1367: EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1368: EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1369: EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1370: EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1371: EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1372: #if !defined(PETSC_USE_COMPLEX)
1373: EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1374: #endif
1376: /* -------------------------------------------------------------------*/
/*
   Function table of the MPIAIJ matrix type.  MatCreate_MPIAIJ() and
   MatDuplicate_MPIAIJ() copy it into the ops member of each matrix with
   PetscMemcpy().

   The initializer is positional: every entry fills the next member of
   struct _MatOps, so entries must not be reordered, inserted or removed
   without matching the structure declaration.  A 0 entry presumably marks an
   operation this type does not provide -- confirm against struct _MatOps.
   The symbolic LU slot is only filled when PETSC_USE_COMPLEX is not defined.
*/
1377: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1378: MatGetRow_MPIAIJ,
1379: MatRestoreRow_MPIAIJ,
1380: MatMult_MPIAIJ,
1381: MatMultAdd_MPIAIJ,
1382: MatMultTranspose_MPIAIJ,
1383: MatMultTransposeAdd_MPIAIJ,
1384: 0,
1385: 0,
1386: 0,
1387: 0,
1388: 0,
1389: 0,
1390: MatRelax_MPIAIJ,
1391: MatTranspose_MPIAIJ,
1392: MatGetInfo_MPIAIJ,
1393: MatEqual_MPIAIJ,
1394: MatGetDiagonal_MPIAIJ,
1395: MatDiagonalScale_MPIAIJ,
1396: MatNorm_MPIAIJ,
1397: MatAssemblyBegin_MPIAIJ,
1398: MatAssemblyEnd_MPIAIJ,
1399: 0,
1400: MatSetOption_MPIAIJ,
1401: MatZeroEntries_MPIAIJ,
1402: MatZeroRows_MPIAIJ,
1403: #if !defined(PETSC_USE_COMPLEX)
1404: MatLUFactorSymbolic_MPIAIJ_TFS,
1405: #else
1406: 0,
1407: #endif
1408: 0,
1409: 0,
1410: 0,
1411: MatSetUpPreallocation_MPIAIJ,
1412: 0,
1413: MatGetOwnershipRange_MPIAIJ,
1414: 0,
1415: 0,
1416: 0,
1417: 0,
1418: MatDuplicate_MPIAIJ,
1419: 0,
1420: 0,
1421: 0,
1422: 0,
1423: 0,
1424: MatGetSubMatrices_MPIAIJ,
1425: MatIncreaseOverlap_MPIAIJ,
1426: MatGetValues_MPIAIJ,
1427: MatCopy_MPIAIJ,
1428: MatPrintHelp_MPIAIJ,
1429: MatScale_MPIAIJ,
1430: 0,
1431: 0,
1432: 0,
1433: MatGetBlockSize_MPIAIJ,
1434: 0,
1435: 0,
1436: 0,
1437: 0,
1438: MatFDColoringCreate_MPIAIJ,
1439: 0,
1440: MatSetUnfactored_MPIAIJ,
1441: 0,
1442: 0,
1443: MatGetSubMatrix_MPIAIJ,
1444: MatDestroy_MPIAIJ,
1445: MatView_MPIAIJ,
1446: MatGetMaps_Petsc};
1448: /* ----------------------------------------------------------------------------------------*/
1450: EXTERN_C_BEGIN
1451: int MatStoreValues_MPIAIJ(Mat mat)
1452: {
1453: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1454: int ierr;
1457: MatStoreValues(aij->A);
1458: MatStoreValues(aij->B);
1459: return(0);
1460: }
1461: EXTERN_C_END
1463: EXTERN_C_BEGIN
1464: int MatRetrieveValues_MPIAIJ(Mat mat)
1465: {
1466: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1467: int ierr;
1470: MatRetrieveValues(aij->A);
1471: MatRetrieveValues(aij->B);
1472: return(0);
1473: }
1474: EXTERN_C_END
1476: #include "petscpc.h"
1477: EXTERN_C_BEGIN
1478: EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1479: EXTERN_C_END
1481: EXTERN_C_BEGIN
1482: int MatCreate_MPIAIJ(Mat B)
1483: {
1484: Mat_MPIAIJ *b;
1485: int ierr,i,size;
1488: MPI_Comm_size(B->comm,&size);
1490: ierr = PetscNew(Mat_MPIAIJ,&b);
1491: B->data = (void*)b;
1492: ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));
1493: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1494: B->factor = 0;
1495: B->assembled = PETSC_FALSE;
1496: B->mapping = 0;
1498: B->insertmode = NOT_SET_VALUES;
1499: b->size = size;
1500: MPI_Comm_rank(B->comm,&b->rank);
1502: PetscSplitOwnership(B->comm,&B->m,&B->M);
1503: PetscSplitOwnership(B->comm,&B->n,&B->N);
1505: /* the information in the maps duplicates the information computed below, eventually
1506: we should remove the duplicate information that is not contained in the maps */
1507: MapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1508: MapCreateMPI(B->comm,B->n,B->N,&B->cmap);
1510: /* build local table of row and column ownerships */
1511: PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1512: PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1513: b->cowners = b->rowners + b->size + 2;
1514: MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1515: b->rowners[0] = 0;
1516: for (i=2; i<=b->size; i++) {
1517: b->rowners[i] += b->rowners[i-1];
1518: }
1519: b->rstart = b->rowners[b->rank];
1520: b->rend = b->rowners[b->rank+1];
1521: MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1522: b->cowners[0] = 0;
1523: for (i=2; i<=b->size; i++) {
1524: b->cowners[i] += b->cowners[i-1];
1525: }
1526: b->cstart = b->cowners[b->rank];
1527: b->cend = b->cowners[b->rank+1];
1529: /* build cache for off array entries formed */
1530: MatStashCreate_Private(B->comm,1,&B->stash);
1531: b->donotstash = PETSC_FALSE;
1532: b->colmap = 0;
1533: b->garray = 0;
1534: b->roworiented = PETSC_TRUE;
1536: /* stuff used for matrix vector multiply */
1537: b->lvec = PETSC_NULL;
1538: b->Mvctx = PETSC_NULL;
1540: /* stuff for MatGetRow() */
1541: b->rowindices = 0;
1542: b->rowvalues = 0;
1543: b->getrowactive = PETSC_FALSE;
1545: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1546: "MatStoreValues_MPIAIJ",
1547: MatStoreValues_MPIAIJ);
1548: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1549: "MatRetrieveValues_MPIAIJ",
1550: MatRetrieveValues_MPIAIJ);
1551: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1552: "MatGetDiagonalBlock_MPIAIJ",
1553: MatGetDiagonalBlock_MPIAIJ);
1554: return(0);
1555: }
1556: EXTERN_C_END
1558: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1559: {
1560: Mat mat;
1561: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1562: int ierr;
1565: *newmat = 0;
1566: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1567: MatSetType(mat,MATMPIAIJ);
1568: a = (Mat_MPIAIJ*)mat->data;
1569: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1570: mat->factor = matin->factor;
1571: mat->assembled = PETSC_TRUE;
1572: mat->insertmode = NOT_SET_VALUES;
1573: mat->preallocated = PETSC_TRUE;
1575: a->rstart = oldmat->rstart;
1576: a->rend = oldmat->rend;
1577: a->cstart = oldmat->cstart;
1578: a->cend = oldmat->cend;
1579: a->size = oldmat->size;
1580: a->rank = oldmat->rank;
1581: a->donotstash = oldmat->donotstash;
1582: a->roworiented = oldmat->roworiented;
1583: a->rowindices = 0;
1584: a->rowvalues = 0;
1585: a->getrowactive = PETSC_FALSE;
1587: ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1588: ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);
1589: if (oldmat->colmap) {
1590: #if defined (PETSC_USE_CTABLE)
1591: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1592: #else
1593: PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1594: PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1595: ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1596: #endif
1597: } else a->colmap = 0;
1598: if (oldmat->garray) {
1599: int len;
1600: len = oldmat->B->n;
1601: PetscMalloc((len+1)*sizeof(int),&a->garray);
1602: PetscLogObjectMemory(mat,len*sizeof(int));
1603: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1604: } else a->garray = 0;
1605:
1606: VecDuplicate(oldmat->lvec,&a->lvec);
1607: PetscLogObjectParent(mat,a->lvec);
1608: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1609: PetscLogObjectParent(mat,a->Mvctx);
1610: MatDuplicate(oldmat->A,cpvalues,&a->A);
1611: PetscLogObjectParent(mat,a->A);
1612: MatDuplicate(oldmat->B,cpvalues,&a->B);
1613: PetscLogObjectParent(mat,a->B);
1614: PetscFListDuplicate(matin->qlist,&mat->qlist);
1615: *newmat = mat;
1616: return(0);
1617: }
1619: #include "petscsys.h"
1621: EXTERN_C_BEGIN
1622: int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1623: {
1624: Mat A;
1625: Scalar *vals,*svals;
1626: MPI_Comm comm = ((PetscObject)viewer)->comm;
1627: MPI_Status status;
1628: int i,nz,ierr,j,rstart,rend,fd;
1629: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1630: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1631: int tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1634: MPI_Comm_size(comm,&size);
1635: MPI_Comm_rank(comm,&rank);
1636: if (!rank) {
1637: PetscViewerBinaryGetDescriptor(viewer,&fd);
1638: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1639: if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1640: if (header[3] < 0) {
1641: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1642: }
1643: }
1645: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1646: M = header[1]; N = header[2];
1647: /* determine ownership of all rows */
1648: m = M/size + ((M % size) > rank);
1649: PetscMalloc((size+2)*sizeof(int),&rowners);
1650: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1651: rowners[0] = 0;
1652: for (i=2; i<=size; i++) {
1653: rowners[i] += rowners[i-1];
1654: }
1655: rstart = rowners[rank];
1656: rend = rowners[rank+1];
1658: /* distribute row lengths to all processors */
1659: ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1660: offlens = ourlens + (rend-rstart);
1661: if (!rank) {
1662: PetscMalloc(M*sizeof(int),&rowlengths);
1663: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1664: PetscMalloc(size*sizeof(int),&sndcounts);
1665: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1666: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1667: PetscFree(sndcounts);
1668: } else {
1669: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1670: }
1672: if (!rank) {
1673: /* calculate the number of nonzeros on each processor */
1674: PetscMalloc(size*sizeof(int),&procsnz);
1675: PetscMemzero(procsnz,size*sizeof(int));
1676: for (i=0; i<size; i++) {
1677: for (j=rowners[i]; j< rowners[i+1]; j++) {
1678: procsnz[i] += rowlengths[j];
1679: }
1680: }
1681: PetscFree(rowlengths);
1683: /* determine max buffer needed and allocate it */
1684: maxnz = 0;
1685: for (i=0; i<size; i++) {
1686: maxnz = PetscMax(maxnz,procsnz[i]);
1687: }
1688: PetscMalloc(maxnz*sizeof(int),&cols);
1690: /* read in my part of the matrix column indices */
1691: nz = procsnz[0];
1692: PetscMalloc(nz*sizeof(int),&mycols);
1693: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1695: /* read in every one elses and ship off */
1696: for (i=1; i<size; i++) {
1697: nz = procsnz[i];
1698: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1699: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1700: }
1701: PetscFree(cols);
1702: } else {
1703: /* determine buffer space needed for message */
1704: nz = 0;
1705: for (i=0; i<m; i++) {
1706: nz += ourlens[i];
1707: }
1708: PetscMalloc((nz+1)*sizeof(int),&mycols);
1710: /* receive message of column indices*/
1711: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1712: MPI_Get_count(&status,MPI_INT,&maxnz);
1713: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1714: }
1716: /* determine column ownership if matrix is not square */
1717: if (N != M) {
1718: n = N/size + ((N % size) > rank);
1719: ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
1720: cstart = cend - n;
1721: } else {
1722: cstart = rstart;
1723: cend = rend;
1724: n = cend - cstart;
1725: }
1727: /* loop over local rows, determining number of off diagonal entries */
1728: PetscMemzero(offlens,m*sizeof(int));
1729: jj = 0;
1730: for (i=0; i<m; i++) {
1731: for (j=0; j<ourlens[i]; j++) {
1732: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1733: jj++;
1734: }
1735: }
1737: /* create our matrix */
1738: for (i=0; i<m; i++) {
1739: ourlens[i] -= offlens[i];
1740: }
1741: MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);
1742: A = *newmat;
1743: MatSetOption(A,MAT_COLUMNS_SORTED);
1744: for (i=0; i<m; i++) {
1745: ourlens[i] += offlens[i];
1746: }
1748: if (!rank) {
1749: PetscMalloc(maxnz*sizeof(Scalar),&vals);
1751: /* read in my part of the matrix numerical values */
1752: nz = procsnz[0];
1753: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1754:
1755: /* insert into matrix */
1756: jj = rstart;
1757: smycols = mycols;
1758: svals = vals;
1759: for (i=0; i<m; i++) {
1760: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1761: smycols += ourlens[i];
1762: svals += ourlens[i];
1763: jj++;
1764: }
1766: /* read in other processors and ship out */
1767: for (i=1; i<size; i++) {
1768: nz = procsnz[i];
1769: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1770: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1771: }
1772: PetscFree(procsnz);
1773: } else {
1774: /* receive numeric values */
1775: PetscMalloc((nz+1)*sizeof(Scalar),&vals);
1777: /* receive message of values*/
1778: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1779: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1780: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1782: /* insert into matrix */
1783: jj = rstart;
1784: smycols = mycols;
1785: svals = vals;
1786: for (i=0; i<m; i++) {
1787: ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1788: smycols += ourlens[i];
1789: svals += ourlens[i];
1790: jj++;
1791: }
1792: }
1793: PetscFree(ourlens);
1794: PetscFree(vals);
1795: PetscFree(mycols);
1796: PetscFree(rowners);
1798: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1799: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1800: return(0);
1801: }
1802: EXTERN_C_END
1804: /*
1805: Not great since it makes two copies of the submatrix, first an SeqAIJ
1806: in local and then by concatenating the local matrices the end result.
1807: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1808: */
1809: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1810: {
1811: int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1812: int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1813: Mat *local,M,Mreuse;
1814: Scalar *vwork,*aa;
1815: MPI_Comm comm = mat->comm;
1816: Mat_SeqAIJ *aij;
1820: MPI_Comm_rank(comm,&rank);
1821: MPI_Comm_size(comm,&size);
1823: if (call == MAT_REUSE_MATRIX) {
1824: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
1825: if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1826: local = &Mreuse;
1827: ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
1828: } else {
1829: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
1830: Mreuse = *local;
1831: PetscFree(local);
1832: }
1834: /*
1835: m - number of local rows
1836: n - number of columns (same on all processors)
1837: rstart - first row in new global matrix generated
1838: */
1839: MatGetSize(Mreuse,&m,&n);
1840: if (call == MAT_INITIAL_MATRIX) {
1841: aij = (Mat_SeqAIJ*)(Mreuse)->data;
1842: if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1843: ii = aij->i;
1844: jj = aij->j;
1846: /*
1847: Determine the number of non-zeros in the diagonal and off-diagonal
1848: portions of the matrix in order to do correct preallocation
1849: */
1851: /* first get start and end of "diagonal" columns */
1852: if (csize == PETSC_DECIDE) {
1853: nlocal = n/size + ((n % size) > rank);
1854: } else {
1855: nlocal = csize;
1856: }
1857: ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
1858: rstart = rend - nlocal;
1859: if (rank == size - 1 && rend != n) {
1860: SETERRQ(1,"Local column sizes do not add up to total number of columns");
1861: }
1863: /* next, compute all the lengths */
1864: ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);
1865: olens = dlens + m;
1866: for (i=0; i<m; i++) {
1867: jend = ii[i+1] - ii[i];
1868: olen = 0;
1869: dlen = 0;
1870: for (j=0; j<jend; j++) {
1871: if (*jj < rstart || *jj >= rend) olen++;
1872: else dlen++;
1873: jj++;
1874: }
1875: olens[i] = olen;
1876: dlens[i] = dlen;
1877: }
1878: MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);
1879: PetscFree(dlens);
1880: } else {
1881: int ml,nl;
1883: M = *newmat;
1884: MatGetLocalSize(M,&ml,&nl);
1885: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1886: MatZeroEntries(M);
1887: /*
1888: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1889: rather than the slower MatSetValues().
1890: */
1891: M->was_assembled = PETSC_TRUE;
1892: M->assembled = PETSC_FALSE;
1893: }
1894: MatGetOwnershipRange(M,&rstart,&rend);
1895: aij = (Mat_SeqAIJ*)(Mreuse)->data;
1896: if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1897: ii = aij->i;
1898: jj = aij->j;
1899: aa = aij->a;
1900: for (i=0; i<m; i++) {
1901: row = rstart + i;
1902: nz = ii[i+1] - ii[i];
1903: cwork = jj; jj += nz;
1904: vwork = aa; aa += nz;
1905: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
1906: }
1908: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
1909: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
1910: *newmat = M;
1912: /* save submatrix used in processor for next request */
1913: if (call == MAT_INITIAL_MATRIX) {
1914: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
1915: PetscObjectDereference((PetscObject)Mreuse);
1916: }
1918: return(0);
1919: }
1921: /*@C
1922: MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
1923: (the default parallel PETSc format). For good matrix assembly performance
1924: the user should preallocate the matrix storage by setting the parameters
1925: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1926: performance can be increased by more than a factor of 50.
1928: Collective on MPI_Comm
1930: Input Parameters:
1931: + A - the matrix
1932: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
1933: (same value is used for all local rows)
1934: . d_nnz - array containing the number of nonzeros in the various rows of the
1935: DIAGONAL portion of the local submatrix (possibly different for each row)
1936: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1937: The size of this array is equal to the number of local rows, i.e 'm'.
1938: You must leave room for the diagonal entry even if it is zero.
1939: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
1940: submatrix (same value is used for all local rows).
1941: - o_nnz - array containing the number of nonzeros in the various rows of the
1942: OFF-DIAGONAL portion of the local submatrix (possibly different for
1943: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1944: structure. The size of this array is equal to the number
1945: of local rows, i.e 'm'.
1947: The AIJ format (also called the Yale sparse matrix format or
1948: compressed row storage), is fully compatible with standard Fortran 77
1949: storage. That is, the stored row and column indices can begin at
1950: either one (as in Fortran) or zero. See the users manual for details.
1952: The user MUST specify either the local or global matrix dimensions
1953: (possibly both).
1955: The parallel matrix is partitioned such that the first m0 rows belong to
1956: process 0, the next m1 rows belong to process 1, the next m2 rows belong
1957: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1959: The DIAGONAL portion of the local submatrix of a processor can be defined
1960: as the submatrix which is obtained by extracting the part corresponding
1961: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1962: first row that belongs to the processor, and r2 is the last row belonging
1963: to this processor. This is a square mxm matrix. The remaining portion
1964: of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
1966: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1968: By default, this format uses inodes (identical nodes) when possible.
1969: We search for consecutive rows with the same nonzero structure, thereby
1970: reusing matrix information to achieve increased efficiency.
1972: Options Database Keys:
1973: + -mat_aij_no_inode - Do not use inodes
1974: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1975: - -mat_aij_oneindex - Internally use indexing starting at 1
1976: rather than 0. Note that when calling MatSetValues(),
1977: the user still MUST index entries starting at 0!
1979: Example usage:
1980:
1981: Consider the following 8x8 matrix with 34 non-zero values, that is
1982: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1983: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1984: as follows:
1986: .vb
1987: 1 2 0 | 0 3 0 | 0 4
1988: Proc0 0 5 6 | 7 0 0 | 8 0
1989: 9 0 10 | 11 0 0 | 12 0
1990: -------------------------------------
1991: 13 0 14 | 15 16 17 | 0 0
1992: Proc1 0 18 0 | 19 20 21 | 0 0
1993: 0 0 0 | 22 23 0 | 24 0
1994: -------------------------------------
1995: Proc2 25 26 27 | 0 0 28 | 29 0
1996: 30 0 0 | 31 32 33 | 0 34
1997: .ve
1999: This can be represented as a collection of submatrices as:
2001: .vb
2002: A B C
2003: D E F
2004: G H I
2005: .ve
2007: Where the submatrices A,B,C are owned by proc0, D,E,F are
2008: owned by proc1, G,H,I are owned by proc2.
2010: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2011: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2012: The 'M','N' parameters are 8,8, and have the same values on all procs.
2014: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2015: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2016: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2017: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2018: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2019: matrix, and [DF] as another SeqAIJ matrix.
2021: When d_nz, o_nz parameters are specified, d_nz storage elements are
2022: allocated for every row of the local diagonal submatrix, and o_nz
2023: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2024: One way to choose d_nz and o_nz is to use the max nonzeros per local
2025: rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2026: In this case, the values of d_nz,o_nz are:
2027: .vb
2028: proc0 : d_nz = 2, o_nz = 2
2029: proc1 : d_nz = 3, o_nz = 2
2030: proc2 : d_nz = 1, o_nz = 4
2031: .ve
2032: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2033: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2034: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2035: 34 values.
2037: When d_nnz, o_nnz parameters are specified, the storage is specified
2038: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2039: In the above case the values for d_nnz,o_nnz are:
2040: .vb
2041: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2042: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2043: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2044: .ve
2045: Here the space allocated is the sum of all the above values, i.e. 34, and
2046: hence pre-allocation is perfect.
2048: Level: intermediate
2050: .keywords: matrix, aij, compressed row, sparse, parallel
2052: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2053: @*/
2054: int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2055: {
2056: Mat_MPIAIJ *b;
2057: int ierr,i;
2058: PetscTruth flg2;
2061: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);
2062: if (!flg2) return(0);
2063: B->preallocated = PETSC_TRUE;
2064: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2065: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2066: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2067: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2068: if (d_nnz) {
2069: for (i=0; i<B->m; i++) {
2070: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2071: }
2072: }
2073: if (o_nnz) {
2074: for (i=0; i<B->m; i++) {
2075: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2076: }
2077: }
2078: b = (Mat_MPIAIJ*)B->data;
2080: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);
2081: PetscLogObjectParent(B,b->A);
2082: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);
2083: PetscLogObjectParent(B,b->B);
2085: return(0);
2086: }
2088: /*@C
2089: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2090: (the default parallel PETSc format). For good matrix assembly performance
2091: the user should preallocate the matrix storage by setting the parameters
2092: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2093: performance can be increased by more than a factor of 50.
2095: Collective on MPI_Comm
2097: Input Parameters:
2098: + comm - MPI communicator
2099: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2100: This value should be the same as the local size used in creating the
2101: y vector for the matrix-vector product y = Ax.
2102: . n - This value should be the same as the local size used in creating the
2103: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
2104: calculated if N is given) For square matrices n is almost always m.
2105: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2106: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2107: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2108: (same value is used for all local rows)
2109: . d_nnz - array containing the number of nonzeros in the various rows of the
2110: DIAGONAL portion of the local submatrix (possibly different for each row)
2111: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2112: The size of this array is equal to the number of local rows, i.e 'm'.
2113: You must leave room for the diagonal entry even if it is zero.
2114: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2115: submatrix (same value is used for all local rows).
2116: - o_nnz - array containing the number of nonzeros in the various rows of the
2117: OFF-DIAGONAL portion of the local submatrix (possibly different for
2118: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2119: structure. The size of this array is equal to the number
2120: of local rows, i.e 'm'.
2122: Output Parameter:
2123: . A - the matrix
2125: Notes:
2126: m,n,M,N parameters specify the size of the matrix, and its partitioning across
2127: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2128: storage requirements for this matrix.
2130: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2131: processor then it must be used on all processors that share the object for
2132: that argument.
2134: The AIJ format (also called the Yale sparse matrix format or
2135: compressed row storage), is fully compatible with standard Fortran 77
2136: storage. That is, the stored row and column indices can begin at
2137: either one (as in Fortran) or zero. See the users manual for details.
2139: The user MUST specify either the local or global matrix dimensions
2140: (possibly both).
2142: The parallel matrix is partitioned such that the first m0 rows belong to
2143: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2144: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2146: The DIAGONAL portion of the local submatrix of a processor can be defined
2147: as the submatrix which is obtained by extracting the part corresponding
2148: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2149: first row that belongs to the processor, and r2 is the last row belonging
2150: to this processor. This is a square mxm matrix. The remaining portion
2151: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2153: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2155: By default, this format uses inodes (identical nodes) when possible.
2156: We search for consecutive rows with the same nonzero structure, thereby
2157: reusing matrix information to achieve increased efficiency.
2159: Options Database Keys:
2160: + -mat_aij_no_inode - Do not use inodes
2161: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2162: - -mat_aij_oneindex - Internally use indexing starting at 1
2163: rather than 0. Note that when calling MatSetValues(),
2164: the user still MUST index entries starting at 0!
2167: Example usage:
2168:
2169: Consider the following 8x8 matrix with 34 non-zero values, that is
2170: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2171: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2172: as follows:
2174: .vb
2175: 1 2 0 | 0 3 0 | 0 4
2176: Proc0 0 5 6 | 7 0 0 | 8 0
2177: 9 0 10 | 11 0 0 | 12 0
2178: -------------------------------------
2179: 13 0 14 | 15 16 17 | 0 0
2180: Proc1 0 18 0 | 19 20 21 | 0 0
2181: 0 0 0 | 22 23 0 | 24 0
2182: -------------------------------------
2183: Proc2 25 26 27 | 0 0 28 | 29 0
2184: 30 0 0 | 31 32 33 | 0 34
2185: .ve
2187: This can be represented as a collection of submatrices as:
2189: .vb
2190: A B C
2191: D E F
2192: G H I
2193: .ve
2195: Where the submatrices A,B,C are owned by proc0, D,E,F are
2196: owned by proc1, G,H,I are owned by proc2.
2198: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2199: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2200: The 'M','N' parameters are 8,8, and have the same values on all procs.
2202: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2203: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2204: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2205: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2206: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2207: matrix, and [DF] as another SeqAIJ matrix.
2209: When d_nz, o_nz parameters are specified, d_nz storage elements are
2210: allocated for every row of the local diagonal submatrix, and o_nz
2211: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2212: One way to choose d_nz and o_nz is to use the max nonzeros per local
2213: rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2214: In this case, the values of d_nz,o_nz are:
2215: .vb
2216: proc0 : d_nz = 2, o_nz = 2
2217: proc1 : d_nz = 3, o_nz = 2
2218: proc2 : d_nz = 1, o_nz = 4
2219: .ve
2220: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2221: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2222: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2223: 34 values.
2225: When d_nnz, o_nnz parameters are specified, the storage is specified
2226: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2227: In the above case the values for d_nnz,o_nnz are:
2228: .vb
2229: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2230: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2231: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2232: .ve
2233: Here the space allocated is the sum of all the above values, i.e. 34, and
2234: hence pre-allocation is perfect.
2236: Level: intermediate
2238: .keywords: matrix, aij, compressed row, sparse, parallel
2240: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2241: @*/
2242: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2243: {
2244: int ierr,size;
2247: MatCreate(comm,m,n,M,N,A);
2248: MPI_Comm_size(comm,&size);
2249: if (size > 1) {
2250: MatSetType(*A,MATMPIAIJ);
2251: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2252: } else {
2253: MatSetType(*A,MATSEQAIJ);
2254: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2255: }
2256: return(0);
2257: }
2259: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2260: {
2261: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2263: *Ad = a->A;
2264: *Ao = a->B;
2265: *colmap = a->garray;
2266: return(0);
2267: }