Actual source code: tools.c
petsc-3.5.4 2015-05-23
1: /*
2: GAMG geometric-algebraic multigrid PC - Mark Adams 2011
3: */
4: #include <petsc-private/matimpl.h>
5: #include <../src/ksp/pc/impls/gamg/gamg.h> /*I "petscpc.h" I*/
6: #include <petsc-private/kspimpl.h>
10: /*
11: Produces a set of block column indices of the matrix row, one for each block represented in the original row
13: n - the number of block indices in cc[]
14: cc - the block indices (must be large enough to contain the indices)
15: */
16: PETSC_STATIC_INLINE PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
17: {
18: PetscInt cnt = -1,nidx,j;
19: const PetscInt *idx;
23: MatGetRow(Amat,row,&nidx,&idx,NULL);
24: if (nidx) {
25: cnt = 0;
26: cc[cnt] = idx[0]/bs;
27: for (j=1; j<nidx; j++) {
28: if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
29: }
30: }
31: MatRestoreRow(Amat,row,&nidx,&idx,NULL);
32: *n = cnt+1;
33: return(0);
34: }
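/*
   Worked example (illustrative only, not part of the original source): with bs = 2 and a
   scalar row whose (sorted) column indices are {0, 1, 4, 5, 6}, each index is divided by
   bs and consecutive duplicates are skipped, so cc = {0, 2, 3} and *n = 3.
*/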
38: /*
39: Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
41: ncollapsed - the number of block indices
42: collapsed - the block indices (must be large enough to contain the indices)
43: */
44: PETSC_STATIC_INLINE PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
45: {
46: PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
50: MatCollapseRow(Amat,start,bs,&nprev,cprev);
51: for (i=start+1; i<start+bs; i++) {
52: MatCollapseRow(Amat,i,bs,&ncur,ccur);
53: PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged);
54: cprevtmp = cprev; cprev = merged; merged = cprevtmp;
55: }
56: *ncollapsed = nprev;
57: if (collapsed) *collapsed = cprev;
58: return(0);
59: }
62: /* -------------------------------------------------------------------------- */
63: /*
64: PCGAMGCreateGraph - create simple scaled scalar graph from matrix
66: Input Parameter:
67: . Amat - matrix
68: Output Parameter:
69: . a_Gmat - output scalar graph (symmetric?)
70: */
73: PetscErrorCode PCGAMGCreateGraph(const Mat Amat, Mat *a_Gmat)
74: {
76: PetscInt Istart,Iend,Ii,i,jj,kk,ncols,nloc,NN,MM,bs;
77: MPI_Comm comm;
78: Mat Gmat;
81: PetscObjectGetComm((PetscObject)Amat,&comm);
82: MatGetOwnershipRange(Amat, &Istart, &Iend);
83: MatGetSize(Amat, &MM, &NN);
84: MatGetBlockSize(Amat, &bs);
85: nloc = (Iend-Istart)/bs;
87: #if defined PETSC_GAMG_USE_LOG
88: PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);
89: #endif
91: if (bs > 1) {
92: const PetscScalar *vals;
93: const PetscInt *idx;
94: PetscInt *d_nnz, *o_nnz,*blockmask = NULL,maskcnt,*w0,*w1,*w2;
95: PetscBool ismpiaij,isseqaij;
97: /*
98: Determine the preallocation needed for the scalar matrix derived from the vector matrix.
99: */
101: PetscObjectTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij);
102: PetscObjectTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij);
104: PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz);
106: if (isseqaij) {
107: PetscInt max_d_nnz;
109: /*
110: Determine exact preallocation count for (sequential) scalar matrix
111: */
112: MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz);
113: max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
114: PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);
115: for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
116: MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);
117: }
118: PetscFree3(w0,w1,w2);
120: } else if (ismpiaij) {
121: Mat Daij,Oaij;
122: const PetscInt *garray;
123: PetscInt max_d_nnz;
125: MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray);
127: /*
128: Determine exact preallocation count for diagonal block portion of scalar matrix
129: */
130: MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz);
131: max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
132: PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);
133: for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
134: MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);
135: }
136: PetscFree3(w0,w1,w2);
138: /*
139: Overestimate (usually grossly over) the preallocation count for the off-diagonal portion of the scalar matrix
140: */
141: for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
142: o_nnz[jj] = 0;
143: for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
144: MatGetRow(Oaij,Ii+kk,&ncols,0,0);
145: o_nnz[jj] += ncols;
146: MatRestoreRow(Oaij,Ii+kk,&ncols,0,0);
147: }
148: if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
149: }
151: } else {
152: /*
154: This is O(nloc*nloc/bs) work!
156: The count is accurate for the "diagonal" block of the matrix, but for the off-diagonal
157: block it is usually a gross overestimate and can occasionally be too low.
159: This should be fixed to be accurate for the off-diagonal portion. A mask cannot simply be
160: used for the off-diagonal portion since for huge matrices that would require too much memory per
161: MPI process.
162: */
163: PetscMalloc1(nloc, &blockmask);
164: for (Ii = Istart, jj = 0; Ii < Iend; Ii += bs, jj++) {
165: o_nnz[jj] = 0;
166: PetscMemzero(blockmask,nloc*sizeof(PetscInt));
167: for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
168: MatGetRow(Amat,Ii+kk,&ncols,&idx,0);
169: for (i=0; i<ncols; i++) {
170: if (idx[i] >= Istart && idx[i] < Iend) {
171: blockmask[(idx[i] - Istart)/bs] = 1;
172: }
173: }
174: if (ncols > o_nnz[jj]) {
175: o_nnz[jj] = ncols;
176: if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
177: }
178: MatRestoreRow(Amat,Ii+kk,&ncols,&idx,0);
179: }
180: maskcnt = 0;
181: for (i=0; i<nloc; i++) {
182: if (blockmask[i]) maskcnt++;
183: }
184: d_nnz[jj] = maskcnt;
185: }
186: PetscFree(blockmask);
187: }
189: /* get scalar copy (norms) of matrix -- AIJ specific!!! */
190: MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE,0, d_nnz, 0, o_nnz, &Gmat);
191: PetscFree2(d_nnz,o_nnz);
193: for (Ii = Istart; Ii < Iend; Ii++) {
194: PetscInt dest_row = Ii/bs;
195: MatGetRow(Amat,Ii,&ncols,&idx,&vals);
196: for (jj=0; jj<ncols; jj++) {
197: PetscInt dest_col = idx[jj]/bs;
198: PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
199: MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES);
200: }
201: MatRestoreRow(Amat,Ii,&ncols,&idx,&vals);
202: }
203: MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY);
204: MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY);
205: } else {
206: /* just copy the scalar matrix - abs() is not taken here; values are scaled later */
207: MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat);
208: }
210: #if defined PETSC_GAMG_USE_LOG
211: PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);
212: #endif
214: *a_Gmat = Gmat;
215: return(0);
216: }
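/*
   Illustrative sketch of the bs > 1 collapse above (the values are hypothetical): for bs = 2,
   the 2x2 block A(0:1,2:3) = [2 -1; -1 2] contributes |2| + |-1| + |-1| + |2| = 6 to the
   single graph entry Gmat(0,1), since every scalar entry is added with ADD_VALUES into
   row Ii/bs, column idx/bs.
*/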
218: /* -------------------------------------------------------------------------- */
219: /*
220: PCGAMGFilterGraph - filter graph and symmetrize if needed
222: Input Parameter:
223: . vfilter - threshold parameter [0,1)
224: . symm - symmetrize?
225: In/Output Parameter:
226: . a_Gmat - original graph
227: */
230: PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,const PetscReal vfilter,const PetscBool symm,const PetscInt verbose)
231: {
232: PetscErrorCode ierr;
233: PetscInt Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc;
234: PetscMPIInt rank;
235: Mat Gmat = *a_Gmat, tGmat, matTrans;
236: MPI_Comm comm;
237: const PetscScalar *vals;
238: const PetscInt *idx;
239: PetscInt *d_nnz, *o_nnz;
240: Vec diag;
243: PetscObjectGetComm((PetscObject)Gmat,&comm);
244: MPI_Comm_rank(comm,&rank);
245: MatGetOwnershipRange(Gmat, &Istart, &Iend);
246: nloc = Iend - Istart;
247: MatGetSize(Gmat, &MM, &NN);
248: #if defined PETSC_GAMG_USE_LOG
249: PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);
250: #endif
251: /* symmetrically scale Gmat by 1/sqrt(diag) so entries are comparable and the filter threshold is meaningful */
252: MatGetVecs(Gmat, &diag, 0);
253: MatGetDiagonal(Gmat, diag);
254: VecReciprocal(diag);
255: VecSqrtAbs(diag);
256: MatDiagonalScale(Gmat, diag, diag);
257: VecDestroy(&diag);
259: if (symm) {
260: MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans);
261: }
263: /* filter - dup zeros out matrix */
264: PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz);
265: for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) {
266: MatGetRow(Gmat,Ii,&ncols,NULL,NULL);
267: d_nnz[jj] = ncols;
268: o_nnz[jj] = ncols;
269: MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL);
270: if (symm) {
271: MatGetRow(matTrans,Ii,&ncols,NULL,NULL);
272: d_nnz[jj] += ncols;
273: o_nnz[jj] += ncols;
274: MatRestoreRow(matTrans,Ii,&ncols,NULL,NULL);
275: }
276: if (d_nnz[jj] > nloc) d_nnz[jj] = nloc;
277: if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc;
278: }
279: MatCreateAIJ(comm, nloc, nloc, MM, MM, 0, d_nnz, 0, o_nnz, &tGmat);
280: PetscFree2(d_nnz,o_nnz);
281: if (symm) {
282: MatDestroy(&matTrans);
283: }
285: for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) {
286: MatGetRow(Gmat,Ii,&ncols,&idx,&vals);
287: for (jj=0; jj<ncols; jj++,nnz0++) {
288: PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
289: if (PetscRealPart(sv) > vfilter) {
290: nnz1++;
291: if (symm) {
292: sv *= 0.5;
293: MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,ADD_VALUES);
294: MatSetValues(tGmat,1,&idx[jj],1,&Ii,&sv,ADD_VALUES);
295: } else {
296: MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,ADD_VALUES);
297: }
298: }
299: }
300: MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals);
301: }
302: MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY);
303: MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY);
305: #if defined PETSC_GAMG_USE_LOG
306: PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);
307: #endif
309: if (verbose) {
310: if (verbose == 1) {
311: PetscPrintf(comm,"\t[%d]%s %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%d)\n",rank,__FUNCT__,
312: 100.*(double)nnz1/(double)nnz0,vfilter,(double)nnz0/(double)nloc,MM);
313: } else {
314: PetscInt nnz[2],out[2];
315: nnz[0] = nnz0; nnz[1] = nnz1;
316: MPI_Allreduce(nnz, out, 2, MPIU_INT, MPI_SUM, comm);
317: PetscPrintf(comm,"\t[%d]%s %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%d)\n",rank,__FUNCT__,
318: 100.*(double)out[1]/(double)out[0],vfilter,(double)out[0]/(double)MM,MM);
319: }
320: }
321: MatDestroy(&Gmat);
322: *a_Gmat = tGmat;
323: return(0);
324: }
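/*
   Illustrative usage sketch (assumes a caller that already holds an assembled Mat A; the
   threshold value 0.05 is hypothetical; error checking omitted):

     Mat G;
     PCGAMGCreateGraph(A, &G);                    // collapse blocks into a scalar graph
     PCGAMGFilterGraph(&G, 0.05, PETSC_TRUE, 0);  // drop scaled entries <= 0.05, symmetrize
     ...
     MatDestroy(&G);
*/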
326: /* -------------------------------------------------------------------------- */
327: /*
328: PCGAMGGetDataWithGhosts - hacks into Mat MPIAIJ internals, so this must be run on more than one process
330: Input Parameter:
331: . Gmat - MPIAIJ matrix for scatters
332: . data_sz - number of data terms per node (# cols in output)
333: . data_in[nloc*data_sz] - column oriented data
334: Output Parameter:
335: . a_stride - number of rows of output
336: . a_data_out[stride*data_sz] - output data with ghosts
337: */
340: PetscErrorCode PCGAMGGetDataWithGhosts(const Mat Gmat,const PetscInt data_sz,const PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out)
341: {
343: MPI_Comm comm;
344: Vec tmp_crds;
345: Mat_MPIAIJ *mpimat = (Mat_MPIAIJ*)Gmat->data;
346: PetscInt nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc;
347: PetscScalar *data_arr;
348: PetscReal *datas;
349: PetscBool isMPIAIJ;
352: PetscObjectGetComm((PetscObject)Gmat,&comm);
353: PetscObjectTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ);
354: MatGetOwnershipRange(Gmat, &my0, &Iend);
355: nloc = Iend - my0;
356: VecGetLocalSize(mpimat->lvec, &num_ghosts);
357: nnodes = num_ghosts + nloc;
358: *a_stride = nnodes;
359: MatGetVecs(Gmat, &tmp_crds, 0);
361: PetscMalloc1(data_sz*nnodes, &datas);
362: for (dir=0; dir<data_sz; dir++) {
363: /* set local, and global */
364: for (kk=0; kk<nloc; kk++) {
365: PetscInt gid = my0 + kk;
366: PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */
367: datas[dir*nnodes + kk] = PetscRealPart(crd);
369: VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES);
370: }
371: VecAssemblyBegin(tmp_crds);
372: VecAssemblyEnd(tmp_crds);
373: /* get ghost datas */
374: VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
375: VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
376: VecGetArray(mpimat->lvec, &data_arr);
377: for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]);
378: VecRestoreArray(mpimat->lvec, &data_arr);
379: }
380: VecDestroy(&tmp_crds);
381: *a_data_out = datas;
382: return(0);
383: }
386: /* hash table stuff - simple, not dynamic, key >= 0, linear-probing hash table
387: *
388: * GAMGTableCreate
389: */
390: /* avoid overflow */
391: #define GAMG_HASH(key) ((((PetscInt)7)*key)%a_tab->size)
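/*
   Example (hypothetical size): with a_tab->size = 11, GAMG_HASH(5) = (7*5) % 11 = 2;
   collisions are resolved by the linear probing (with wraparound) in GAMGTableAdd/Find below.
*/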
394: PetscErrorCode GAMGTableCreate(PetscInt a_size, GAMGHashTable *a_tab)
395: {
397: PetscInt kk;
400: a_tab->size = a_size;
402: PetscMalloc1(a_size, &a_tab->table);
403: PetscMalloc1(a_size, &a_tab->data);
404: for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1;
405: return(0);
406: }
410: PetscErrorCode GAMGTableDestroy(GAMGHashTable *a_tab)
411: {
415: PetscFree(a_tab->table);
416: PetscFree(a_tab->data);
417: return(0);
418: }
422: PetscErrorCode GAMGTableAdd(GAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data)
423: {
424: PetscInt kk,idx;
427: if (a_key<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %d.",a_key);
428: for (kk = 0, idx = GAMG_HASH(a_key);
429: kk < a_tab->size;
430: kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) {
432: if (a_tab->table[idx] == a_key) {
433: /* exists */
434: a_tab->data[idx] = a_data;
435: break;
436: } else if (a_tab->table[idx] == -1) {
437: /* add */
438: a_tab->table[idx] = a_key;
439: a_tab->data[idx] = a_data;
440: break;
441: }
442: }
443: if (kk==a_tab->size) {
444: /* this is not very efficient: we wait until the table is completely full before growing it */
445: PetscInt oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data;
448: a_tab->size = new_size;
450: PetscMalloc1(a_tab->size, &a_tab->table);
451: PetscMalloc1(a_tab->size, &a_tab->data);
453: for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1;
454: for (kk=0;kk<oldsize;kk++) {
455: if (oldtable[kk] != -1) {
456: GAMGTableAdd(a_tab, oldtable[kk], olddata[kk]);
457: }
458: }
459: PetscFree(oldtable);
460: PetscFree(olddata);
461: GAMGTableAdd(a_tab, a_key, a_data);
462: }
463: return(0);
464: }
468: PetscErrorCode GAMGTableFind(GAMGHashTable *a_tab, PetscInt a_key, PetscInt *a_data)
469: {
470: PetscInt kk,idx;
473: if (a_key<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %d.",a_key);
474: for (kk = 0, idx = GAMG_HASH(a_key); kk < a_tab->size; kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) {
475: if (a_tab->table[idx] == a_key) {
476: *a_data = a_tab->data[idx];
477: break;
478: } else if (a_tab->table[idx] == -1) {
479: /* not here */
480: *a_data = -1;
481: break;
482: }
483: }
484: if (kk==a_tab->size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"key %d not found in table",a_key);
485: return(0);
486: }
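/*
   Illustrative usage sketch of the table (the variable names nghost, gid and lid are
   hypothetical; error checking omitted):

     GAMGHashTable gid2lid;
     PetscInt      lid;
     GAMGTableCreate(2*nghost + 1, &gid2lid);
     GAMGTableAdd(&gid2lid, gid, 7);      // store key -> data; grows the table if full
     GAMGTableFind(&gid2lid, gid, &lid);  // lid = 7, or -1 if gid was never added
     GAMGTableDestroy(&gid2lid);
*/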