Actual source code: mpispooles.c
1: #define PETSCMAT_DLL
3: /*
4: Provides an interface to the Spooles parallel sparse solver (MPI SPOOLES)
5: */
7: #include src/mat/impls/aij/seq/aij.h
8: #include src/mat/impls/sbaij/seq/sbaij.h
9: #include src/mat/impls/baij/seq/baij.h
10: #include src/mat/impls/aij/mpi/mpiaij.h
11: #include src/mat/impls/sbaij/mpi/mpisbaij.h
12: #include src/mat/impls/aij/seq/spooles/spooles.h
14: EXTERN int SetSpoolesOptions(Mat, Spooles_options *);
18: PetscErrorCode MatDestroy_MPIAIJSpooles(Mat A)
19: {
20: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
22:
24: if (lu->CleanUpSpooles) {
25: FrontMtx_free(lu->frontmtx);
26: IV_free(lu->newToOldIV);
27: IV_free(lu->oldToNewIV);
28: IV_free(lu->vtxmapIV);
29: InpMtx_free(lu->mtxA);
30: ETree_free(lu->frontETree);
31: IVL_free(lu->symbfacIVL);
32: SubMtxManager_free(lu->mtxmanager);
33: DenseMtx_free(lu->mtxX);
34: DenseMtx_free(lu->mtxY);
35: MPI_Comm_free(&(lu->comm_spooles));
36: if ( lu->scat ){
37: VecDestroy(lu->vec_spooles);
38: ISDestroy(lu->iden);
39: ISDestroy(lu->is_petsc);
40: VecScatterDestroy(lu->scat);
41: }
42: }
43: MatConvert_Spooles_Base(A,lu->basetype,MAT_REUSE_MATRIX,&A);
44: (*A->ops->destroy)(A);
46: return(0);
47: }
51: PetscErrorCode MatSolve_MPISpooles(Mat A,Vec b,Vec x)
52: {
53: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
55: int size,rank,m=A->rmap.n,irow,*rowindY;
56: PetscScalar *array;
57: DenseMtx *newY ;
58: SubMtxManager *solvemanager ;
59: #if defined(PETSC_USE_COMPLEX)
60: double x_real,x_imag;
61: #endif
64: MPI_Comm_size(((PetscObject)A)->comm,&size);
65: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
66:
67: /* copy b into spooles' rhs mtxY */
68: DenseMtx_init(lu->mtxY, lu->options.typeflag, 0, 0, m, 1, 1, m);
69: VecGetArray(b,&array);
71: DenseMtx_rowIndices(lu->mtxY, &m, &rowindY); /* get m, rowind */
72: for ( irow = 0 ; irow < m ; irow++ ) {
73: rowindY[irow] = irow + lu->rstart; /* global rowind */
74: #if !defined(PETSC_USE_COMPLEX)
75: DenseMtx_setRealEntry(lu->mtxY, irow, 0, *array++);
76: #else
77: DenseMtx_setComplexEntry(lu->mtxY,irow,0,PetscRealPart(*array),PetscImaginaryPart(*array));
78: array++;
79: #endif
80: }
81: VecRestoreArray(b,&array);
82:
83: if ( lu->options.msglvl > 2 ) {
84: int err;
85: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n 1 matrix in original ordering");
86: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
87: err = fflush(lu->options.msgFile);
88: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
89: }
90:
91: /* permute and redistribute Y if necessary */
92: DenseMtx_permuteRows(lu->mtxY, lu->oldToNewIV);
93: if ( lu->options.msglvl > 2 ) {
94: int err;
95: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix in new ordering");
96: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
97: err = fflush(lu->options.msgFile);
98: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
99: }
101: MPI_Barrier(((PetscObject)A)->comm); /* for initializing firsttag, because the num. of tags used
102: by FrontMtx_MPI_split() is unknown */
103: lu->firsttag = 0;
104: newY = DenseMtx_MPI_splitByRows(lu->mtxY, lu->vtxmapIV, lu->stats, lu->options.msglvl,
105: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
106: DenseMtx_free(lu->mtxY);
107: lu->mtxY = newY ;
108: lu->firsttag += size ;
109: if ( lu->options.msglvl > 2 ) {
110: int err;
111: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split DenseMtx Y");
112: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
113: err = fflush(lu->options.msgFile);
114: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
115: }
117: if ( FRONTMTX_IS_PIVOTING(lu->frontmtx) ) {
118: /* pivoting has taken place, redistribute the right hand side
119: to match the final rows and columns in the fronts */
120: IV *rowmapIV ;
121: rowmapIV = FrontMtx_MPI_rowmapIV(lu->frontmtx, lu->ownersIV, lu->options.msglvl,
122: lu->options.msgFile, lu->comm_spooles);
123: newY = DenseMtx_MPI_splitByRows(lu->mtxY, rowmapIV, lu->stats, lu->options.msglvl,
124: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
125: DenseMtx_free(lu->mtxY);
126: lu->mtxY = newY ;
127: IV_free(rowmapIV);
128: lu->firsttag += size;
129: }
130: if ( lu->options.msglvl > 2 ) {
131: int err;
132: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix after split");
133: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
134: err = fflush(lu->options.msgFile);
135: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
136: }
138: if ( lu->nmycol > 0 ) IVcopy(lu->nmycol,lu->rowindX,IV_entries(lu->ownedColumnsIV)); /* must do for each solve */
139:
140: /* solve the linear system */
141: solvemanager = SubMtxManager_new();
142: SubMtxManager_init(solvemanager, NO_LOCK, 0);
143: FrontMtx_MPI_solve(lu->frontmtx, lu->mtxX, lu->mtxY, solvemanager, lu->solvemap, lu->cpus,
144: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
145: SubMtxManager_free(solvemanager);
146: if ( lu->options.msglvl > 2 ) {
147: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n solution in new ordering");
148: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
149: }
151: /* permute the solution into the original ordering */
152: DenseMtx_permuteRows(lu->mtxX, lu->newToOldIV);
153: if ( lu->options.msglvl > 2 ) {
154: int err;
155: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution in old ordering");
156: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
157: err = fflush(lu->options.msgFile);
158: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
159: }
160:
161: /* scatter local solution mtxX into mpi vector x */
162: if( !lu->scat ){ /* create followings once for each numfactorization */
163: /* vec_spooles <- mtxX */
164: #if !defined(PETSC_USE_COMPLEX)
165: VecCreateSeqWithArray(PETSC_COMM_SELF,lu->nmycol,lu->entX,&lu->vec_spooles);
166: #else
167: VecCreateSeq(PETSC_COMM_SELF,lu->nmycol,&lu->vec_spooles);
168: #endif
169: ISCreateStride(PETSC_COMM_SELF,lu->nmycol,0,1,&lu->iden);
170: ISCreateGeneral(PETSC_COMM_SELF,lu->nmycol,lu->rowindX,&lu->is_petsc);
171: VecScatterCreate(lu->vec_spooles,lu->iden,x,lu->is_petsc,&lu->scat);
172: }
173: #if defined(PETSC_USE_COMPLEX)
174: VecGetArray(lu->vec_spooles,&array);
175: for (irow = 0; irow < lu->nmycol; irow++){
176: DenseMtx_complexEntry(lu->mtxX,irow,0,&x_real,&x_imag);
177: array[irow] = x_real+x_imag*PETSC_i;
178: }
179: VecRestoreArray(lu->vec_spooles,&array);
180: #endif
181: VecScatterBegin(lu->scat,lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD);
182: VecScatterEnd(lu->scat,lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD);
183: return(0);
184: }
188: PetscErrorCode MatFactorNumeric_MPISpooles(Mat A,MatFactorInfo *info,Mat *F)
189: {
190: Mat_Spooles *lu = (Mat_Spooles*)(*F)->spptr;
191: PetscErrorCode ierr;
192: int rank,size,lookahead=0,sierr;
193: ChvManager *chvmanager ;
194: Chv *rootchv ;
195: Graph *graph ;
196: IVL *adjIVL;
197: DV *cumopsDV ;
198: double droptol=0.0,*opcounts,minops,cutoff;
199: #if !defined(PETSC_USE_COMPLEX)
200: double *val;
201: #endif
202: InpMtx *newA ;
203: PetscScalar *av, *bv;
204: PetscInt *ai, *aj, *bi,*bj, nz, *ajj, *bjj, *garray,
205: i,j,irow,jcol,countA,countB,jB,*row,*col,colA_start,jj;
206: PetscInt M=A->rmap.N,m=A->rmap.n,root,nedges,tagbound,lasttag;
207: Mat F_diag;
208:
210: MPI_Comm_size(((PetscObject)A)->comm,&size);
211: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
213: if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
214: /* get input parameters */
215: SetSpoolesOptions(A, &lu->options);
217: (*F)->ops->solve = MatSolve_MPISpooles;
218: (*F)->ops->destroy = MatDestroy_MPIAIJSpooles;
219: (*F)->assembled = PETSC_TRUE;
220: if ((*F)->factor == FACTOR_LU){
221: F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
222: } else {
223: F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
224: }
225: F_diag->assembled = PETSC_TRUE;
227: /* to be used by MatSolve() */
228: lu->mtxY = DenseMtx_new();
229: lu->mtxX = DenseMtx_new();
230: lu->scat = PETSC_NULL;
232: IVzero(20, lu->stats);
233: DVzero(20, lu->cpus);
235: lu->mtxA = InpMtx_new();
236: }
237:
238: /* copy A to Spooles' InpMtx object */
239: if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
240: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
241: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data;
242: Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data;
243: ai=aa->i; aj=aa->j; av=aa->a;
244: bi=bb->i; bj=bb->j; bv=bb->a;
245: lu->rstart = A->rmap.rstart;
246: nz = aa->nz + bb->nz;
247: garray = mat->garray;
248: } else { /* SPOOLES_SYMMETRIC */
249: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
250: Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
251: Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data;
252: ai=aa->i; aj=aa->j; av=aa->a;
253: bi=bb->i; bj=bb->j; bv=bb->a;
254: lu->rstart = A->rmap.rstart;
255: nz = aa->nz + bb->nz;
256: garray = mat->garray;
257: }
258:
259: InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0);
260: row = InpMtx_ivec1(lu->mtxA);
261: col = InpMtx_ivec2(lu->mtxA);
262: #if !defined(PETSC_USE_COMPLEX)
263: val = InpMtx_dvec(lu->mtxA);
264: #endif
266: jj = 0; irow = lu->rstart;
267: for ( i=0; i<m; i++ ) {
268: ajj = aj + ai[i]; /* ptr to the beginning of this row */
269: countA = ai[i+1] - ai[i];
270: countB = bi[i+1] - bi[i];
271: bjj = bj + bi[i];
272: jB = 0;
273:
274: if (lu->options.symflag == SPOOLES_NONSYMMETRIC ){
275: /* B part, smaller col index */
276: colA_start = lu->rstart + ajj[0]; /* the smallest col index for A */
277: for (j=0; j<countB; j++){
278: jcol = garray[bjj[j]];
279: if (jcol > colA_start) {
280: jB = j;
281: break;
282: }
283: row[jj] = irow; col[jj] = jcol;
284: #if !defined(PETSC_USE_COMPLEX)
285: val[jj++] = *bv++;
286: #else
287: InpMtx_inputComplexEntry(lu->mtxA,irow,jcol,PetscRealPart(*bv),PetscImaginaryPart(*bv));
288: bv++; jj++;
289: #endif
290: if (j==countB-1) jB = countB;
291: }
292: }
293: /* A part */
294: for (j=0; j<countA; j++){
295: row[jj] = irow; col[jj] = lu->rstart + ajj[j];
296: #if !defined(PETSC_USE_COMPLEX)
297: val[jj++] = *av++;
298: #else
299: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*av),PetscImaginaryPart(*av));
300: av++; jj++;
301: #endif
302: }
303: /* B part, larger col index */
304: for (j=jB; j<countB; j++){
305: row[jj] = irow; col[jj] = garray[bjj[j]];
306: #if !defined(PETSC_USE_COMPLEX)
307: val[jj++] = *bv++;
308: #else
309: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*bv),PetscImaginaryPart(*bv));
310: bv++; jj++;
311: #endif
312: }
313: irow++;
314: }
315: #if !defined(PETSC_USE_COMPLEX)
316: InpMtx_inputRealTriples(lu->mtxA, nz, row, col, val);
317: #endif
318: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
319: if ( lu->options.msglvl > 0 ) {
320: int err;
321: printf("[%d] input matrix\n",rank);
322: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n [%d] input matrix\n",rank);
323: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
324: err = fflush(lu->options.msgFile);
325: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
326: }
328: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
329: /*
330: find a low-fill ordering
331: (1) create the Graph object
332: (2) order the graph using multiple minimum degree
333: (3) find out who has the best ordering w.r.t. op count,
334: and broadcast that front tree object
335: */
336: graph = Graph_new();
337: adjIVL = InpMtx_MPI_fullAdjacency(lu->mtxA, lu->stats,
338: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
339: nedges = IVL_tsize(adjIVL);
340: Graph_init2(graph, 0, M, 0, nedges, M, nedges, adjIVL, NULL, NULL);
341: if ( lu->options.msglvl > 2 ) {
342: int err;
343: err = PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");
344: Graph_writeForHumanEye(graph, lu->options.msgFile);
345: fflush(lu->options.msgFile);
346: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
347: }
349: switch (lu->options.ordering) {
350: case 0:
351: lu->frontETree = orderViaBestOfNDandMS(graph,
352: lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
353: lu->options.seed + rank, lu->options.msglvl, lu->options.msgFile); break;
354: case 1:
355: lu->frontETree = orderViaMMD(graph,lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
356: case 2:
357: lu->frontETree = orderViaMS(graph, lu->options.maxdomainsize,
358: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
359: case 3:
360: lu->frontETree = orderViaND(graph, lu->options.maxdomainsize,
361: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
362: default:
363: SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering");
364: }
366: Graph_free(graph);
367: if ( lu->options.msglvl > 2 ) {
368: int err;
369: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");
370: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
371: err = fflush(lu->options.msgFile);
372: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
373: }
375: opcounts = DVinit(size, 0.0);
376: opcounts[rank] = ETree_nFactorOps(lu->frontETree, lu->options.typeflag, lu->options.symflag);
377: MPI_Allgather((void*) &opcounts[rank], 1, MPI_DOUBLE,
378: (void*) opcounts, 1, MPI_DOUBLE, ((PetscObject)A)->comm);
379: minops = DVmin(size, opcounts, &root);
380: DVfree(opcounts);
381:
382: lu->frontETree = ETree_MPI_Bcast(lu->frontETree, root,
383: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
384: if ( lu->options.msglvl > 2 ) {
385: int err;
386: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n best front tree");
387: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
388: err = fflush(lu->options.msgFile);
389: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
390: }
391:
392: /* get the permutations, permute the front tree, permute the matrix */
393: lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree);
394: lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree);
396: ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);
398: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
399:
400: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
402: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
403: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
405: /* generate the owners map IV object and the map from vertices to owners */
406: cutoff = 1./(2*size);
407: cumopsDV = DV_new();
408: DV_init(cumopsDV, size, NULL);
409: lu->ownersIV = ETree_ddMap(lu->frontETree,
410: lu->options.typeflag, lu->options.symflag, cumopsDV, cutoff);
411: DV_free(cumopsDV);
412: lu->vtxmapIV = IV_new();
413: IV_init(lu->vtxmapIV, M, NULL);
414: IVgather(M, IV_entries(lu->vtxmapIV),
415: IV_entries(lu->ownersIV), ETree_vtxToFront(lu->frontETree));
416: if ( lu->options.msglvl > 2 ) {
417: int err;
419: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from fronts to owning processes");
420: IV_writeForHumanEye(lu->ownersIV, lu->options.msgFile);
421: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from vertices to owning processes");
422: IV_writeForHumanEye(lu->vtxmapIV, lu->options.msgFile);
423: err = fflush(lu->options.msgFile);
424: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
425: }
427: /* redistribute the matrix */
428: lu->firsttag = 0 ;
429: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
430: lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
431: lu->firsttag += size ;
433: InpMtx_free(lu->mtxA);
434: lu->mtxA = newA ;
435: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
436: if ( lu->options.msglvl > 2 ) {
437: int err;
438: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
439: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
440: err = fflush(lu->options.msgFile);
441: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
442: }
443:
444: /* compute the symbolic factorization */
445: lu->symbfacIVL = SymbFac_MPI_initFromInpMtx(lu->frontETree, lu->ownersIV, lu->mtxA,
446: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
447: lu->firsttag += lu->frontETree->nfront ;
448: if ( lu->options.msglvl > 2 ) {
449: int err;
450: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n local symbolic factorization");
451: IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile);
452: err = fflush(lu->options.msgFile);
453: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
454: }
456: lu->mtxmanager = SubMtxManager_new();
457: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
458: lu->frontmtx = FrontMtx_new();
460: } else { /* new num factorization using previously computed symbolic factor */
461: if (lu->options.pivotingflag) { /* different FrontMtx is required */
462: FrontMtx_free(lu->frontmtx);
463: lu->frontmtx = FrontMtx_new();
464: }
466: SubMtxManager_free(lu->mtxmanager);
467: lu->mtxmanager = SubMtxManager_new();
468: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
470: /* permute mtxA */
471: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
472: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
473:
474: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
475: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
477: /* redistribute the matrix */
478: MPI_Barrier(((PetscObject)A)->comm);
479: lu->firsttag = 0;
480: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
481: lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
482: lu->firsttag += size ;
484: InpMtx_free(lu->mtxA);
485: lu->mtxA = newA ;
486: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
487: if ( lu->options.msglvl > 2 ) {
488: int err;
489: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
490: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
491: err = fflush(lu->options.msgFile);
492: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
493: }
494: } /* end of if ( lu->flg == DIFFERENT_NONZERO_PATTERN) */
496: FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
497: FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, rank,
498: lu->ownersIV, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);
500: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
501: if ( lu->options.patchAndGoFlag == 1 ) {
502: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
503: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
504: lu->options.storeids, lu->options.storevalues);
505: } else if ( lu->options.patchAndGoFlag == 2 ) {
506: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
507: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
508: lu->options.storeids, lu->options.storevalues);
509: }
510: }
512: /* numerical factorization */
513: chvmanager = ChvManager_new();
514: ChvManager_init(chvmanager, NO_LOCK, 0);
516: tagbound = maxTagMPI(lu->comm_spooles);
517: lasttag = lu->firsttag + 3*lu->frontETree->nfront + 2;
518: /* if(!rank) PetscPrintf(PETSC_COMM_SELF,"\n firsttag: %d, nfront: %d\n",lu->firsttag, lu->frontETree->nfront);*/
519: if ( lasttag > tagbound ) {
520: SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_factorInpMtx(), tag range is [%d,%d], tag_bound = %d",\
521: lu->firsttag, lasttag, tagbound);
522: }
523: rootchv = FrontMtx_MPI_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, droptol,
524: chvmanager, lu->ownersIV, lookahead, &sierr, lu->cpus,
525: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
526: ChvManager_free(chvmanager);
527: lu->firsttag = lasttag;
528: if ( lu->options.msglvl > 2 ) {
529: int err;
530: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization");
531: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
532: err = fflush(lu->options.msgFile);
533: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
534: }
536: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
537: if ( lu->options.patchAndGoFlag == 1 ) {
538: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
539: if (lu->options.msglvl > 0 ){
540: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
541: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
542: }
543: }
544: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
545: } else if ( lu->options.patchAndGoFlag == 2 ) {
546: if (lu->options.msglvl > 0 ){
547: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
548: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
549: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
550: }
551: if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
552: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");
553: DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile);
554: }
555: }
556: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
557: }
558: }
559: if ( sierr >= 0 ) SETERRQ2(PETSC_ERR_LIB,"\n proc %d : factorization error at front %d", rank, sierr);
560:
561: /* post-process the factorization and split
562: the factor matrices into submatrices */
563: lasttag = lu->firsttag + 5*size;
564: if ( lasttag > tagbound ) {
565: SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_postProcess(), tag range is [%d,%d], tag_bound = %d",\
566: lu->firsttag, lasttag, tagbound);
567: }
568: FrontMtx_MPI_postProcess(lu->frontmtx, lu->ownersIV, lu->stats, lu->options.msglvl,
569: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
570: lu->firsttag += 5*size ;
571: if ( lu->options.msglvl > 2 ) {
572: int err;
573: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after post-processing");
574: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
575: err = fflush(lu->options.msgFile);
576: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
577: }
578:
579: /* create the solve map object */
580: lu->solvemap = SolveMap_new();
581: SolveMap_ddMap(lu->solvemap, lu->frontmtx->symmetryflag,
582: FrontMtx_upperBlockIVL(lu->frontmtx),
583: FrontMtx_lowerBlockIVL(lu->frontmtx),
584: size, lu->ownersIV, FrontMtx_frontTree(lu->frontmtx),
585: lu->options.seed, lu->options.msglvl, lu->options.msgFile);
586: if ( lu->options.msglvl > 2 ) {
587: int err;
588: SolveMap_writeForHumanEye(lu->solvemap, lu->options.msgFile);
589: err = fflush(lu->options.msgFile);
590: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
591: }
593: /* redistribute the submatrices of the factors */
594: FrontMtx_MPI_split(lu->frontmtx, lu->solvemap,
595: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
596: if ( lu->options.msglvl > 2 ) {
597: int err;
598: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after split");
599: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
600: err = fflush(lu->options.msgFile);
601: if (err) SETERRQ(PETSC_ERR_SYS,"fflush() failed on file");
602: }
604: /* create a solution DenseMtx object */
605: lu->ownedColumnsIV = FrontMtx_ownedColumnsIV(lu->frontmtx, rank, lu->ownersIV,
606: lu->options.msglvl, lu->options.msgFile);
607: lu->nmycol = IV_size(lu->ownedColumnsIV);
608: if ( lu->nmycol > 0) {
609: DenseMtx_init(lu->mtxX, lu->options.typeflag, 0, 0, lu->nmycol, 1, 1, lu->nmycol);
610: /* get pointers rowindX and entX */
611: DenseMtx_rowIndices(lu->mtxX, &lu->nmycol, &lu->rowindX);
612: lu->entX = DenseMtx_entries(lu->mtxX);
613: } else { /* lu->nmycol == 0 */
614: lu->entX = 0;
615: lu->rowindX = 0;
616: }
618: if ( lu->scat ){
619: VecDestroy(lu->vec_spooles);
620: ISDestroy(lu->iden);
621: ISDestroy(lu->is_petsc);
622: VecScatterDestroy(lu->scat);
623: }
624: lu->scat = PETSC_NULL;
625: lu->flg = SAME_NONZERO_PATTERN;
627: lu->CleanUpSpooles = PETSC_TRUE;
628: return(0);
629: }
634: PetscErrorCode MatConvert_MPIAIJ_MPIAIJSpooles(Mat A,MatType type,MatReuse reuse,Mat *newmat)
635: {
637: Mat B=*newmat;
638: Mat_Spooles *lu;
641: PetscNewLog(B,Mat_Spooles,&lu);
642: if (reuse == MAT_INITIAL_MATRIX) {
643: MatDuplicate(A,MAT_COPY_VALUES,&B);
644: lu->MatDuplicate = B->ops->duplicate;
645: lu->MatLUFactorSymbolic = B->ops->lufactorsymbolic;
646: lu->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic;
647: lu->MatView = B->ops->view;
648: lu->MatAssemblyEnd = B->ops->assemblyend;
649: lu->MatDestroy = B->ops->destroy;
650: } else {
651: lu->MatDuplicate = A->ops->duplicate;
652: lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic;
653: lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
654: lu->MatView = A->ops->view;
655: lu->MatAssemblyEnd = A->ops->assemblyend;
656: lu->MatDestroy = A->ops->destroy;
657: }
658: lu->basetype = MATMPIAIJ;
659: lu->CleanUpSpooles = PETSC_FALSE;
661: B->spptr = (void*)lu;
662: B->ops->duplicate = MatDuplicate_Spooles;
663: B->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJSpooles;
664: B->ops->view = MatView_Spooles;
665: B->ops->assemblyend = MatAssemblyEnd_MPIAIJSpooles;
666: B->ops->destroy = MatDestroy_MPIAIJSpooles;
668: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaijspooles_mpiaij_C",
669: "MatConvert_Spooles_Base",MatConvert_Spooles_Base);
670: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijspooles_C",
671: "MatConvert_MPIAIJ_MPIAIJSpooles",MatConvert_MPIAIJ_MPIAIJSpooles);
672: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJSPOOLES);
673: *newmat = B;
674: return(0);
675: }
678: /*MC
679: MATMPIAIJSPOOLES - MATMPIAIJSPOOLES = "mpiaijspooles" - A matrix type providing direct solvers (LU) for distributed matrices
680: via the external package Spooles.
682: If Spooles is installed (see the manual for
683: instructions on how to declare the existence of external packages),
684: a matrix type can be constructed which invokes SPOOLES solvers.
685: After calling MatCreate(...,A), simply call MatSetType(A,MATMPIAIJSPOOLES), then
686: optionally call MatMPIAIJSetPreallocation() etc. DO NOT
687: call MatCreateMPIAIJ() directly or the preallocation information will be LOST!
689: This matrix inherits from MATMPIAIJ. As a result, MatMPIAIJSetPreallocation() is
690: supported for this matrix type. One can also call MatConvert() for an inplace conversion to or from
691: the MATMPIAIJ type without data copy AFTER the matrix values have been set.
693: Consult Spooles documentation for more information about the options database keys below.
695: Options Database Keys:
696: + -mat_type mpiaijspooles - sets the matrix type to "mpiaijspooles" during a call to MatSetFromOptions()
697: . -mat_spooles_tau <tau> - upper bound on the magnitude of the largest element in L or U
698: . -mat_spooles_seed <seed> - random number seed used for ordering
699: . -mat_spooles_msglvl <msglvl> - message output level
700: . -mat_spooles_ordering <BestOfNDandMS,MMD,MS,ND> - ordering used
701: . -mat_spooles_maxdomainsize <n> - maximum subgraph size used by Spooles orderings
702: . -mat_spooles_maxzeros <n> - maximum number of zeros inside a supernode
703: . -mat_spooles_maxsize <n> - maximum size of a supernode
704: . -mat_spooles_FrontMtxInfo <true,false> - print Spooles information about the computed factorization
705: . -mat_spooles_symmetryflag <0,1,2> - 0: SPOOLES_SYMMETRIC, 1: SPOOLES_HERMITIAN, 2: SPOOLES_NONSYMMETRIC
706: . -mat_spooles_patchAndGoFlag <0,1,2> - 0: no patch, 1: use PatchAndGo strategy 1, 2: use PatchAndGo strategy 2
707: . -mat_spooles_toosmall <dt> - drop tolerance for PatchAndGo strategy 1
708: . -mat_spooles_storeids <bool integer> - if nonzero, stores row and col numbers where patches were applied in an IV object
709: . -mat_spooles_fudge <delta> - fudge factor for rescaling diagonals with PatchAndGo strategy 2
710: - -mat_spooles_storevalues <bool integer> - if nonzero and PatchAndGo strategy 2 is used, store change in diagonal value in a DV object
712: Level: beginner
714: .seealso: PCLU
715: M*/
720: PetscErrorCode MatCreate_MPIAIJSpooles(Mat A)
721: {
725: MatSetType(A,MATMPIAIJ);
726: /*
727: Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
728: MatConvert_SeqAIJ_SeqAIJSpooles(A_diag,MATSEQAIJSPOOLES,MAT_REUSE_MATRIX,&A_diag);
729: */
730: MatConvert_MPIAIJ_MPIAIJSpooles(A,MATMPIAIJSPOOLES,MAT_REUSE_MATRIX,&A);
731: return(0);
732: }