Actual source code: spooles.c
1: /*$Id: spooles.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2: /*
3: Provides an interface to the Spooles serial sparse solver
4: */
6: #include src/mat/impls/aij/seq/aij.h
7: #include src/mat/impls/sbaij/seq/sbaij.h
9: #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
10: #include src/mat/impls/aij/seq/spooles.h
12: extern int MatDestroy_SeqAIJ(Mat);
13: #undef __FUNCT__
15: int MatDestroy_SeqAIJ_Spooles(Mat A)
16: {
17: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
18: int ierr;
19:
21:
22: FrontMtx_free(lu->frontmtx) ;
23: IV_free(lu->newToOldIV) ;
24: IV_free(lu->oldToNewIV) ;
25: InpMtx_free(lu->mtxA) ;
26: ETree_free(lu->frontETree) ;
27: IVL_free(lu->symbfacIVL) ;
28: SubMtxManager_free(lu->mtxmanager) ;
29: Graph_free(lu->graph);
30:
31: PetscFree(lu);
32: MatDestroy_SeqAIJ(A);
34: return(0);
35: }
37: #undef __FUNCT__
39: int MatSolve_SeqAIJ_Spooles(Mat A,Vec b,Vec x)
40: {
41: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
42: PetscScalar *array;
43: DenseMtx *mtxY, *mtxX ;
44: double *entX;
45: int ierr,irow,neqns=A->n,nrow=A->m,*iv;
48:
49: mtxY = DenseMtx_new() ;
50: DenseMtx_init(mtxY, SPOOLES_REAL, 0, 0, nrow, 1, 1, nrow) ; /* column major */
51: VecGetArray(b,&array);
52: if (lu->options.useQR) { /* copy b to mtxY */
53: for ( irow = 0 ; irow < nrow; irow++ )
54: DenseMtx_setRealEntry(mtxY, irow, 0, *array++) ;
55: } else { /* copy permuted b to mtxY */
56: iv = IV_entries(lu->oldToNewIV);
57: for ( irow = 0 ; irow < nrow; irow++ )
58: DenseMtx_setRealEntry(mtxY, *iv++, 0, *array++) ;
59: }
60: VecRestoreArray(b,&array);
62: mtxX = DenseMtx_new() ;
63: DenseMtx_init(mtxX, SPOOLES_REAL, 0, 0, neqns, 1, 1, neqns) ;
64: if (lu->options.useQR) {
65: FrontMtx_QR_solve(lu->frontmtx, lu->mtxA, mtxX, mtxY, lu->mtxmanager,
66: lu->cpus, lu->options.msglvl, lu->options.msgFile) ;
67: } else {
68: FrontMtx_solve(lu->frontmtx, mtxX, mtxY, lu->mtxmanager,
69: lu->cpus, lu->options.msglvl, lu->options.msgFile) ;
70: }
71: if ( lu->options.msglvl > 2 ) {
72: fprintf(lu->options.msgFile, "nn right hand side matrix after permutation") ;
73: DenseMtx_writeForHumanEye(mtxY, lu->options.msgFile) ;
74: fprintf(lu->options.msgFile, "nn solution matrix in new ordering") ;
75: DenseMtx_writeForHumanEye(mtxX, lu->options.msgFile) ;
76: fflush(lu->options.msgFile) ;
77: }
79: /* permute solution into original ordering, then copy to x */
80: DenseMtx_permuteRows(mtxX, lu->newToOldIV);
81: VecGetArray(x,&array);
82: entX = DenseMtx_entries(mtxX);
83: DVcopy(neqns, array, entX);
84: VecRestoreArray(x,&array);
85:
86: /* free memory */
87: DenseMtx_free(mtxX) ;
88: DenseMtx_free(mtxY) ;
89:
90: return(0);
91: }
93: #undef __FUNCT__
95: int MatFactorNumeric_SeqAIJ_Spooles(Mat A,Mat *F)
96: {
97: Mat_Spooles *lu = (Mat_Spooles*)(*F)->spptr;
98: ChvManager *chvmanager ;
99: Chv *rootchv ;
100: IVL *adjIVL;
101: int ierr,nz,nrow=A->m,irow,nedges,neqns=A->n,
102: *ai,*aj,*ivec1, *ivec2, i;
103: PetscScalar *av;
104: double *dvec,cputotal,facops;
105:
107: /* copy A to Spooles' InpMtx object */
108: if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
109: Mat_SeqAIJ *mat = (Mat_SeqAIJ*)A->data;
110: ai=mat->i; aj=mat->j; av=mat->a;
111: nz=mat->nz;
112: } else {
113: Mat_SeqSBAIJ *mat = (Mat_SeqSBAIJ*)A->data;
114: ai=mat->i; aj=mat->j; av=mat->a;
115: nz=mat->s_nz;
116: }
117: if (lu->flg == DIFFERENT_NONZERO_PATTERN) lu->mtxA = InpMtx_new() ;
118: InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, SPOOLES_REAL, nz, 0) ;
119: ivec1 = InpMtx_ivec1(lu->mtxA);
120: ivec2 = InpMtx_ivec2(lu->mtxA);
121: dvec = InpMtx_dvec(lu->mtxA);
122: for (irow = 0; irow < nrow; irow++){
123: for (i = ai[irow]; i<ai[irow+1]; i++) ivec1[i] = irow;
124: }
125: IVcopy(nz, ivec2, aj);
126: DVcopy(nz, dvec, av);
127: InpMtx_inputRealTriples(lu->mtxA, nz, ivec1, ivec2, dvec);
128: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
130: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
131:
132: (*F)->ops->solve = MatSolve_SeqAIJ_Spooles;
133: (*F)->ops->destroy = MatDestroy_SeqAIJ_Spooles;
134: (*F)->assembled = PETSC_TRUE;
135:
136: SetSpoolesOptions(A, &lu->options);
138: /*---------------------------------------------------
139: find a low-fill ordering
140: (1) create the Graph object
141: (2) order the graph
142: -------------------------------------------------------*/
143: if (lu->options.useQR){
144: adjIVL = InpMtx_adjForATA(lu->mtxA) ;
145: } else {
146: adjIVL = InpMtx_fullAdjacency(lu->mtxA) ;
147: }
148: nedges = IVL_tsize(adjIVL) ;
150: lu->graph = Graph_new() ;
151: Graph_init2(lu->graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL) ;
152: if ( lu->options.msglvl > 2 ) {
153: if (lu->options.useQR){
154: fprintf(lu->options.msgFile, "nn graph of A^T A") ;
155: } else {
156: fprintf(lu->options.msgFile, "nn graph of the input matrix") ;
157: }
158: Graph_writeForHumanEye(lu->graph, lu->options.msgFile) ;
159: fflush(lu->options.msgFile) ;
160: }
162: switch (lu->options.ordering) {
163: case 0:
164: lu->frontETree = orderViaBestOfNDandMS(lu->graph,
165: lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
166: lu->options.seed, lu->options.msglvl, lu->options.msgFile); break;
167: case 1:
168: lu->frontETree = orderViaMMD(lu->graph,lu->options.seed,lu->options.msglvl,lu->options.msgFile); break;
169: case 2:
170: lu->frontETree = orderViaMS(lu->graph, lu->options.maxdomainsize,
171: lu->options.seed,lu->options.msglvl,lu->options.msgFile); break;
172: case 3:
173: lu->frontETree = orderViaND(lu->graph, lu->options.maxdomainsize,
174: lu->options.seed,lu->options.msglvl,lu->options.msgFile); break;
175: default:
176: SETERRQ(1,"Unknown Spooles's ordering");
177: }
179: if ( lu->options.msglvl > 0 ) {
180: fprintf(lu->options.msgFile, "nn front tree from ordering") ;
181: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile) ;
182: fflush(lu->options.msgFile) ;
183: }
184:
185: /* get the permutation, permute the front tree */
186: lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree) ;
187: lu->oldToNew = IV_entries(lu->oldToNewIV) ;
188: lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree) ;
189: if (!lu->options.useQR) ETree_permuteVertices(lu->frontETree, lu->oldToNewIV) ;
191: /* permute the matrix */
192: if (lu->options.useQR){
193: InpMtx_permute(lu->mtxA, NULL, lu->oldToNew) ;
194: } else {
195: InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew) ;
196: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
197: InpMtx_mapToUpperTriangle(lu->mtxA) ;
198: }
199: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS) ;
200: }
201: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
203: /* get symbolic factorization */
204: if (lu->options.useQR){
205: lu->symbfacIVL = SymbFac_initFromGraph(lu->frontETree, lu->graph) ;
206: IVL_overwrite(lu->symbfacIVL, lu->oldToNewIV) ;
207: IVL_sortUp(lu->symbfacIVL) ;
208: ETree_permuteVertices(lu->frontETree, lu->oldToNewIV) ;
209: } else {
210: lu->symbfacIVL = SymbFac_initFromInpMtx(lu->frontETree, lu->mtxA) ;
211: }
212: if ( lu->options.msglvl > 2 ) {
213: fprintf(lu->options.msgFile, "nn old-to-new permutation vector") ;
214: IV_writeForHumanEye(lu->oldToNewIV, lu->options.msgFile) ;
215: fprintf(lu->options.msgFile, "nn new-to-old permutation vector") ;
216: IV_writeForHumanEye(lu->newToOldIV, lu->options.msgFile) ;
217: fprintf(lu->options.msgFile, "nn front tree after permutation") ;
218: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile) ;
219: fprintf(lu->options.msgFile, "nn input matrix after permutation") ;
220: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile) ;
221: fprintf(lu->options.msgFile, "nn symbolic factorization") ;
222: IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile) ;
223: fflush(lu->options.msgFile) ;
224: }
226: lu->frontmtx = FrontMtx_new() ;
227: lu->mtxmanager = SubMtxManager_new() ;
228: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0) ;
230: } else { /* new num factorization using previously computed symbolic factor */
232: if (lu->options.pivotingflag) { /* different FrontMtx is required */
233: FrontMtx_free(lu->frontmtx) ;
234: lu->frontmtx = FrontMtx_new() ;
235: } else {
236: FrontMtx_clearData (lu->frontmtx);
237: }
239: SubMtxManager_free(lu->mtxmanager) ;
240: lu->mtxmanager = SubMtxManager_new() ;
241: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0) ;
243: /* permute mtxA */
244: if (lu->options.useQR){
245: InpMtx_permute(lu->mtxA, NULL, lu->oldToNew) ;
246: } else {
247: InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew) ;
248: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
249: InpMtx_mapToUpperTriangle(lu->mtxA) ;
250: }
251: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS) ;
252: }
253: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
254: if ( lu->options.msglvl > 2 ) {
255: fprintf(lu->options.msgFile, "nn input matrix after permutation") ;
256: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile) ;
257: }
258: } /* end of if( lu->flg == DIFFERENT_NONZERO_PATTERN) */
259:
260: if (lu->options.useQR){
261: FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, SPOOLES_REAL,
262: SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS,
263: SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL,
264: lu->mtxmanager, lu->options.msglvl, lu->options.msgFile) ;
265: } else {
266: FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, SPOOLES_REAL, lu->options.symflag,
267: FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, 0, NULL,
268: lu->mtxmanager, lu->options.msglvl, lu->options.msgFile) ;
269: }
271: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
272: if ( lu->options.patchAndGoFlag == 1 ) {
273: lu->frontmtx->patchinfo = PatchAndGoInfo_new() ;
274: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
275: lu->options.storeids, lu->options.storevalues) ;
276: } else if ( lu->options.patchAndGoFlag == 2 ) {
277: lu->frontmtx->patchinfo = PatchAndGoInfo_new() ;
278: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
279: lu->options.storeids, lu->options.storevalues) ;
280: }
281: }
283: /* numerical factorization */
284: chvmanager = ChvManager_new() ;
285: ChvManager_init(chvmanager, NO_LOCK, 1) ;
286: DVfill(10, lu->cpus, 0.0) ;
287: if (lu->options.useQR){
288: facops = 0.0 ;
289: FrontMtx_QR_factor(lu->frontmtx, lu->mtxA, chvmanager,
290: lu->cpus, &facops, lu->options.msglvl, lu->options.msgFile) ;
291: if ( lu->options.msglvl > 1 ) {
292: fprintf(lu->options.msgFile, "nn factor matrix") ;
293: fprintf(lu->options.msgFile, "n facops = %9.2f", facops) ;
294: }
295: } else {
296: IVfill(20, lu->stats, 0) ;
297: rootchv = FrontMtx_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, 0.0,
298: chvmanager, &ierr, lu->cpus,lu->stats,lu->options.msglvl,lu->options.msgFile) ;
299: if ( rootchv != NULL ) SETERRQ(1,"n matrix found to be singular");
300: if ( ierr >= 0 ) SETERRQ1(1,"n error encountered at front %d", ierr);
301:
302: if(lu->options.FrontMtxInfo){
303: PetscPrintf(PETSC_COMM_SELF,"n %8d pivots, %8d pivot tests, %8d delayed rows and columnsn",304: lu->stats[0], lu->stats[1], lu->stats[2]);
305: cputotal = lu->cpus[8] ;
306: if ( cputotal > 0.0 ) {
307: PetscPrintf(PETSC_COMM_SELF,
308: "n cpus cpus/totaltime"
309: "n initialize fronts %8.3f %6.2f"
310: "n load original entries %8.3f %6.2f"
311: "n update fronts %8.3f %6.2f"
312: "n assemble postponed data %8.3f %6.2f"
313: "n factor fronts %8.3f %6.2f"
314: "n extract postponed data %8.3f %6.2f"
315: "n store factor entries %8.3f %6.2f"
316: "n miscellaneous %8.3f %6.2f"
317: "n total time %8.3f n",
318: lu->cpus[0], 100.*lu->cpus[0]/cputotal,
319: lu->cpus[1], 100.*lu->cpus[1]/cputotal,
320: lu->cpus[2], 100.*lu->cpus[2]/cputotal,
321: lu->cpus[3], 100.*lu->cpus[3]/cputotal,
322: lu->cpus[4], 100.*lu->cpus[4]/cputotal,
323: lu->cpus[5], 100.*lu->cpus[5]/cputotal,
324: lu->cpus[6], 100.*lu->cpus[6]/cputotal,
325: lu->cpus[7], 100.*lu->cpus[7]/cputotal, cputotal) ;
326: }
327: }
328: }
329: ChvManager_free(chvmanager) ;
331: if ( lu->options.msglvl > 0 ) {
332: fprintf(lu->options.msgFile, "nn factor matrix") ;
333: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile) ;
334: fflush(lu->options.msgFile) ;
335: }
337: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
338: if ( lu->options.patchAndGoFlag == 1 ) {
339: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
340: if (lu->options.msglvl > 0 ){
341: fprintf(lu->options.msgFile, "n small pivots found at these locations") ;
342: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile) ;
343: }
344: }
345: PatchAndGoInfo_free(lu->frontmtx->patchinfo) ;
346: } else if ( lu->options.patchAndGoFlag == 2 ) {
347: if (lu->options.msglvl > 0 ){
348: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
349: fprintf(lu->options.msgFile, "n small pivots found at these locations") ;
350: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile) ;
351: }
352: if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
353: fprintf(lu->options.msgFile, "n perturbations") ;
354: DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile) ;
355: }
356: }
357: PatchAndGoInfo_free(lu->frontmtx->patchinfo) ;
358: }
359: }
361: /* post-process the factorization */
362: FrontMtx_postProcess(lu->frontmtx, lu->options.msglvl, lu->options.msgFile) ;
363: if ( lu->options.msglvl > 2 ) {
364: fprintf(lu->options.msgFile, "nn factor matrix after post-processing") ;
365: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile) ;
366: fflush(lu->options.msgFile) ;
367: }
369: lu->flg = SAME_NONZERO_PATTERN;
370:
371: return(0);
372: }
374: #endif