Actual source code: spooles.c

  1: /*$Id: spooles.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
  2: /* 
  3:    Provides an interface to the Spooles serial sparse solver
  4: */

 6:  #include src/mat/impls/aij/seq/aij.h
 7:  #include src/mat/impls/sbaij/seq/sbaij.h

  9: #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
 10:  #include src/mat/impls/aij/seq/spooles.h

 12: extern int MatDestroy_SeqAIJ(Mat);
 13: #undef __FUNCT__  
 15: int MatDestroy_SeqAIJ_Spooles(Mat A)
 16: {
 17:   Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
 18:   int         ierr;
 19: 
 21: 
 22:   FrontMtx_free(lu->frontmtx) ;
 23:   IV_free(lu->newToOldIV) ;
 24:   IV_free(lu->oldToNewIV) ;
 25:   InpMtx_free(lu->mtxA) ;
 26:   ETree_free(lu->frontETree) ;
 27:   IVL_free(lu->symbfacIVL) ;
 28:   SubMtxManager_free(lu->mtxmanager) ;
 29:   Graph_free(lu->graph);
 30: 
 31:   PetscFree(lu);
 32:   MatDestroy_SeqAIJ(A);

 34:   return(0);
 35: }

 37: #undef __FUNCT__  
 39: int MatSolve_SeqAIJ_Spooles(Mat A,Vec b,Vec x)
 40: {
 41:   Mat_Spooles      *lu = (Mat_Spooles*)A->spptr;
 42:   PetscScalar      *array;
 43:   DenseMtx         *mtxY, *mtxX ;
 44:   double           *entX;
 45:   int              ierr,irow,neqns=A->n,nrow=A->m,*iv;

 48: 
 49:   mtxY = DenseMtx_new() ;
 50:   DenseMtx_init(mtxY, SPOOLES_REAL, 0, 0, nrow, 1, 1, nrow) ; /* column major */
 51:   VecGetArray(b,&array);
 52:   if (lu->options.useQR) {   /* copy b to mtxY */
 53:     for ( irow = 0 ; irow < nrow; irow++ )
 54:       DenseMtx_setRealEntry(mtxY, irow, 0, *array++) ;
 55:   } else {                   /* copy permuted b to mtxY */
 56:     iv = IV_entries(lu->oldToNewIV);
 57:     for ( irow = 0 ; irow < nrow; irow++ )
 58:       DenseMtx_setRealEntry(mtxY, *iv++, 0, *array++) ;
 59:   }
 60:   VecRestoreArray(b,&array);

 62:   mtxX = DenseMtx_new() ;
 63:   DenseMtx_init(mtxX, SPOOLES_REAL, 0, 0, neqns, 1, 1, neqns) ;
 64:   if (lu->options.useQR) {
 65:     FrontMtx_QR_solve(lu->frontmtx, lu->mtxA, mtxX, mtxY, lu->mtxmanager,
 66:                   lu->cpus, lu->options.msglvl, lu->options.msgFile) ;
 67:   } else {
 68:     FrontMtx_solve(lu->frontmtx, mtxX, mtxY, lu->mtxmanager,
 69:                  lu->cpus, lu->options.msglvl, lu->options.msgFile) ;
 70:   }
 71:   if ( lu->options.msglvl > 2 ) {
 72:     fprintf(lu->options.msgFile, "nn right hand side matrix after permutation") ;
 73:     DenseMtx_writeForHumanEye(mtxY, lu->options.msgFile) ;
 74:     fprintf(lu->options.msgFile, "nn solution matrix in new ordering") ;
 75:     DenseMtx_writeForHumanEye(mtxX, lu->options.msgFile) ;
 76:     fflush(lu->options.msgFile) ;
 77:   }

 79:   /* permute solution into original ordering, then copy to x */
 80:   DenseMtx_permuteRows(mtxX, lu->newToOldIV);
 81:   VecGetArray(x,&array);
 82:   entX = DenseMtx_entries(mtxX);
 83:   DVcopy(neqns, array, entX);
 84:   VecRestoreArray(x,&array);
 85: 
 86:   /* free memory */
 87:   DenseMtx_free(mtxX) ;
 88:   DenseMtx_free(mtxY) ;
 89: 
 90:   return(0);
 91: }

 93: #undef __FUNCT__   
 95: int MatFactorNumeric_SeqAIJ_Spooles(Mat A,Mat *F)
 96: {
 97:   Mat_Spooles        *lu = (Mat_Spooles*)(*F)->spptr;
 98:   ChvManager         *chvmanager ;
 99:   Chv                *rootchv ;
100:   IVL                *adjIVL;
101:   int                ierr,nz,nrow=A->m,irow,nedges,neqns=A->n,
102:                      *ai,*aj,*ivec1, *ivec2, i;
103:   PetscScalar        *av;
104:   double             *dvec,cputotal,facops;
105: 
107:   /* copy A to Spooles' InpMtx object */
108:   if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
109:     Mat_SeqAIJ   *mat = (Mat_SeqAIJ*)A->data;
110:     ai=mat->i; aj=mat->j; av=mat->a;
111:     nz=mat->nz;
112:   } else {
113:     Mat_SeqSBAIJ *mat = (Mat_SeqSBAIJ*)A->data;
114:     ai=mat->i; aj=mat->j; av=mat->a;
115:     nz=mat->s_nz;
116:   }
117:   if (lu->flg == DIFFERENT_NONZERO_PATTERN) lu->mtxA = InpMtx_new() ;
118:   InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, SPOOLES_REAL, nz, 0) ;
119:   ivec1 = InpMtx_ivec1(lu->mtxA);
120:   ivec2 = InpMtx_ivec2(lu->mtxA);
121:   dvec  = InpMtx_dvec(lu->mtxA);
122:   for (irow = 0; irow < nrow; irow++){
123:     for (i = ai[irow]; i<ai[irow+1]; i++) ivec1[i] = irow;
124:   }
125:   IVcopy(nz, ivec2, aj);
126:   DVcopy(nz, dvec, av);
127:   InpMtx_inputRealTriples(lu->mtxA, nz, ivec1, ivec2, dvec);
128:   InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;

130:   if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
131: 
132:     (*F)->ops->solve   = MatSolve_SeqAIJ_Spooles;
133:     (*F)->ops->destroy = MatDestroy_SeqAIJ_Spooles;
134:     (*F)->assembled    = PETSC_TRUE;
135: 
136:     SetSpoolesOptions(A, &lu->options);

138:     /*---------------------------------------------------
139:     find a low-fill ordering
140:          (1) create the Graph object
141:          (2) order the graph 
142:     -------------------------------------------------------*/
143:     if (lu->options.useQR){
144:       adjIVL = InpMtx_adjForATA(lu->mtxA) ;
145:     } else {
146:       adjIVL = InpMtx_fullAdjacency(lu->mtxA) ;
147:     }
148:     nedges = IVL_tsize(adjIVL) ;

150:     lu->graph = Graph_new() ;
151:     Graph_init2(lu->graph, 0, neqns, 0, nedges, neqns, nedges, adjIVL, NULL, NULL) ;
152:     if ( lu->options.msglvl > 2 ) {
153:       if (lu->options.useQR){
154:         fprintf(lu->options.msgFile, "nn graph of A^T A") ;
155:       } else {
156:         fprintf(lu->options.msgFile, "nn graph of the input matrix") ;
157:       }
158:       Graph_writeForHumanEye(lu->graph, lu->options.msgFile) ;
159:       fflush(lu->options.msgFile) ;
160:     }

162:     switch (lu->options.ordering) {
163:     case 0:
164:       lu->frontETree = orderViaBestOfNDandMS(lu->graph,
165:                      lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
166:                      lu->options.seed, lu->options.msglvl, lu->options.msgFile); break;
167:     case 1:
168:       lu->frontETree = orderViaMMD(lu->graph,lu->options.seed,lu->options.msglvl,lu->options.msgFile); break;
169:     case 2:
170:       lu->frontETree = orderViaMS(lu->graph, lu->options.maxdomainsize,
171:                      lu->options.seed,lu->options.msglvl,lu->options.msgFile); break;
172:     case 3:
173:       lu->frontETree = orderViaND(lu->graph, lu->options.maxdomainsize,
174:                      lu->options.seed,lu->options.msglvl,lu->options.msgFile); break;
175:     default:
176:       SETERRQ(1,"Unknown Spooles's ordering");
177:     }

179:     if ( lu->options.msglvl > 0 ) {
180:       fprintf(lu->options.msgFile, "nn front tree from ordering") ;
181:       ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile) ;
182:       fflush(lu->options.msgFile) ;
183:     }
184: 
185:     /* get the permutation, permute the front tree */
186:     lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree) ;
187:     lu->oldToNew   = IV_entries(lu->oldToNewIV) ;
188:     lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree) ;
189:     if (!lu->options.useQR) ETree_permuteVertices(lu->frontETree, lu->oldToNewIV) ;

191:     /* permute the matrix */
192:     if (lu->options.useQR){
193:       InpMtx_permute(lu->mtxA, NULL, lu->oldToNew) ;
194:     } else {
195:       InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew) ;
196:       if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
197:         InpMtx_mapToUpperTriangle(lu->mtxA) ;
198:       }
199:       InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS) ;
200:     }
201:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;

203:     /* get symbolic factorization */
204:     if (lu->options.useQR){
205:       lu->symbfacIVL = SymbFac_initFromGraph(lu->frontETree, lu->graph) ;
206:       IVL_overwrite(lu->symbfacIVL, lu->oldToNewIV) ;
207:       IVL_sortUp(lu->symbfacIVL) ;
208:       ETree_permuteVertices(lu->frontETree, lu->oldToNewIV) ;
209:     } else {
210:       lu->symbfacIVL = SymbFac_initFromInpMtx(lu->frontETree, lu->mtxA) ;
211:     }
212:     if ( lu->options.msglvl > 2 ) {
213:       fprintf(lu->options.msgFile, "nn old-to-new permutation vector") ;
214:       IV_writeForHumanEye(lu->oldToNewIV, lu->options.msgFile) ;
215:       fprintf(lu->options.msgFile, "nn new-to-old permutation vector") ;
216:       IV_writeForHumanEye(lu->newToOldIV, lu->options.msgFile) ;
217:       fprintf(lu->options.msgFile, "nn front tree after permutation") ;
218:       ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile) ;
219:       fprintf(lu->options.msgFile, "nn input matrix after permutation") ;
220:       InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile) ;
221:       fprintf(lu->options.msgFile, "nn symbolic factorization") ;
222:       IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile) ;
223:       fflush(lu->options.msgFile) ;
224:     }

226:     lu->frontmtx   = FrontMtx_new() ;
227:     lu->mtxmanager = SubMtxManager_new() ;
228:     SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0) ;

230:   } else { /* new num factorization using previously computed symbolic factor */

232:     if (lu->options.pivotingflag) { /* different FrontMtx is required */
233:       FrontMtx_free(lu->frontmtx) ;
234:       lu->frontmtx   = FrontMtx_new() ;
235:     } else {
236:       FrontMtx_clearData (lu->frontmtx);
237:     }

239:     SubMtxManager_free(lu->mtxmanager) ;
240:     lu->mtxmanager = SubMtxManager_new() ;
241:     SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0) ;

243:     /* permute mtxA */
244:     if (lu->options.useQR){
245:       InpMtx_permute(lu->mtxA, NULL, lu->oldToNew) ;
246:     } else {
247:       InpMtx_permute(lu->mtxA, lu->oldToNew, lu->oldToNew) ;
248:       if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
249:         InpMtx_mapToUpperTriangle(lu->mtxA) ;
250:       }
251:       InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS) ;
252:     }
253:     InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS) ;
254:     if ( lu->options.msglvl > 2 ) {
255:       fprintf(lu->options.msgFile, "nn input matrix after permutation") ;
256:       InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile) ;
257:     }
258:   } /* end of if( lu->flg == DIFFERENT_NONZERO_PATTERN) */
259: 
260:   if (lu->options.useQR){
261:     FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, SPOOLES_REAL,
262:                  SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS,
263:                  SPOOLES_NO_PIVOTING, NO_LOCK, 0, NULL,
264:                  lu->mtxmanager, lu->options.msglvl, lu->options.msgFile) ;
265:   } else {
266:     FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, SPOOLES_REAL, lu->options.symflag,
267:                 FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, 0, NULL,
268:                 lu->mtxmanager, lu->options.msglvl, lu->options.msgFile) ;
269:   }

271:   if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
272:     if ( lu->options.patchAndGoFlag == 1 ) {
273:       lu->frontmtx->patchinfo = PatchAndGoInfo_new() ;
274:       PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
275:                        lu->options.storeids, lu->options.storevalues) ;
276:     } else if ( lu->options.patchAndGoFlag == 2 ) {
277:       lu->frontmtx->patchinfo = PatchAndGoInfo_new() ;
278:       PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
279:                        lu->options.storeids, lu->options.storevalues) ;
280:     }
281:   }

283:   /* numerical factorization */
284:   chvmanager = ChvManager_new() ;
285:   ChvManager_init(chvmanager, NO_LOCK, 1) ;
286:   DVfill(10, lu->cpus, 0.0) ;
287:   if (lu->options.useQR){
288:     facops = 0.0 ;
289:     FrontMtx_QR_factor(lu->frontmtx, lu->mtxA, chvmanager,
290:                    lu->cpus, &facops, lu->options.msglvl, lu->options.msgFile) ;
291:     if ( lu->options.msglvl > 1 ) {
292:       fprintf(lu->options.msgFile, "nn factor matrix") ;
293:       fprintf(lu->options.msgFile, "n facops = %9.2f", facops) ;
294:     }
295:   } else {
296:     IVfill(20, lu->stats, 0) ;
297:     rootchv = FrontMtx_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, 0.0,
298:             chvmanager, &ierr, lu->cpus,lu->stats,lu->options.msglvl,lu->options.msgFile) ;
299:     if ( rootchv != NULL ) SETERRQ(1,"n matrix found to be singular");
300:     if ( ierr >= 0 ) SETERRQ1(1,"n error encountered at front %d", ierr);
301: 
302:     if(lu->options.FrontMtxInfo){
303:       PetscPrintf(PETSC_COMM_SELF,"n %8d pivots, %8d pivot tests, %8d delayed rows and columnsn",304:                lu->stats[0], lu->stats[1], lu->stats[2]);
305:       cputotal = lu->cpus[8] ;
306:       if ( cputotal > 0.0 ) {
307:         PetscPrintf(PETSC_COMM_SELF,
308:            "n                               cpus   cpus/totaltime"
309:            "n    initialize fronts       %8.3f %6.2f"
310:            "n    load original entries   %8.3f %6.2f"
311:            "n    update fronts           %8.3f %6.2f"
312:            "n    assemble postponed data %8.3f %6.2f"
313:            "n    factor fronts           %8.3f %6.2f"
314:            "n    extract postponed data  %8.3f %6.2f"
315:            "n    store factor entries    %8.3f %6.2f"
316:            "n    miscellaneous           %8.3f %6.2f"
317:            "n    total time              %8.3f n",
318:            lu->cpus[0], 100.*lu->cpus[0]/cputotal,
319:            lu->cpus[1], 100.*lu->cpus[1]/cputotal,
320:            lu->cpus[2], 100.*lu->cpus[2]/cputotal,
321:            lu->cpus[3], 100.*lu->cpus[3]/cputotal,
322:            lu->cpus[4], 100.*lu->cpus[4]/cputotal,
323:            lu->cpus[5], 100.*lu->cpus[5]/cputotal,
324:            lu->cpus[6], 100.*lu->cpus[6]/cputotal,
325:            lu->cpus[7], 100.*lu->cpus[7]/cputotal, cputotal) ;
326:       }
327:     }
328:   }
329:   ChvManager_free(chvmanager) ;

331:   if ( lu->options.msglvl > 0 ) {
332:     fprintf(lu->options.msgFile, "nn factor matrix") ;
333:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile) ;
334:     fflush(lu->options.msgFile) ;
335:   }

337:   if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
338:     if ( lu->options.patchAndGoFlag == 1 ) {
339:       if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
340:         if (lu->options.msglvl > 0 ){
341:           fprintf(lu->options.msgFile, "n small pivots found at these locations") ;
342:           IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile) ;
343:         }
344:       }
345:       PatchAndGoInfo_free(lu->frontmtx->patchinfo) ;
346:     } else if ( lu->options.patchAndGoFlag == 2 ) {
347:       if (lu->options.msglvl > 0 ){
348:         if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
349:           fprintf(lu->options.msgFile, "n small pivots found at these locations") ;
350:           IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile) ;
351:         }
352:         if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
353:           fprintf(lu->options.msgFile, "n perturbations") ;
354:           DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile) ;
355:         }
356:       }
357:       PatchAndGoInfo_free(lu->frontmtx->patchinfo) ;
358:     }
359:   }

361:   /* post-process the factorization */
362:   FrontMtx_postProcess(lu->frontmtx, lu->options.msglvl, lu->options.msgFile) ;
363:   if ( lu->options.msglvl > 2 ) {
364:     fprintf(lu->options.msgFile, "nn factor matrix after post-processing") ;
365:     FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile) ;
366:     fflush(lu->options.msgFile) ;
367:   }

369:   lu->flg = SAME_NONZERO_PATTERN;
370: 
371:   return(0);
372: }

374: #endif