Actual source code: dscpack.c

  1: /* 
  2:         Provides an interface to the DSCPACK (Domain-Separator Codes) sparse direct solver
  3: */

 5:  #include src/mat/impls/baij/seq/baij.h
 6:  #include src/mat/impls/baij/mpi/mpibaij.h

  9: #include "dscmain.h"

 12: typedef struct {
 13:   DSC_Solver        My_DSC_Solver;
 14:   PetscInt      num_local_strucs, *local_struc_old_num,
 15:                 num_local_cols, num_local_nonz,
 16:                 *global_struc_new_col_num,
 17:                 *global_struc_new_num, *global_struc_owner,
 18:                 dsc_id,bs,*local_cols_old_num,*replication;
 19:   PetscInt      order_code,scheme_code,factor_type, stat,
 20:                 LBLASLevel,DBLASLevel,max_mem_allowed;
 21:   MatStructure  flg;
 22:   IS            my_cols,iden,iden_dsc;
 23:   Vec           vec_dsc;
 24:   VecScatter    scat;
 25:   MPI_Comm      comm_dsc;

 27:   /* A few inheritance details */
 28:   PetscMPIInt    size;
 29:   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
 30:   PetscErrorCode (*MatView)(Mat,PetscViewer);
 31:   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
 32:   PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
 33:   PetscErrorCode (*MatDestroy)(Mat);
 34:   PetscErrorCode (*MatPreallocate)(Mat,PetscInt,PetscInt,PetscInt*,PetscInt,PetscInt*);

 36:   /* Clean up flag for destructor */
 37:   PetscTruth CleanUpDSCPACK;
 38: } Mat_DSC;

 40: EXTERN PetscErrorCode MatDuplicate_DSCPACK(Mat,MatDuplicateOption,Mat*);
 42: EXTERN PetscErrorCode MatConvert_Base_DSCPACK(Mat,const MatType,Mat*);

 45: /* DSC function */
 48: void isort2(PetscInt size, PetscInt *list, PetscInt *idx_dsc) {
 49:   /* in increasing order */
 50:   /* idx_dsc will contain indices such that */
 51:   /* list can be accessed in sorted order */
 52:   PetscInt i, j, x, y;
 53: 
 54:   for (i=0; i<size; i++) idx_dsc[i] =i;

 56:   for (i=1; i<size; i++){
 57:     y= idx_dsc[i];
 58:     x=list[idx_dsc[i]];
 59:     for (j=i-1; ((j>=0) && (x<list[idx_dsc[j]])); j--)
 60:       idx_dsc[j+1]=idx_dsc[j];
 61:     idx_dsc[j+1]=y;
 62:   }
 63: }/*end isort2*/

 67: PetscErrorCode  BAIJtoMyANonz( PetscInt *AIndex, PetscInt *AStruct, PetscInt bs,
 68:                     RealNumberType *ANonz, PetscInt NumLocalStructs, 
 69:                     PetscInt NumLocalNonz,  PetscInt *GlobalStructNewColNum,                
 70:                     PetscInt *LocalStructOldNum,
 71:                     PetscInt *LocalStructLocalNum,
 72:                     RealNumberType **adr_MyANonz)
 73: /* 
 74:    Extract non-zero values of lower triangular part
 75:    of the permuted matrix that belong to this processor.

 77:    Only output parameter is adr_MyANonz -- is malloced and changed.
 78:    Rest are input parameters left unchanged.

 80:    When LocalStructLocalNum == PETSC_NULL,
 81:         AIndex, AStruct, and ANonz contain entire original matrix A 
 82:         in PETSc SeqBAIJ format,
 83:         otherwise,
 84:         AIndex, AStruct, and ANonz are indeces for the submatrix
 85:         of A whose colomns (in increasing order) belong to this processor.

 87:    Other variables supply information on ownership of columns
 88:    and the new numbering in a fill-reducing permutation

 90:    This information is used to setup lower half of A nonzeroes
 91:    for columns owned by this processor
 92:  */
 93: {
 95:   PetscInt            i, j, k, iold,inew, jj, kk, bs2=bs*bs,
 96:                  *idx, *NewColNum,
 97:                  MyANonz_last, max_struct=0, struct_size;
 98:   RealNumberType *MyANonz;


102:   /* loop: to find maximum number of subscripts over columns
103:      assigned to this processor */
104:   for (i=0; i <NumLocalStructs; i++) {
105:     /* for each struct i (local) assigned to this processor */
106:     if (LocalStructLocalNum){
107:       iold = LocalStructLocalNum[i];
108:     } else {
109:       iold = LocalStructOldNum[i];
110:     }
111: 
112:     struct_size = AIndex[iold+1] - AIndex[iold];
113:     if ( max_struct <= struct_size) max_struct = struct_size;
114:   }

116:   /* allocate tmp arrays large enough to hold densest struct */
117:   PetscMalloc((2*max_struct+1)*sizeof(PetscInt),&NewColNum);
118:   idx = NewColNum + max_struct;
119: 
120:   PetscMalloc(NumLocalNonz*sizeof(RealNumberType),&MyANonz);
121:   *adr_MyANonz = MyANonz;

123:   /* loop to set up nonzeroes in MyANonz */
124:   MyANonz_last = 0 ; /* points to first empty space in MyANonz */
125:   for (i=0; i <NumLocalStructs; i++) {

127:     /* for each struct i (local) assigned to this processor */
128:     if (LocalStructLocalNum){
129:       iold = LocalStructLocalNum[i];
130:     } else {
131:       iold = LocalStructOldNum[i];
132:     }

134:     struct_size = AIndex[iold+1] - AIndex[iold];
135:     for (k=0, j=AIndex[iold]; j<AIndex[iold+1]; j++){
136:       NewColNum[k] = GlobalStructNewColNum[AStruct[j]];
137:       k++;
138:     }
139:     isort2(struct_size, NewColNum, idx);
140: 
141:     kk = AIndex[iold]*bs2; /* points to 1st element of iold block col in ANonz */
142:     inew = GlobalStructNewColNum[LocalStructOldNum[i]];

144:     for (jj = 0; jj < bs; jj++) {
145:       for (j=0; j<struct_size; j++){
146:         for ( k = 0; k<bs; k++){
147:           if (NewColNum[idx[j]] + k >= inew)
148:             MyANonz[MyANonz_last++] = ANonz[kk + idx[j]*bs2 + k*bs + jj];
149:         }
150:       }
151:       inew++;
152:     }
153:   } /* end outer loop for i */

155:   PetscFree(NewColNum);
156:   if (MyANonz_last != NumLocalNonz) SETERRQ2(PETSC_ERR_PLIB,"MyANonz_last %d != NumLocalNonz %d\n",MyANonz_last, NumLocalNonz);
157:   return(0);
158: }

163: PetscErrorCode MatConvert_DSCPACK_Base(Mat A,const MatType type,Mat *newmat)
164: {
166:   Mat            B=*newmat;
167:   Mat_DSC        *lu=(Mat_DSC*)A->spptr;
168:   void           (*f)(void);

171:   if (B != A) {
172:     MatDuplicate(A,MAT_COPY_VALUES,&B);
173:   }
174:   /* Reset the original function pointers */
175:   B->ops->duplicate              = lu->MatDuplicate;
176:   B->ops->view                   = lu->MatView;
177:   B->ops->assemblyend            = lu->MatAssemblyEnd;
178:   B->ops->choleskyfactorsymbolic = lu->MatCholeskyFactorSymbolic;
179:   B->ops->destroy                = lu->MatDestroy;
180:   PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",&f);
181:   if (f) {
182:     PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C","",(FCNVOID)lu->MatPreallocate);
183:   }
184:   PetscFree(lu);

186:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_dscpack_C","",PETSC_NULL);
187:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_dscpack_seqbaij_C","",PETSC_NULL);
188:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_dscpack_C","",PETSC_NULL);
189:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_dscpack_mpibaij_C","",PETSC_NULL);

191:   PetscObjectChangeTypeName((PetscObject)B,type);
192:   *newmat = B;

194:   return(0);
195: }

200: PetscErrorCode MatDestroy_DSCPACK(Mat A)
201: {
202:   Mat_DSC *lu=(Mat_DSC*)A->spptr;
204: 
206:   if (lu->CleanUpDSCPACK) {
207:     if (lu->dsc_id != -1) {
208:       if(lu->stat) DSC_DoStats(lu->My_DSC_Solver);
209:       DSC_FreeAll(lu->My_DSC_Solver);
210:       DSC_Close0(lu->My_DSC_Solver);
211: 
212:       PetscFree(lu->local_cols_old_num);
213:     }
214:     DSC_End(lu->My_DSC_Solver);
215: 
216:     MPI_Comm_free(&(lu->comm_dsc));
217:     ISDestroy(lu->my_cols);
218:     PetscFree(lu->replication);
219:     VecDestroy(lu->vec_dsc);
220:     ISDestroy(lu->iden_dsc);
221:     VecScatterDestroy(lu->scat);
222:     if (lu->size >1 && lu->iden) {ISDestroy(lu->iden);}
223:   }
224:   if (lu->size == 1) {
225:     MatConvert_DSCPACK_Base(A,MATSEQBAIJ,&A);
226:   } else {
227:     MatConvert_DSCPACK_Base(A,MATMPIBAIJ,&A);
228:   }
229:   (*A->ops->destroy)(A);
230:   return(0);
231: }

235: PetscErrorCode MatSolve_DSCPACK(Mat A,Vec b,Vec x) {
236:   Mat_DSC        *lu= (Mat_DSC*)A->spptr;
238:   RealNumberType *solution_vec,*rhs_vec;

241:   /* scatter b into seq vec_dsc */
242:   if ( !lu->scat ) {
243:     VecScatterCreate(b,lu->my_cols,lu->vec_dsc,lu->iden_dsc,&lu->scat);
244:   }
245:   VecScatterBegin(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
246:   VecScatterEnd(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);

248:   if (lu->dsc_id != -1){
249:     VecGetArray(lu->vec_dsc,&rhs_vec);
250:     DSC_InputRhsLocalVec(lu->My_DSC_Solver, rhs_vec, lu->num_local_cols);
251:     VecRestoreArray(lu->vec_dsc,&rhs_vec);
252: 
253:     DSC_Solve(lu->My_DSC_Solver);
254:     if (ierr !=  DSC_NO_ERROR) {
255:       DSC_ErrorDisplay(lu->My_DSC_Solver);
256:       SETERRQ(PETSC_ERR_LIB,"Error in calling DSC_Solve");
257:     }

259:     /* get the permuted local solution */
260:     VecGetArray(lu->vec_dsc,&solution_vec);
261:     DSC_GetLocalSolution(lu->My_DSC_Solver,solution_vec, lu->num_local_cols);
262:     VecRestoreArray(lu->vec_dsc,&solution_vec);

264:   } /* end of if (lu->dsc_id != -1) */

266:   /* put permuted local solution solution_vec into x in the original order */
267:   VecScatterBegin(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
268:   VecScatterEnd(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);

270:   return(0);
271: }

275: PetscErrorCode MatCholeskyFactorNumeric_DSCPACK(Mat A,Mat *F) {
276:   Mat_SeqBAIJ    *a_seq;
277:   Mat_DSC        *lu=(Mat_DSC*)(*F)->spptr;
278:   Mat            *tseq,A_seq=PETSC_NULL;
279:   RealNumberType *my_a_nonz;
281:   PetscMPIInt    size;
282:   PetscInt       M=A->M,Mbs=M/lu->bs,max_mem_estimate,max_single_malloc_blk,
283:                  number_of_procs,i,j,next,iold,*idx,*iidx=0,*itmp;
284:   IS             my_cols_sorted;
285: 
287:   MPI_Comm_size(A->comm,&size);
288:   if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
289:     /* convert A to A_seq */
290:     if (size > 1) {
291:       if (!lu->iden){
292:         ISCreateStride(PETSC_COMM_SELF,M,0,1,&lu->iden);
293:       }
294:       MatGetSubMatrices(A,1,&lu->iden,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
295:       A_seq = tseq[0];
296:       a_seq = (Mat_SeqBAIJ*)A_seq->data;
297:     } else {
298:       a_seq = (Mat_SeqBAIJ*)A->data;
299:     }
300: 
301:     PetscMalloc(Mbs*sizeof(PetscInt),&lu->replication);
302:     for (i=0; i<Mbs; i++) lu->replication[i] = lu->bs;

304:     number_of_procs = DSC_Analyze(Mbs, a_seq->i, a_seq->j, lu->replication);
305: 
306:     i = size;
307:     if ( number_of_procs < i ) i = number_of_procs;
308:     number_of_procs = 1;
309:     while ( i > 1 ){
310:       number_of_procs  *= 2; i /= 2;
311:     }

313:     /* DSC_Solver starts */
314:     DSC_Open0( lu->My_DSC_Solver, number_of_procs, &lu->dsc_id, lu->comm_dsc );

316:     if (lu->dsc_id != -1) {
317:       DSC_Order(lu->My_DSC_Solver,lu->order_code,Mbs,a_seq->i,a_seq->j,lu->replication,
318:                    &M,&lu->num_local_strucs,
319:                    &lu->num_local_cols, &lu->num_local_nonz,  &lu->global_struc_new_col_num,
320:                    &lu->global_struc_new_num, &lu->global_struc_owner,
321:                    &lu->local_struc_old_num);
322:       if (ierr !=  DSC_NO_ERROR) {
323:         DSC_ErrorDisplay(lu->My_DSC_Solver);
324:         SETERRQ(PETSC_ERR_LIB,"Error when use DSC_Order()");
325:       }

327:       DSC_SFactor(lu->My_DSC_Solver,&max_mem_estimate,&max_single_malloc_blk,
328:                      lu->max_mem_allowed, lu->LBLASLevel, lu->DBLASLevel);
329:       if (ierr !=  DSC_NO_ERROR) {
330:         DSC_ErrorDisplay(lu->My_DSC_Solver);
331:         SETERRQ(PETSC_ERR_LIB,"Error when use DSC_Order");
332:       }

334:       BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
335:                        lu->num_local_strucs, lu->num_local_nonz,
336:                        lu->global_struc_new_col_num,
337:                        lu->local_struc_old_num,
338:                        PETSC_NULL,
339:                        &my_a_nonz);
340:       if (ierr <0) {
341:           DSC_ErrorDisplay(lu->My_DSC_Solver);
342:           SETERRQ1(PETSC_ERR_LIB,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
343:       }

345:       /* get local_cols_old_num and IS my_cols to be used later */
346:       PetscMalloc(lu->num_local_cols*sizeof(PetscInt),&lu->local_cols_old_num);
347:       for (next = 0, i=0; i<lu->num_local_strucs; i++){
348:         iold = lu->bs*lu->local_struc_old_num[i];
349:         for (j=0; j<lu->bs; j++)
350:           lu->local_cols_old_num[next++] = iold++;
351:       }
352:       ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
353: 
354:     } else {    /* lu->dsc_id == -1 */
355:       lu->num_local_cols = 0;
356:       lu->local_cols_old_num = 0;
357:       ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
358:     }
359:     /* generate vec_dsc and iden_dsc to be used later */
360:     VecCreateSeq(PETSC_COMM_SELF,lu->num_local_cols,&lu->vec_dsc);
361:     ISCreateStride(PETSC_COMM_SELF,lu->num_local_cols,0,1,&lu->iden_dsc);
362:     lu->scat = PETSC_NULL;

364:     if ( size>1 ) {
365:       MatDestroyMatrices(1,&tseq);
366:     }
367:   } else { /* use previously computed symbolic factor */
368:     /* convert A to my A_seq */
369:     if (size > 1) {
370:       if (lu->dsc_id == -1) {
371:         itmp = 0;
372:       } else {
373:         PetscMalloc(2*lu->num_local_strucs*sizeof(PetscInt),&idx);
374:         iidx = idx + lu->num_local_strucs;
375:         PetscMalloc(lu->num_local_cols*sizeof(PetscInt),&itmp);
376: 
377:         isort2(lu->num_local_strucs, lu->local_struc_old_num, idx);
378:         for (next=0, i=0; i< lu->num_local_strucs; i++) {
379:           iold = lu->bs*lu->local_struc_old_num[idx[i]];
380:           for (j=0; j<lu->bs; j++){
381:             itmp[next++] = iold++; /* sorted local_cols_old_num */
382:           }
383:         }
384:         for (i=0; i< lu->num_local_strucs; i++) {
385:           iidx[idx[i]] = i;       /* inverse of idx */
386:         }
387:       } /* end of (lu->dsc_id == -1) */
388:       ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,itmp,&my_cols_sorted);
389:       MatGetSubMatrices(A,1,&my_cols_sorted,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
390:       ISDestroy(my_cols_sorted);
391:       A_seq = tseq[0];
392: 
393:       if (lu->dsc_id != -1) {
394:         DSC_ReFactorInitialize(lu->My_DSC_Solver);

396:         a_seq = (Mat_SeqBAIJ*)A_seq->data;
397:         BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
398:                        lu->num_local_strucs, lu->num_local_nonz,
399:                        lu->global_struc_new_col_num,
400:                        lu->local_struc_old_num,
401:                        iidx,
402:                        &my_a_nonz);
403:         if (ierr <0) {
404:           DSC_ErrorDisplay(lu->My_DSC_Solver);
405:           SETERRQ1(PETSC_ERR_LIB,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
406:         }
407:         PetscFree(idx);
408:         PetscFree(itmp);
409:       } /* end of if(lu->dsc_id != -1)  */
410:     } else { /* size == 1 */
411:       a_seq = (Mat_SeqBAIJ*)A->data;
412: 
413:       BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
414:                        lu->num_local_strucs, lu->num_local_nonz,
415:                        lu->global_struc_new_col_num,
416:                        lu->local_struc_old_num,
417:                        PETSC_NULL,
418:                        &my_a_nonz);
419:       if (ierr <0) {
420:         DSC_ErrorDisplay(lu->My_DSC_Solver);
421:         SETERRQ1(PETSC_ERR_LIB,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
422:       }
423:     }
424:     if ( size>1 ) {MatDestroyMatrices(1,&tseq); }
425:   }
426: 
427:   if (lu->dsc_id != -1) {
428:     DSC_NFactor(lu->My_DSC_Solver, lu->scheme_code, my_a_nonz, lu->factor_type, lu->LBLASLevel, lu->DBLASLevel);
429:     PetscFree(my_a_nonz);
430:   }
431: 
432:   (*F)->assembled = PETSC_TRUE;
433:   lu->flg         = SAME_NONZERO_PATTERN;

435:   return(0);
436: }

438: /* Note the Petsc permutation r is ignored */
441: PetscErrorCode MatCholeskyFactorSymbolic_DSCPACK(Mat A,IS r,MatFactorInfo *info,Mat *F) {
442:   Mat        B;
443:   Mat_DSC    *lu;
445:   PetscInt bs,indx;
446:   PetscTruth flg;
447:   const char *ftype[]={"LDLT","LLT"},*ltype[]={"LBLAS1","LBLAS2","LBLAS3"},*dtype[]={"DBLAS1","DBLAS2"};


451:   /* Create the factorization matrix F */
452:   MatGetBlockSize(A,&bs);
453:   MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);
454:   MatSetType(B,A->type_name);
455:   MatSeqBAIJSetPreallocation(B,bs,0,PETSC_NULL);
456:   MatMPIBAIJSetPreallocation(B,bs,0,PETSC_NULL,0,PETSC_NULL);
457: 
458:   lu = (Mat_DSC*)B->spptr;
459:   B->bs = bs;

461:   B->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_DSCPACK;
462:   B->ops->solve                  = MatSolve_DSCPACK;
463:   B->factor                      = FACTOR_CHOLESKY;

465:   /* Set the default input options */
466:   lu->order_code  = 2;
467:   lu->scheme_code = 1;
468:   lu->factor_type = 2;
469:   lu->stat        = 0; /* do not display stats */
470:   lu->LBLASLevel  = DSC_LBLAS3;
471:   lu->DBLASLevel  = DSC_DBLAS2;
472:   lu->max_mem_allowed = 256;
473:   MPI_Comm_dup(A->comm,&(lu->comm_dsc));
474:   /* Get the runtime input options */
475:   PetscOptionsBegin(A->comm,A->prefix,"DSCPACK Options","Mat");

477:   PetscOptionsInt("-mat_dscpack_order","order_code: \n\
478:          1 = ND, 2 = Hybrid with Minimum Degree, 3 = Hybrid with Minimum Deficiency", \
479:          "None",
480:          lu->order_code,&lu->order_code,PETSC_NULL);

482:   PetscOptionsInt("-mat_dscpack_scheme","scheme_code: \n\
483:          1 = standard factorization,  2 = factorization + selective inversion", \
484:          "None",
485:          lu->scheme_code,&lu->scheme_code,PETSC_NULL);
486: 
487:   PetscOptionsEList("-mat_dscpack_factor","factor_type","None",ftype,2,ftype[0],&indx,&flg);
488:   if (flg) {
489:     switch (indx) {
490:     case 0:
491:       lu->factor_type = DSC_LDLT;
492:       break;
493:     case 1:
494:       lu->factor_type = DSC_LLT;
495:       break;
496:     }
497:   }
498:   PetscOptionsInt("-mat_dscpack_MaxMemAllowed","in Mbytes","None",
499:          lu->max_mem_allowed,&lu->max_mem_allowed,PETSC_NULL);

501:   PetscOptionsInt("-mat_dscpack_stats","display stats: 0 = no display,  1 = display",
502:          "None", lu->stat,&lu->stat,PETSC_NULL);
503: 
504:   PetscOptionsEList("-mat_dscpack_LBLAS","BLAS level used in the local phase","None",ltype,3,ltype[2],&indx,&flg);
505:   if (flg) {
506:     switch (indx) {
507:     case 0:
508:       lu->LBLASLevel = DSC_LBLAS1;
509:       break;
510:     case 1:
511:       lu->LBLASLevel = DSC_LBLAS2;
512:       break;
513:     case 2:
514:       lu->LBLASLevel = DSC_LBLAS3;
515:       break;
516:     }
517:   }

519:   PetscOptionsEList("-mat_dscpack_DBLAS","BLAS level used in the distributed phase","None",dtype,2,dtype[1],&indx,&flg);
520:   if (flg) {
521:     switch (indx) {
522:     case 0:
523:       lu->DBLASLevel = DSC_DBLAS1;
524:       break;
525:     case 1:
526:       lu->DBLASLevel = DSC_DBLAS2;
527:       break;
528:     }
529:   }

531:   PetscOptionsEnd();
532: 
533:   lu->flg = DIFFERENT_NONZERO_PATTERN;

535:   lu->My_DSC_Solver = DSC_Begin();
536:   lu->CleanUpDSCPACK = PETSC_TRUE;
537:   *F = B;
538:   return(0);
539: }

543: PetscErrorCode MatAssemblyEnd_DSCPACK(Mat A,MatAssemblyType mode) {
545:   Mat_DSC *lu=(Mat_DSC*)A->spptr;

548:   (*lu->MatAssemblyEnd)(A,mode);
549:   lu->MatCholeskyFactorSymbolic  = A->ops->choleskyfactorsymbolic;
550:   A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_DSCPACK;
551:   return(0);
552: }

556: PetscErrorCode MatFactorInfo_DSCPACK(Mat A,PetscViewer viewer)
557: {
558:   Mat_DSC *lu=(Mat_DSC*)A->spptr;
560:   char    *s=0;
561: 
563:   PetscViewerASCIIPrintf(viewer,"DSCPACK run parameters:\n");

565:   switch (lu->order_code) {
566:   case 1: s = "ND"; break;
567:   case 2: s = "Hybrid with Minimum Degree"; break;
568:   case 3: s = "Hybrid with Minimum Deficiency"; break;
569:   }
570:   PetscViewerASCIIPrintf(viewer,"  order_code: %s \n",s);

572:   switch (lu->scheme_code) {
573:   case 1: s = "standard factorization"; break;
574:   case 2: s = "factorization + selective inversion"; break;
575:   }
576:   PetscViewerASCIIPrintf(viewer,"  scheme_code: %s \n",s);

578:   switch (lu->stat) {
579:   case 0: s = "NO"; break;
580:   case 1: s = "YES"; break;
581:   }
582:   PetscViewerASCIIPrintf(viewer,"  display stats: %s \n",s);
583: 
584:   if ( lu->factor_type == DSC_LLT) {
585:     s = "LLT";
586:   } else if ( lu->factor_type == DSC_LDLT){
587:     s = "LDLT";
588:   } else {
589:     SETERRQ(PETSC_ERR_PLIB,"Unknown factor type");
590:   }
591:   PetscViewerASCIIPrintf(viewer,"  factor type: %s \n",s);

593:   if ( lu->LBLASLevel == DSC_LBLAS1) {
594:     s = "BLAS1";
595:   } else if ( lu->LBLASLevel == DSC_LBLAS2){
596:     s = "BLAS2";
597:   } else if ( lu->LBLASLevel == DSC_LBLAS3){
598:     s = "BLAS3";
599:   } else {
600:     SETERRQ(PETSC_ERR_PLIB,"Unknown local phase BLAS level");
601:   }
602:   PetscViewerASCIIPrintf(viewer,"  local phase BLAS level: %s \n",s);
603: 
604:   if ( lu->DBLASLevel == DSC_DBLAS1) {
605:     s = "BLAS1";
606:   } else if ( lu->DBLASLevel == DSC_DBLAS2){
607:     s = "BLAS2";
608:   } else {
609:     SETERRQ(PETSC_ERR_PLIB,"Unknown distributed phase BLAS level");
610:   }
611:   PetscViewerASCIIPrintf(viewer,"  distributed phase BLAS level: %s \n",s);
612:   return(0);
613: }

617: PetscErrorCode MatView_DSCPACK(Mat A,PetscViewer viewer) {
618:   PetscErrorCode    ierr;
619:   PetscMPIInt       size;
620:   PetscTruth        iascii;
621:   PetscViewerFormat format;
622:   Mat_DSC           *lu=(Mat_DSC*)A->spptr;

625:   /* This convertion ugliness is because MatView for BAIJ types calls MatConvert to AIJ */
626:   size = lu->size;
627:   if (size==1) {
628:     MatConvert(A,MATSEQBAIJ,&A);
629:   } else {
630:     MatConvert(A,MATMPIBAIJ,&A);
631:   }

633:   MatView(A,viewer);

635:   MatConvert(A,MATDSCPACK,&A);

637:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
638:   if (iascii) {
639:     PetscViewerGetFormat(viewer,&format);
640:     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
641:       MatFactorInfo_DSCPACK(A,viewer);
642:     }
643:   }
644:   return(0);
645: }

650: PetscErrorCode MatMPIBAIJSetPreallocation_MPIDSCPACK(Mat  B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
651: {
652:   Mat     A;
653:   Mat_DSC *lu = (Mat_DSC*)B->spptr;

657:   /*
658:     After performing the MPIBAIJ Preallocation, we need to convert the local diagonal block matrix
659:     into DSCPACK type so that the block jacobi preconditioner (for example) can use DSCPACK.  I would
660:     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
661:     block size info so that PETSc can determine the local size properly.  The block size info is set
662:     in the preallocation routine.
663:   */
664:   (*lu->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
665:   A    = ((Mat_MPIBAIJ *)B->data)->A;
666:   MatConvert_Base_DSCPACK(A,MATDSCPACK,&A);
667:   return(0);
668: }

674: PetscErrorCode MatConvert_Base_DSCPACK(Mat A,const MatType type,Mat *newmat)
675: {
676:   /* This routine is only called to convert to MATDSCPACK */
677:   /* from MATSEQBAIJ if A has a single process communicator */
678:   /* or MATMPIBAIJ otherwise, so we will ignore 'MatType type'. */
680:   MPI_Comm       comm;
681:   Mat            B=*newmat;
682:   Mat_DSC        *lu;
683:   void           (*f)(void);

686:   if (B != A) {
687:     MatDuplicate(A,MAT_COPY_VALUES,&B);
688:   }

690:   PetscObjectGetComm((PetscObject)A,&comm);
691:   PetscNew(Mat_DSC,&lu);
692:   PetscMemzero(lu,sizeof(Mat_DSC));

694:   lu->MatDuplicate               = A->ops->duplicate;
695:   lu->MatView                    = A->ops->view;
696:   lu->MatAssemblyEnd             = A->ops->assemblyend;
697:   lu->MatCholeskyFactorSymbolic  = A->ops->choleskyfactorsymbolic;
698:   lu->MatDestroy                 = A->ops->destroy;
699:   lu->CleanUpDSCPACK             = PETSC_FALSE;
700:   lu->bs                         = A->bs;

702:   B->spptr                       = (void*)lu;
703:   B->ops->duplicate              = MatDuplicate_DSCPACK;
704:   B->ops->view                   = MatView_DSCPACK;
705:   B->ops->assemblyend            = MatAssemblyEnd_DSCPACK;
706:   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_DSCPACK;
707:   B->ops->destroy                = MatDestroy_DSCPACK;

709:   MPI_Comm_size(comm,&(lu->size));
710:   if (lu->size == 1) {
711:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_dscpack_C",
712:                                              "MatConvert_Base_DSCPACK",MatConvert_Base_DSCPACK);
713:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_dscpack_seqbaij_C",
714:                                              "MatConvert_DSCPACK_Base",MatConvert_DSCPACK_Base);
715:   } else {
716:       /* I really don't like needing to know the tag: MatMPIBAIJSetPreallocation_C */
717:     PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",&f);
718:     if (f) {
719:       lu->MatPreallocate = (PetscErrorCode (*)(Mat,PetscInt,PetscInt,PetscInt*,PetscInt,PetscInt*))f;
720:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
721:                                                "MatMPIBAIJSetPreallocation_MPIDSCPACK",
722:                                                MatMPIBAIJSetPreallocation_MPIDSCPACK);
723:     }
724:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_dscpack_C",
725:                                              "MatConvert_Base_DSCPACK",MatConvert_Base_DSCPACK);
726:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_dscpack_mpibaij_C",
727:                                              "MatConvert_DSCPACK_Base",MatConvert_DSCPACK_Base);
728:   }
729:   PetscObjectChangeTypeName((PetscObject)B,MATDSCPACK);
730:   *newmat = B;
731:   return(0);
732: }

737: PetscErrorCode MatDuplicate_DSCPACK(Mat A, MatDuplicateOption op, Mat *M) {
739:   Mat_DSC *lu=(Mat_DSC *)A->spptr;

742:   (*lu->MatDuplicate)(A,op,M);
743:   PetscMemcpy((*M)->spptr,lu,sizeof(Mat_DSC));
744:   return(0);
745: }

747: /*MC
748:   MATDSCPACK - MATDSCPACK = "dscpack" - A matrix type providing direct solvers (Cholesky) for sequential 
749:   or distributed matrices via the external package DSCPACK.

751:   If DSCPACK is installed (see the manual for
752:   instructions on how to declare the existence of external packages),
753:   a matrix type can be constructed which invokes DSCPACK solvers.
754:   After calling MatCreate(...,A), simply call MatSetType(A,MATDSCPACK).
755:   This matrix type is only supported for double precision real.

757:   This matrix inherits from MATSEQBAIJ if constructed with a single process communicator,
758:   and from MATMPIBAIJ otherwise.  As a result, for sequential matrices, MatSeqBAIJSetPreallocation is 
759:   supported, and similarly MatMPIBAIJSetPreallocation is supported for distributed matrices.  It is 
760:   recommended that you call both of the above preallocation routines for simplicity.  Also,
761:   MatConvert can be called to perform inplace conversion to and from MATSEQBAIJ or MATMPIBAIJ
762:   for sequential or distributed matrices respectively.

764:   Options Database Keys:
765: + -mat_type dscpack - sets the matrix type to dscpack during a call to MatSetFromOptions()
766: . -mat_dscpack_order <1,2,3> - DSCPACK ordering, 1:ND, 2:Hybrid with Minimum Degree, 3:Hybrid with Minimum Deficiency
767: . -mat_dscpack_scheme <1,2> - factorization scheme, 1:standard factorization,  2: factorization with selective inversion
768: . -mat_dscpack_factor <LLT,LDLT> - the type of factorization to be performed.
769: . -mat_dscpack_MaxMemAllowed <n> - the maximum memory to be used during factorization
770: . -mat_dscpack_stats <0,1> - display stats of the factorization and solves during MatDestroy(), 0: no display,  1: display
771: . -mat_dscpack_LBLAS <LBLAS1,LBLAS2,LBLAS3> - BLAS level used in the local phase
772: - -mat_dscpack_DBLAS <DBLAS1,DBLAS2> - BLAS level used in the distributed phase

774:    Level: beginner

776: .seealso: PCCHOLESKY
777: M*/

782: PetscErrorCode MatCreate_DSCPACK(Mat A)
783: {
785:   PetscMPIInt    size;

788:   /* Change type name before calling MatSetType to force proper construction of SeqBAIJ or MPIBAIJ */
789:   /*   and DSCPACK types */
790:   PetscObjectChangeTypeName((PetscObject)A,MATDSCPACK);
791:   MPI_Comm_size(A->comm,&size);
792:   if (size == 1) {
793:     MatSetType(A,MATSEQBAIJ);
794:   } else {
795:     MatSetType(A,MATMPIBAIJ);
796:   }
797:   MatConvert_Base_DSCPACK(A,MATDSCPACK,&A);
798:   return(0);
799: }