Actual source code: dscpack.c
1: /*
2: Provides an interface to the DSCPACK (Domain-Separator Codes) sparse direct solver
3: */
5: #include src/mat/impls/baij/seq/baij.h
6: #include src/mat/impls/baij/mpi/mpibaij.h
9: #include "dscmain.h"
12: typedef struct {
13: DSC_Solver My_DSC_Solver;
14: PetscInt num_local_strucs, *local_struc_old_num,
15: num_local_cols, num_local_nonz,
16: *global_struc_new_col_num,
17: *global_struc_new_num, *global_struc_owner,
18: dsc_id,bs,*local_cols_old_num,*replication;
19: PetscInt order_code,scheme_code,factor_type, stat,
20: LBLASLevel,DBLASLevel,max_mem_allowed;
21: MatStructure flg;
22: IS my_cols,iden,iden_dsc;
23: Vec vec_dsc;
24: VecScatter scat;
25: MPI_Comm comm_dsc;
27: /* A few inheritance details */
28: PetscMPIInt size;
29: PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
30: PetscErrorCode (*MatView)(Mat,PetscViewer);
31: PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
32: PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
33: PetscErrorCode (*MatDestroy)(Mat);
34: PetscErrorCode (*MatPreallocate)(Mat,PetscInt,PetscInt,PetscInt*,PetscInt,PetscInt*);
36: /* Clean up flag for destructor */
37: PetscTruth CleanUpDSCPACK;
38: } Mat_DSC;
40: EXTERN PetscErrorCode MatDuplicate_DSCPACK(Mat,MatDuplicateOption,Mat*);
42: EXTERN PetscErrorCode MatConvert_Base_DSCPACK(Mat,const MatType,Mat*);
45: /* DSC function */
48: void isort2(PetscInt size, PetscInt *list, PetscInt *idx_dsc) {
49: /* in increasing order */
50: /* idx_dsc will contain indices such that */
51: /* list can be accessed in sorted order */
52: PetscInt i, j, x, y;
53:
54: for (i=0; i<size; i++) idx_dsc[i] =i;
56: for (i=1; i<size; i++){
57: y= idx_dsc[i];
58: x=list[idx_dsc[i]];
59: for (j=i-1; ((j>=0) && (x<list[idx_dsc[j]])); j--)
60: idx_dsc[j+1]=idx_dsc[j];
61: idx_dsc[j+1]=y;
62: }
63: }/*end isort2*/
67: PetscErrorCode BAIJtoMyANonz( PetscInt *AIndex, PetscInt *AStruct, PetscInt bs,
68: RealNumberType *ANonz, PetscInt NumLocalStructs,
69: PetscInt NumLocalNonz, PetscInt *GlobalStructNewColNum,
70: PetscInt *LocalStructOldNum,
71: PetscInt *LocalStructLocalNum,
72: RealNumberType **adr_MyANonz)
73: /*
74: Extract non-zero values of lower triangular part
75: of the permuted matrix that belong to this processor.
77: Only output parameter is adr_MyANonz -- is malloced and changed.
78: Rest are input parameters left unchanged.
80: When LocalStructLocalNum == PETSC_NULL,
81: AIndex, AStruct, and ANonz contain entire original matrix A
82: in PETSc SeqBAIJ format,
83: otherwise,
84: AIndex, AStruct, and ANonz are indeces for the submatrix
85: of A whose colomns (in increasing order) belong to this processor.
87: Other variables supply information on ownership of columns
88: and the new numbering in a fill-reducing permutation
90: This information is used to setup lower half of A nonzeroes
91: for columns owned by this processor
92: */
93: {
95: PetscInt i, j, k, iold,inew, jj, kk, bs2=bs*bs,
96: *idx, *NewColNum,
97: MyANonz_last, max_struct=0, struct_size;
98: RealNumberType *MyANonz;
102: /* loop: to find maximum number of subscripts over columns
103: assigned to this processor */
104: for (i=0; i <NumLocalStructs; i++) {
105: /* for each struct i (local) assigned to this processor */
106: if (LocalStructLocalNum){
107: iold = LocalStructLocalNum[i];
108: } else {
109: iold = LocalStructOldNum[i];
110: }
111:
112: struct_size = AIndex[iold+1] - AIndex[iold];
113: if ( max_struct <= struct_size) max_struct = struct_size;
114: }
116: /* allocate tmp arrays large enough to hold densest struct */
117: PetscMalloc((2*max_struct+1)*sizeof(PetscInt),&NewColNum);
118: idx = NewColNum + max_struct;
119:
120: PetscMalloc(NumLocalNonz*sizeof(RealNumberType),&MyANonz);
121: *adr_MyANonz = MyANonz;
123: /* loop to set up nonzeroes in MyANonz */
124: MyANonz_last = 0 ; /* points to first empty space in MyANonz */
125: for (i=0; i <NumLocalStructs; i++) {
127: /* for each struct i (local) assigned to this processor */
128: if (LocalStructLocalNum){
129: iold = LocalStructLocalNum[i];
130: } else {
131: iold = LocalStructOldNum[i];
132: }
134: struct_size = AIndex[iold+1] - AIndex[iold];
135: for (k=0, j=AIndex[iold]; j<AIndex[iold+1]; j++){
136: NewColNum[k] = GlobalStructNewColNum[AStruct[j]];
137: k++;
138: }
139: isort2(struct_size, NewColNum, idx);
140:
141: kk = AIndex[iold]*bs2; /* points to 1st element of iold block col in ANonz */
142: inew = GlobalStructNewColNum[LocalStructOldNum[i]];
144: for (jj = 0; jj < bs; jj++) {
145: for (j=0; j<struct_size; j++){
146: for ( k = 0; k<bs; k++){
147: if (NewColNum[idx[j]] + k >= inew)
148: MyANonz[MyANonz_last++] = ANonz[kk + idx[j]*bs2 + k*bs + jj];
149: }
150: }
151: inew++;
152: }
153: } /* end outer loop for i */
155: PetscFree(NewColNum);
156: if (MyANonz_last != NumLocalNonz) SETERRQ2(PETSC_ERR_PLIB,"MyANonz_last %d != NumLocalNonz %d\n",MyANonz_last, NumLocalNonz);
157: return(0);
158: }
163: PetscErrorCode MatConvert_DSCPACK_Base(Mat A,const MatType type,Mat *newmat)
164: {
166: Mat B=*newmat;
167: Mat_DSC *lu=(Mat_DSC*)A->spptr;
168: void (*f)(void);
171: if (B != A) {
172: MatDuplicate(A,MAT_COPY_VALUES,&B);
173: }
174: /* Reset the original function pointers */
175: B->ops->duplicate = lu->MatDuplicate;
176: B->ops->view = lu->MatView;
177: B->ops->assemblyend = lu->MatAssemblyEnd;
178: B->ops->choleskyfactorsymbolic = lu->MatCholeskyFactorSymbolic;
179: B->ops->destroy = lu->MatDestroy;
180: PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",&f);
181: if (f) {
182: PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C","",(FCNVOID)lu->MatPreallocate);
183: }
184: PetscFree(lu);
186: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_dscpack_C","",PETSC_NULL);
187: PetscObjectComposeFunction((PetscObject)B,"MatConvert_dscpack_seqbaij_C","",PETSC_NULL);
188: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_dscpack_C","",PETSC_NULL);
189: PetscObjectComposeFunction((PetscObject)B,"MatConvert_dscpack_mpibaij_C","",PETSC_NULL);
191: PetscObjectChangeTypeName((PetscObject)B,type);
192: *newmat = B;
194: return(0);
195: }
200: PetscErrorCode MatDestroy_DSCPACK(Mat A)
201: {
202: Mat_DSC *lu=(Mat_DSC*)A->spptr;
204:
206: if (lu->CleanUpDSCPACK) {
207: if (lu->dsc_id != -1) {
208: if(lu->stat) DSC_DoStats(lu->My_DSC_Solver);
209: DSC_FreeAll(lu->My_DSC_Solver);
210: DSC_Close0(lu->My_DSC_Solver);
211:
212: PetscFree(lu->local_cols_old_num);
213: }
214: DSC_End(lu->My_DSC_Solver);
215:
216: MPI_Comm_free(&(lu->comm_dsc));
217: ISDestroy(lu->my_cols);
218: PetscFree(lu->replication);
219: VecDestroy(lu->vec_dsc);
220: ISDestroy(lu->iden_dsc);
221: VecScatterDestroy(lu->scat);
222: if (lu->size >1 && lu->iden) {ISDestroy(lu->iden);}
223: }
224: if (lu->size == 1) {
225: MatConvert_DSCPACK_Base(A,MATSEQBAIJ,&A);
226: } else {
227: MatConvert_DSCPACK_Base(A,MATMPIBAIJ,&A);
228: }
229: (*A->ops->destroy)(A);
230: return(0);
231: }
235: PetscErrorCode MatSolve_DSCPACK(Mat A,Vec b,Vec x) {
236: Mat_DSC *lu= (Mat_DSC*)A->spptr;
238: RealNumberType *solution_vec,*rhs_vec;
241: /* scatter b into seq vec_dsc */
242: if ( !lu->scat ) {
243: VecScatterCreate(b,lu->my_cols,lu->vec_dsc,lu->iden_dsc,&lu->scat);
244: }
245: VecScatterBegin(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
246: VecScatterEnd(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
248: if (lu->dsc_id != -1){
249: VecGetArray(lu->vec_dsc,&rhs_vec);
250: DSC_InputRhsLocalVec(lu->My_DSC_Solver, rhs_vec, lu->num_local_cols);
251: VecRestoreArray(lu->vec_dsc,&rhs_vec);
252:
253: DSC_Solve(lu->My_DSC_Solver);
254: if (ierr != DSC_NO_ERROR) {
255: DSC_ErrorDisplay(lu->My_DSC_Solver);
256: SETERRQ(PETSC_ERR_LIB,"Error in calling DSC_Solve");
257: }
259: /* get the permuted local solution */
260: VecGetArray(lu->vec_dsc,&solution_vec);
261: DSC_GetLocalSolution(lu->My_DSC_Solver,solution_vec, lu->num_local_cols);
262: VecRestoreArray(lu->vec_dsc,&solution_vec);
264: } /* end of if (lu->dsc_id != -1) */
266: /* put permuted local solution solution_vec into x in the original order */
267: VecScatterBegin(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
268: VecScatterEnd(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
270: return(0);
271: }
275: PetscErrorCode MatCholeskyFactorNumeric_DSCPACK(Mat A,Mat *F) {
276: Mat_SeqBAIJ *a_seq;
277: Mat_DSC *lu=(Mat_DSC*)(*F)->spptr;
278: Mat *tseq,A_seq=PETSC_NULL;
279: RealNumberType *my_a_nonz;
281: PetscMPIInt size;
282: PetscInt M=A->M,Mbs=M/lu->bs,max_mem_estimate,max_single_malloc_blk,
283: number_of_procs,i,j,next,iold,*idx,*iidx=0,*itmp;
284: IS my_cols_sorted;
285:
287: MPI_Comm_size(A->comm,&size);
288: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
289: /* convert A to A_seq */
290: if (size > 1) {
291: if (!lu->iden){
292: ISCreateStride(PETSC_COMM_SELF,M,0,1,&lu->iden);
293: }
294: MatGetSubMatrices(A,1,&lu->iden,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
295: A_seq = tseq[0];
296: a_seq = (Mat_SeqBAIJ*)A_seq->data;
297: } else {
298: a_seq = (Mat_SeqBAIJ*)A->data;
299: }
300:
301: PetscMalloc(Mbs*sizeof(PetscInt),&lu->replication);
302: for (i=0; i<Mbs; i++) lu->replication[i] = lu->bs;
304: number_of_procs = DSC_Analyze(Mbs, a_seq->i, a_seq->j, lu->replication);
305:
306: i = size;
307: if ( number_of_procs < i ) i = number_of_procs;
308: number_of_procs = 1;
309: while ( i > 1 ){
310: number_of_procs *= 2; i /= 2;
311: }
313: /* DSC_Solver starts */
314: DSC_Open0( lu->My_DSC_Solver, number_of_procs, &lu->dsc_id, lu->comm_dsc );
316: if (lu->dsc_id != -1) {
317: DSC_Order(lu->My_DSC_Solver,lu->order_code,Mbs,a_seq->i,a_seq->j,lu->replication,
318: &M,&lu->num_local_strucs,
319: &lu->num_local_cols, &lu->num_local_nonz, &lu->global_struc_new_col_num,
320: &lu->global_struc_new_num, &lu->global_struc_owner,
321: &lu->local_struc_old_num);
322: if (ierr != DSC_NO_ERROR) {
323: DSC_ErrorDisplay(lu->My_DSC_Solver);
324: SETERRQ(PETSC_ERR_LIB,"Error when use DSC_Order()");
325: }
327: DSC_SFactor(lu->My_DSC_Solver,&max_mem_estimate,&max_single_malloc_blk,
328: lu->max_mem_allowed, lu->LBLASLevel, lu->DBLASLevel);
329: if (ierr != DSC_NO_ERROR) {
330: DSC_ErrorDisplay(lu->My_DSC_Solver);
331: SETERRQ(PETSC_ERR_LIB,"Error when use DSC_Order");
332: }
334: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
335: lu->num_local_strucs, lu->num_local_nonz,
336: lu->global_struc_new_col_num,
337: lu->local_struc_old_num,
338: PETSC_NULL,
339: &my_a_nonz);
340: if (ierr <0) {
341: DSC_ErrorDisplay(lu->My_DSC_Solver);
342: SETERRQ1(PETSC_ERR_LIB,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
343: }
345: /* get local_cols_old_num and IS my_cols to be used later */
346: PetscMalloc(lu->num_local_cols*sizeof(PetscInt),&lu->local_cols_old_num);
347: for (next = 0, i=0; i<lu->num_local_strucs; i++){
348: iold = lu->bs*lu->local_struc_old_num[i];
349: for (j=0; j<lu->bs; j++)
350: lu->local_cols_old_num[next++] = iold++;
351: }
352: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
353:
354: } else { /* lu->dsc_id == -1 */
355: lu->num_local_cols = 0;
356: lu->local_cols_old_num = 0;
357: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
358: }
359: /* generate vec_dsc and iden_dsc to be used later */
360: VecCreateSeq(PETSC_COMM_SELF,lu->num_local_cols,&lu->vec_dsc);
361: ISCreateStride(PETSC_COMM_SELF,lu->num_local_cols,0,1,&lu->iden_dsc);
362: lu->scat = PETSC_NULL;
364: if ( size>1 ) {
365: MatDestroyMatrices(1,&tseq);
366: }
367: } else { /* use previously computed symbolic factor */
368: /* convert A to my A_seq */
369: if (size > 1) {
370: if (lu->dsc_id == -1) {
371: itmp = 0;
372: } else {
373: PetscMalloc(2*lu->num_local_strucs*sizeof(PetscInt),&idx);
374: iidx = idx + lu->num_local_strucs;
375: PetscMalloc(lu->num_local_cols*sizeof(PetscInt),&itmp);
376:
377: isort2(lu->num_local_strucs, lu->local_struc_old_num, idx);
378: for (next=0, i=0; i< lu->num_local_strucs; i++) {
379: iold = lu->bs*lu->local_struc_old_num[idx[i]];
380: for (j=0; j<lu->bs; j++){
381: itmp[next++] = iold++; /* sorted local_cols_old_num */
382: }
383: }
384: for (i=0; i< lu->num_local_strucs; i++) {
385: iidx[idx[i]] = i; /* inverse of idx */
386: }
387: } /* end of (lu->dsc_id == -1) */
388: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,itmp,&my_cols_sorted);
389: MatGetSubMatrices(A,1,&my_cols_sorted,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
390: ISDestroy(my_cols_sorted);
391: A_seq = tseq[0];
392:
393: if (lu->dsc_id != -1) {
394: DSC_ReFactorInitialize(lu->My_DSC_Solver);
396: a_seq = (Mat_SeqBAIJ*)A_seq->data;
397: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
398: lu->num_local_strucs, lu->num_local_nonz,
399: lu->global_struc_new_col_num,
400: lu->local_struc_old_num,
401: iidx,
402: &my_a_nonz);
403: if (ierr <0) {
404: DSC_ErrorDisplay(lu->My_DSC_Solver);
405: SETERRQ1(PETSC_ERR_LIB,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
406: }
407: PetscFree(idx);
408: PetscFree(itmp);
409: } /* end of if(lu->dsc_id != -1) */
410: } else { /* size == 1 */
411: a_seq = (Mat_SeqBAIJ*)A->data;
412:
413: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
414: lu->num_local_strucs, lu->num_local_nonz,
415: lu->global_struc_new_col_num,
416: lu->local_struc_old_num,
417: PETSC_NULL,
418: &my_a_nonz);
419: if (ierr <0) {
420: DSC_ErrorDisplay(lu->My_DSC_Solver);
421: SETERRQ1(PETSC_ERR_LIB,"Error setting local nonzeroes at processor %d \n", lu->dsc_id);
422: }
423: }
424: if ( size>1 ) {MatDestroyMatrices(1,&tseq); }
425: }
426:
427: if (lu->dsc_id != -1) {
428: DSC_NFactor(lu->My_DSC_Solver, lu->scheme_code, my_a_nonz, lu->factor_type, lu->LBLASLevel, lu->DBLASLevel);
429: PetscFree(my_a_nonz);
430: }
431:
432: (*F)->assembled = PETSC_TRUE;
433: lu->flg = SAME_NONZERO_PATTERN;
435: return(0);
436: }
438: /* Note the Petsc permutation r is ignored */
441: PetscErrorCode MatCholeskyFactorSymbolic_DSCPACK(Mat A,IS r,MatFactorInfo *info,Mat *F) {
442: Mat B;
443: Mat_DSC *lu;
445: PetscInt bs,indx;
446: PetscTruth flg;
447: const char *ftype[]={"LDLT","LLT"},*ltype[]={"LBLAS1","LBLAS2","LBLAS3"},*dtype[]={"DBLAS1","DBLAS2"};
451: /* Create the factorization matrix F */
452: MatGetBlockSize(A,&bs);
453: MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);
454: MatSetType(B,A->type_name);
455: MatSeqBAIJSetPreallocation(B,bs,0,PETSC_NULL);
456: MatMPIBAIJSetPreallocation(B,bs,0,PETSC_NULL,0,PETSC_NULL);
457:
458: lu = (Mat_DSC*)B->spptr;
459: B->bs = bs;
461: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_DSCPACK;
462: B->ops->solve = MatSolve_DSCPACK;
463: B->factor = FACTOR_CHOLESKY;
465: /* Set the default input options */
466: lu->order_code = 2;
467: lu->scheme_code = 1;
468: lu->factor_type = 2;
469: lu->stat = 0; /* do not display stats */
470: lu->LBLASLevel = DSC_LBLAS3;
471: lu->DBLASLevel = DSC_DBLAS2;
472: lu->max_mem_allowed = 256;
473: MPI_Comm_dup(A->comm,&(lu->comm_dsc));
474: /* Get the runtime input options */
475: PetscOptionsBegin(A->comm,A->prefix,"DSCPACK Options","Mat");
477: PetscOptionsInt("-mat_dscpack_order","order_code: \n\
478: 1 = ND, 2 = Hybrid with Minimum Degree, 3 = Hybrid with Minimum Deficiency", \
479: "None",
480: lu->order_code,&lu->order_code,PETSC_NULL);
482: PetscOptionsInt("-mat_dscpack_scheme","scheme_code: \n\
483: 1 = standard factorization, 2 = factorization + selective inversion", \
484: "None",
485: lu->scheme_code,&lu->scheme_code,PETSC_NULL);
486:
487: PetscOptionsEList("-mat_dscpack_factor","factor_type","None",ftype,2,ftype[0],&indx,&flg);
488: if (flg) {
489: switch (indx) {
490: case 0:
491: lu->factor_type = DSC_LDLT;
492: break;
493: case 1:
494: lu->factor_type = DSC_LLT;
495: break;
496: }
497: }
498: PetscOptionsInt("-mat_dscpack_MaxMemAllowed","in Mbytes","None",
499: lu->max_mem_allowed,&lu->max_mem_allowed,PETSC_NULL);
501: PetscOptionsInt("-mat_dscpack_stats","display stats: 0 = no display, 1 = display",
502: "None", lu->stat,&lu->stat,PETSC_NULL);
503:
504: PetscOptionsEList("-mat_dscpack_LBLAS","BLAS level used in the local phase","None",ltype,3,ltype[2],&indx,&flg);
505: if (flg) {
506: switch (indx) {
507: case 0:
508: lu->LBLASLevel = DSC_LBLAS1;
509: break;
510: case 1:
511: lu->LBLASLevel = DSC_LBLAS2;
512: break;
513: case 2:
514: lu->LBLASLevel = DSC_LBLAS3;
515: break;
516: }
517: }
519: PetscOptionsEList("-mat_dscpack_DBLAS","BLAS level used in the distributed phase","None",dtype,2,dtype[1],&indx,&flg);
520: if (flg) {
521: switch (indx) {
522: case 0:
523: lu->DBLASLevel = DSC_DBLAS1;
524: break;
525: case 1:
526: lu->DBLASLevel = DSC_DBLAS2;
527: break;
528: }
529: }
531: PetscOptionsEnd();
532:
533: lu->flg = DIFFERENT_NONZERO_PATTERN;
535: lu->My_DSC_Solver = DSC_Begin();
536: lu->CleanUpDSCPACK = PETSC_TRUE;
537: *F = B;
538: return(0);
539: }
543: PetscErrorCode MatAssemblyEnd_DSCPACK(Mat A,MatAssemblyType mode) {
545: Mat_DSC *lu=(Mat_DSC*)A->spptr;
548: (*lu->MatAssemblyEnd)(A,mode);
549: lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
550: A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_DSCPACK;
551: return(0);
552: }
556: PetscErrorCode MatFactorInfo_DSCPACK(Mat A,PetscViewer viewer)
557: {
558: Mat_DSC *lu=(Mat_DSC*)A->spptr;
560: char *s=0;
561:
563: PetscViewerASCIIPrintf(viewer,"DSCPACK run parameters:\n");
565: switch (lu->order_code) {
566: case 1: s = "ND"; break;
567: case 2: s = "Hybrid with Minimum Degree"; break;
568: case 3: s = "Hybrid with Minimum Deficiency"; break;
569: }
570: PetscViewerASCIIPrintf(viewer," order_code: %s \n",s);
572: switch (lu->scheme_code) {
573: case 1: s = "standard factorization"; break;
574: case 2: s = "factorization + selective inversion"; break;
575: }
576: PetscViewerASCIIPrintf(viewer," scheme_code: %s \n",s);
578: switch (lu->stat) {
579: case 0: s = "NO"; break;
580: case 1: s = "YES"; break;
581: }
582: PetscViewerASCIIPrintf(viewer," display stats: %s \n",s);
583:
584: if ( lu->factor_type == DSC_LLT) {
585: s = "LLT";
586: } else if ( lu->factor_type == DSC_LDLT){
587: s = "LDLT";
588: } else {
589: SETERRQ(PETSC_ERR_PLIB,"Unknown factor type");
590: }
591: PetscViewerASCIIPrintf(viewer," factor type: %s \n",s);
593: if ( lu->LBLASLevel == DSC_LBLAS1) {
594: s = "BLAS1";
595: } else if ( lu->LBLASLevel == DSC_LBLAS2){
596: s = "BLAS2";
597: } else if ( lu->LBLASLevel == DSC_LBLAS3){
598: s = "BLAS3";
599: } else {
600: SETERRQ(PETSC_ERR_PLIB,"Unknown local phase BLAS level");
601: }
602: PetscViewerASCIIPrintf(viewer," local phase BLAS level: %s \n",s);
603:
604: if ( lu->DBLASLevel == DSC_DBLAS1) {
605: s = "BLAS1";
606: } else if ( lu->DBLASLevel == DSC_DBLAS2){
607: s = "BLAS2";
608: } else {
609: SETERRQ(PETSC_ERR_PLIB,"Unknown distributed phase BLAS level");
610: }
611: PetscViewerASCIIPrintf(viewer," distributed phase BLAS level: %s \n",s);
612: return(0);
613: }
617: PetscErrorCode MatView_DSCPACK(Mat A,PetscViewer viewer) {
618: PetscErrorCode ierr;
619: PetscMPIInt size;
620: PetscTruth iascii;
621: PetscViewerFormat format;
622: Mat_DSC *lu=(Mat_DSC*)A->spptr;
625: /* This convertion ugliness is because MatView for BAIJ types calls MatConvert to AIJ */
626: size = lu->size;
627: if (size==1) {
628: MatConvert(A,MATSEQBAIJ,&A);
629: } else {
630: MatConvert(A,MATMPIBAIJ,&A);
631: }
633: MatView(A,viewer);
635: MatConvert(A,MATDSCPACK,&A);
637: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
638: if (iascii) {
639: PetscViewerGetFormat(viewer,&format);
640: if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
641: MatFactorInfo_DSCPACK(A,viewer);
642: }
643: }
644: return(0);
645: }
650: PetscErrorCode MatMPIBAIJSetPreallocation_MPIDSCPACK(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
651: {
652: Mat A;
653: Mat_DSC *lu = (Mat_DSC*)B->spptr;
657: /*
658: After performing the MPIBAIJ Preallocation, we need to convert the local diagonal block matrix
659: into DSCPACK type so that the block jacobi preconditioner (for example) can use DSCPACK. I would
660: like this to be done in the MatCreate routine, but the creation of this inner matrix requires
661: block size info so that PETSc can determine the local size properly. The block size info is set
662: in the preallocation routine.
663: */
664: (*lu->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
665: A = ((Mat_MPIBAIJ *)B->data)->A;
666: MatConvert_Base_DSCPACK(A,MATDSCPACK,&A);
667: return(0);
668: }
674: PetscErrorCode MatConvert_Base_DSCPACK(Mat A,const MatType type,Mat *newmat)
675: {
676: /* This routine is only called to convert to MATDSCPACK */
677: /* from MATSEQBAIJ if A has a single process communicator */
678: /* or MATMPIBAIJ otherwise, so we will ignore 'MatType type'. */
680: MPI_Comm comm;
681: Mat B=*newmat;
682: Mat_DSC *lu;
683: void (*f)(void);
686: if (B != A) {
687: MatDuplicate(A,MAT_COPY_VALUES,&B);
688: }
690: PetscObjectGetComm((PetscObject)A,&comm);
691: PetscNew(Mat_DSC,&lu);
692: PetscMemzero(lu,sizeof(Mat_DSC));
694: lu->MatDuplicate = A->ops->duplicate;
695: lu->MatView = A->ops->view;
696: lu->MatAssemblyEnd = A->ops->assemblyend;
697: lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
698: lu->MatDestroy = A->ops->destroy;
699: lu->CleanUpDSCPACK = PETSC_FALSE;
700: lu->bs = A->bs;
702: B->spptr = (void*)lu;
703: B->ops->duplicate = MatDuplicate_DSCPACK;
704: B->ops->view = MatView_DSCPACK;
705: B->ops->assemblyend = MatAssemblyEnd_DSCPACK;
706: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_DSCPACK;
707: B->ops->destroy = MatDestroy_DSCPACK;
709: MPI_Comm_size(comm,&(lu->size));
710: if (lu->size == 1) {
711: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_dscpack_C",
712: "MatConvert_Base_DSCPACK",MatConvert_Base_DSCPACK);
713: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_dscpack_seqbaij_C",
714: "MatConvert_DSCPACK_Base",MatConvert_DSCPACK_Base);
715: } else {
716: /* I really don't like needing to know the tag: MatMPIBAIJSetPreallocation_C */
717: PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",&f);
718: if (f) {
719: lu->MatPreallocate = (PetscErrorCode (*)(Mat,PetscInt,PetscInt,PetscInt*,PetscInt,PetscInt*))f;
720: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
721: "MatMPIBAIJSetPreallocation_MPIDSCPACK",
722: MatMPIBAIJSetPreallocation_MPIDSCPACK);
723: }
724: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_dscpack_C",
725: "MatConvert_Base_DSCPACK",MatConvert_Base_DSCPACK);
726: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_dscpack_mpibaij_C",
727: "MatConvert_DSCPACK_Base",MatConvert_DSCPACK_Base);
728: }
729: PetscObjectChangeTypeName((PetscObject)B,MATDSCPACK);
730: *newmat = B;
731: return(0);
732: }
737: PetscErrorCode MatDuplicate_DSCPACK(Mat A, MatDuplicateOption op, Mat *M) {
739: Mat_DSC *lu=(Mat_DSC *)A->spptr;
742: (*lu->MatDuplicate)(A,op,M);
743: PetscMemcpy((*M)->spptr,lu,sizeof(Mat_DSC));
744: return(0);
745: }
747: /*MC
748: MATDSCPACK - MATDSCPACK = "dscpack" - A matrix type providing direct solvers (Cholesky) for sequential
749: or distributed matrices via the external package DSCPACK.
751: If DSCPACK is installed (see the manual for
752: instructions on how to declare the existence of external packages),
753: a matrix type can be constructed which invokes DSCPACK solvers.
754: After calling MatCreate(...,A), simply call MatSetType(A,MATDSCPACK).
755: This matrix type is only supported for double precision real.
757: This matrix inherits from MATSEQBAIJ if constructed with a single process communicator,
758: and from MATMPIBAIJ otherwise. As a result, for sequential matrices, MatSeqBAIJSetPreallocation is
759: supported, and similarly MatMPIBAIJSetPreallocation is supported for distributed matrices. It is
760: recommended that you call both of the above preallocation routines for simplicity. Also,
761: MatConvert can be called to perform inplace conversion to and from MATSEQBAIJ or MATMPIBAIJ
762: for sequential or distributed matrices respectively.
764: Options Database Keys:
765: + -mat_type dscpack - sets the matrix type to dscpack during a call to MatSetFromOptions()
766: . -mat_dscpack_order <1,2,3> - DSCPACK ordering, 1:ND, 2:Hybrid with Minimum Degree, 3:Hybrid with Minimum Deficiency
767: . -mat_dscpack_scheme <1,2> - factorization scheme, 1:standard factorization, 2: factorization with selective inversion
768: . -mat_dscpack_factor <LLT,LDLT> - the type of factorization to be performed.
769: . -mat_dscpack_MaxMemAllowed <n> - the maximum memory to be used during factorization
770: . -mat_dscpack_stats <0,1> - display stats of the factorization and solves during MatDestroy(), 0: no display, 1: display
771: . -mat_dscpack_LBLAS <LBLAS1,LBLAS2,LBLAS3> - BLAS level used in the local phase
772: - -mat_dscpack_DBLAS <DBLAS1,DBLAS2> - BLAS level used in the distributed phase
774: Level: beginner
776: .seealso: PCCHOLESKY
777: M*/
782: PetscErrorCode MatCreate_DSCPACK(Mat A)
783: {
785: PetscMPIInt size;
788: /* Change type name before calling MatSetType to force proper construction of SeqBAIJ or MPIBAIJ */
789: /* and DSCPACK types */
790: PetscObjectChangeTypeName((PetscObject)A,MATDSCPACK);
791: MPI_Comm_size(A->comm,&size);
792: if (size == 1) {
793: MatSetType(A,MATSEQBAIJ);
794: } else {
795: MatSetType(A,MATMPIBAIJ);
796: }
797: MatConvert_Base_DSCPACK(A,MATDSCPACK,&A);
798: return(0);
799: }