Actual source code: dscpack.c
1: /*$Id: dscpack.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2: /*
3: Provides an interface to the DSCPACK (Domain-Separator Codes) sparse direct solver
4: */
6: #include src/mat/impls/baij/seq/baij.h
7: #include src/mat/impls/baij/mpi/mpibaij.h
9: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
11: EXTERN_C_BEGIN
12: #include "dscmain.h"
13: EXTERN_C_END
15: #define MAX_MEM_ALLOWED 100
16: typedef struct {
17: DSC_Solver My_DSC_Solver;
18: int num_local_strucs, *local_struc_old_num,
19: num_local_cols, num_local_nonz,
20: *global_struc_new_col_num,
21: *global_struc_new_num, *global_struc_owner,
22: dsc_id,bs,*local_cols_old_num,*replication;
23: int order_code,scheme_code,factor_type, stat,
24: LBLASLevel, DBLASLevel;
25: MatStructure flg;
26: IS my_cols,iden,iden_dsc;
27: Vec vec_dsc;
28: VecScatter scat;
29: } Mat_MPIBAIJ_DSC;
31: extern int MatDestroy_MPIBAIJ(Mat);
32: extern int MatDestroy_SeqBAIJ(Mat);
34: /* DSC function */
35: void isort2(int size, int *list, int *index)
36: {
37: /* in increasing order */
38: /* index will contain indices such that */
39: /* list can be accessed in sorted order */
40: int i, j, x, y;
42: for (i=0; i<size; i++) index[i] =i;
44: for (i=1; i<size; i++){
45: y= index[i];
46: x=list[index[i]];
47: for (j=i-1; ((j>=0) && (x<list[index[j]])); j--)
48: index[j+1]=index[j];
49: index[j+1]=y;
50: }
51: }/*end isort2*/
53: int BAIJtoMyANonz( int *AIndex, int *AStruct, int bs,
54: RealNumberType *ANonz, int NumLocalStructs,
55: int NumLocalNonz, int *GlobalStructNewColNum,
56: int *LocalStructOldNum,
57: int *LocalStructLocalNum,
58: RealNumberType **adr_MyANonz)
59: /*
60: Extract non-zero values of lower triangular part
61: of the permuted matrix that belong to this processor.
63: Only output parameter is adr_MyANonz -- is malloced and changed.
64: Rest are input parameters left unchanged.
66: When LocalStructLocalNum == PETSC_NULL,
67: AIndex, AStruct, and ANonz contain entire original matrix A
68: in PETSc SeqBAIJ format,
69: otherwise,
70: AIndex, AStruct, and ANonz are indeces for the submatrix
71: of A whose colomns (in increasing order) belong to this processor.
73: Other variables supply information on ownership of columns
74: and the new numbering in a fill-reducing permutation
76: This information is used to setup lower half of A nonzeroes
77: for columns owned by this processor
78: */
79: {
80: int i, j, k, iold,inew, jj, kk,ierr, bs2=bs*bs,
81: *idx, *NewColNum,
82: MyANonz_last, max_struct=0, struct_size;
83: RealNumberType *MyANonz;
87: /* loop: to find maximum number of subscripts over columns
88: assigned to this processor */
89: for (i=0; i <NumLocalStructs; i++) {
90: /* for each struct i (local) assigned to this processor */
91: if (LocalStructLocalNum){
92: iold = LocalStructLocalNum[i];
93: } else {
94: iold = LocalStructOldNum[i];
95: }
96:
97: struct_size = AIndex[iold+1] - AIndex[iold];
98: if ( max_struct <= struct_size) max_struct = struct_size;
99: }
101: /* allocate tmp arrays large enough to hold densest struct */
102: PetscMalloc((2*max_struct+1)*sizeof(int),&NewColNum);
103: idx = NewColNum + max_struct;
104:
105: PetscMalloc(NumLocalNonz*sizeof(RealNumberType),&MyANonz);
106: *adr_MyANonz = MyANonz;
108: /* loop to set up nonzeroes in MyANonz */
109: MyANonz_last = 0 ; /* points to first empty space in MyANonz */
110: for (i=0; i <NumLocalStructs; i++) {
112: /* for each struct i (local) assigned to this processor */
113: if (LocalStructLocalNum){
114: iold = LocalStructLocalNum[i];
115: } else {
116: iold = LocalStructOldNum[i];
117: }
119: struct_size = AIndex[iold+1] - AIndex[iold];
120: for (k=0, j=AIndex[iold]; j<AIndex[iold+1]; j++){
121: NewColNum[k] = GlobalStructNewColNum[AStruct[j]];
122: k++;
123: }
124: isort2(struct_size, NewColNum, idx);
125:
126: kk = AIndex[iold]*bs2; /* points to 1st element of iold block col in ANonz */
127: inew = GlobalStructNewColNum[LocalStructOldNum[i]];
129: for (jj = 0; jj < bs; jj++) {
130: for (j=0; j<struct_size; j++){
131: for ( k = 0; k<bs; k++){
132: if (NewColNum[idx[j]] + k >= inew)
133: MyANonz[MyANonz_last++] = ANonz[kk + idx[j]*bs2 + k*bs + jj];
134: }
135: }
136: inew++;
137: }
138: } /* end outer loop for i */
140: PetscFree(NewColNum);
141: if (MyANonz_last != NumLocalNonz)
142: SETERRQ2(1,"MyANonz_last %d != NumLocalNonz %dn",MyANonz_last, NumLocalNonz);
143: return(0);
144: }
146: int MatDestroy_MPIBAIJ_DSCPACK(Mat A)
147: {
148: Mat_MPIBAIJ_DSC *lu=(Mat_MPIBAIJ_DSC*)A->spptr;
149: int ierr, size;
150:
152: MPI_Comm_size(A->comm,&size);
154: if (lu->dsc_id != -1) {
155: if(lu->stat) DSC_DoStats(lu->My_DSC_Solver);
156: DSC_FreeAll(lu->My_DSC_Solver);
157: DSC_Close0(lu->My_DSC_Solver);
159: PetscFree(lu->local_cols_old_num);
160: }
161: DSC_End(lu->My_DSC_Solver);
162:
163: ISDestroy(lu->my_cols);
164: PetscFree(lu->replication);
165: VecDestroy(lu->vec_dsc);
166: ISDestroy(lu->iden_dsc);
167: VecScatterDestroy(lu->scat);
168:
169: if (size >1) ISDestroy(lu->iden);
170: PetscFree(lu);
172: if (size == 1){
173: MatDestroy_SeqBAIJ(A);
174: } else {
175: MatDestroy_MPIBAIJ(A);
176: }
177:
178: return(0);
179: }
181: int MatSolve_MPIBAIJ_DSCPACK(Mat A,Vec b,Vec x)
182: {
183: Mat_MPIBAIJ_DSC *lu= (Mat_MPIBAIJ_DSC*)A->spptr;
184: int ierr;
185: RealNumberType *solution_vec, *rhs_vec;
188: /* scatter b into seq vec_dsc */
189: if ( !lu->scat ) {
190: VecScatterCreate(b,lu->my_cols,lu->vec_dsc,lu->iden_dsc,&lu->scat);
191: }
192: VecScatterBegin(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
193: VecScatterEnd(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
195: if (lu->dsc_id != -1){
196: VecGetArray(lu->vec_dsc,&rhs_vec);
197: DSC_InputRhsLocalVec(lu->My_DSC_Solver, rhs_vec, lu->num_local_cols);
198: VecRestoreArray(lu->vec_dsc,&rhs_vec);
199:
200: DSC_Solve(lu->My_DSC_Solver);
201: if (ierr != DSC_NO_ERROR) {
202: DSC_ErrorDisplay(lu->My_DSC_Solver);
203: SETERRQ(1,"Error in calling DSC_Solve");
204: }
206: /* get the permuted local solution */
207: VecGetArray(lu->vec_dsc,&solution_vec);
208: DSC_GetLocalSolution(lu->My_DSC_Solver,solution_vec, lu->num_local_cols);
209: VecRestoreArray(lu->vec_dsc,&solution_vec);
211: } /* end of if (lu->dsc_id != -1) */
213: /* put permuted local solution solution_vec into x in the original order */
214: VecScatterBegin(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
215: VecScatterEnd(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
217: return(0);
218: }
220: int MatCholeskyFactorNumeric_MPIBAIJ_DSCPACK(Mat A,Mat *F)
221: {
222: Mat_SeqBAIJ *a_seq;
223: Mat_MPIBAIJ_DSC *lu=(Mat_MPIBAIJ_DSC*)(*F)->spptr;
224: Mat *tseq,A_seq;
225: RealNumberType *my_a_nonz;
226: int ierr, M=A->M, Mbs=M/lu->bs, size,
227: max_mem_estimate, max_single_malloc_blk,
228: number_of_procs,i,j,next,iold,
229: *idx,*iidx,*itmp;
230: IS my_cols_sorted;
231:
233: MPI_Comm_size(A->comm,&size);
234:
235: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
237: /* convert A to A_seq */
238: if (size > 1) {
239: ISCreateStride(PETSC_COMM_SELF,M,0,1,&lu->iden);
240: MatGetSubMatrices(A,1,&lu->iden,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
241:
242: A_seq = *tseq;
243: PetscFree(tseq);
244: a_seq = (Mat_SeqBAIJ*)A_seq->data;
245: } else {
246: a_seq = (Mat_SeqBAIJ*)A->data;
247: }
248:
249: PetscMalloc(Mbs*sizeof(int),&lu->replication);
250: for (i=0; i<Mbs; i++) lu->replication[i] = lu->bs;
252: number_of_procs = DSC_Analyze(Mbs, a_seq->i, a_seq->j, lu->replication);
253:
254: i = size;
255: if ( number_of_procs < i ) i = number_of_procs;
256: number_of_procs = 1;
257: while ( i > 1 ){
258: number_of_procs *= 2; i /= 2;
259: }
261: /* DSC_Solver starts */
262: lu->My_DSC_Solver = DSC_Begin();
263: DSC_Open0( lu->My_DSC_Solver, number_of_procs, &lu->dsc_id, PETSC_COMM_WORLD );
265: if (lu->dsc_id != -1) {
266: DSC_Order(lu->My_DSC_Solver,lu->order_code,Mbs,a_seq->i,a_seq->j,lu->replication,
267: &M,&lu->num_local_strucs,
268: &lu->num_local_cols, &lu->num_local_nonz, &lu->global_struc_new_col_num,
269: &lu->global_struc_new_num, &lu->global_struc_owner,
270: &lu->local_struc_old_num);
271: if (ierr != DSC_NO_ERROR) {
272: DSC_ErrorDisplay(lu->My_DSC_Solver);
273: SETERRQ(1,"Error when use DSC_Order()");
274: }
276: DSC_SFactor(lu->My_DSC_Solver,&max_mem_estimate,&max_single_malloc_blk,
277: MAX_MEM_ALLOWED, lu->LBLASLevel, lu->DBLASLevel);
278: if (ierr != DSC_NO_ERROR) {
279: DSC_ErrorDisplay(lu->My_DSC_Solver);
280: SETERRQ(1,"Error when use DSC_Order");
281: }
283: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
284: lu->num_local_strucs, lu->num_local_nonz,
285: lu->global_struc_new_col_num,
286: lu->local_struc_old_num,
287: PETSC_NULL,
288: &my_a_nonz);
289: if (ierr <0) {
290: DSC_ErrorDisplay(lu->My_DSC_Solver);
291: SETERRQ1(1,"Error setting local nonzeroes at processor %d n", lu->dsc_id);
292: }
294: /* get local_cols_old_num and IS my_cols to be used later */
295: PetscMalloc(lu->num_local_cols*sizeof(int),&lu->local_cols_old_num);
296: for (next = 0, i=0; i<lu->num_local_strucs; i++){
297: iold = lu->bs*lu->local_struc_old_num[i];
298: for (j=0; j<lu->bs; j++)
299: lu->local_cols_old_num[next++] = iold++;
300: }
301: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
302:
303: } else { /* lu->dsc_id == -1 */
304: lu->num_local_cols = 0;
305: lu->local_cols_old_num = 0;
306: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
307: }
308: /* generate vec_dsc and iden_dsc to be used later */
309: VecCreateSeq(PETSC_COMM_SELF,lu->num_local_cols,&lu->vec_dsc);
310: ISCreateStride(PETSC_COMM_SELF,lu->num_local_cols,0,1,&lu->iden_dsc);
311: lu->scat = PETSC_NULL;
313: if ( size>1 ) {MatDestroy(A_seq); }
315: } else { /* use previously computed symbolic factor */
316: /* convert A to my A_seq */
317: if (size > 1) {
318: if (lu->dsc_id == -1) {
319: itmp = 0;
320: } else {
321: PetscMalloc(2*lu->num_local_strucs*sizeof(int),&idx);
322: iidx = idx + lu->num_local_strucs;
323: PetscMalloc(lu->num_local_cols*sizeof(int),&itmp);
324:
325: isort2(lu->num_local_strucs, lu->local_struc_old_num, idx);
326: for (next=0, i=0; i< lu->num_local_strucs; i++) {
327: iold = lu->bs*lu->local_struc_old_num[idx[i]];
328: for (j=0; j<lu->bs; j++){
329: itmp[next++] = iold++; /* sorted local_cols_old_num */
330: }
331: }
332: for (i=0; i< lu->num_local_strucs; i++) {
333: iidx[idx[i]] = i; /* inverse of idx */
334: }
335: } /* end of (lu->dsc_id == -1) */
336: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,itmp,&my_cols_sorted);
337: MatGetSubMatrices(A,1,&my_cols_sorted,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
338: ISDestroy(my_cols_sorted);
339:
340: A_seq = *tseq;
341: PetscFree(tseq);
342:
343: if (lu->dsc_id != -1) {
344: DSC_ReFactorInitialize(lu->My_DSC_Solver);
346: a_seq = (Mat_SeqBAIJ*)A_seq->data;
347: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
348: lu->num_local_strucs, lu->num_local_nonz,
349: lu->global_struc_new_col_num,
350: lu->local_struc_old_num,
351: iidx,
352: &my_a_nonz);
353: if (ierr <0) {
354: DSC_ErrorDisplay(lu->My_DSC_Solver);
355: SETERRQ1(1,"Error setting local nonzeroes at processor %d n", lu->dsc_id);
356: }
357:
358: PetscFree(idx);
359: PetscFree(itmp);
360: } /* end of if(lu->dsc_id != -1) */
361: } else { /* size == 1 */
362: a_seq = (Mat_SeqBAIJ*)A->data;
363:
364: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
365: lu->num_local_strucs, lu->num_local_nonz,
366: lu->global_struc_new_col_num,
367: lu->local_struc_old_num,
368: PETSC_NULL,
369: &my_a_nonz);
370: if (ierr <0) {
371: DSC_ErrorDisplay(lu->My_DSC_Solver);
372: SETERRQ1(1,"Error setting local nonzeroes at processor %d n", lu->dsc_id);
373: }
374: }
375: if ( size>1 ) {MatDestroy(A_seq); }
376: }
377:
378: if (lu->dsc_id != -1) {
379: DSC_NFactor(lu->My_DSC_Solver, lu->scheme_code, my_a_nonz, lu->factor_type, lu->LBLASLevel, lu->DBLASLevel);
380: PetscFree(my_a_nonz);
381: }
382:
383: (*F)->assembled = PETSC_TRUE;
384: lu->flg = SAME_NONZERO_PATTERN;
386: return(0);
387: }
389: /* Note the Petsc permutation r is ignored */
390: int MatCholeskyFactorSymbolic_MPIBAIJ_DSCPACK(Mat A,IS r,PetscReal f,Mat *F)
391: {
392: Mat_MPIBAIJ_DSC *lu;
393: int ierr,M=A->M,size;
394: PetscTruth flg;
395: char buff[32], *ftype[] = {"LLT","LDLT"},
396: *ltype[] = {"LBLAS1","LBLAS2","LBLAS3"},
397: *dtype[] = {"DBLAS1","DBLAS2"};
400: PetscNew(Mat_MPIBAIJ_DSC,&lu);
402: /* Create the factorization matrix F */
403: MatGetBlockSize(A,&lu->bs);
404: MatCreateMPIBAIJ(A->comm,lu->bs,PETSC_DECIDE,PETSC_DECIDE,M,M,0,PETSC_NULL,0,PETSC_NULL,F);
405:
406: (*F)->spptr = (Mat_MPIBAIJ_DSC*)lu;
407: (*F)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIBAIJ_DSCPACK;
408: (*F)->ops->solve = MatSolve_MPIBAIJ_DSCPACK;
409: (*F)->ops->destroy = MatDestroy_MPIBAIJ_DSCPACK;
410: (*F)->factor = FACTOR_CHOLESKY;
412: /* Set the default input options */
413: lu->order_code = 2;
414: lu->scheme_code = 1;
415: lu->factor_type = 1;
416: lu->stat = 0; /* do not display stats */
417: lu->LBLASLevel = DSC_LBLAS3;
418: lu->DBLASLevel = DSC_DBLAS2;
420: /* Get the runtime input options */
421: PetscOptionsBegin(A->comm,A->prefix,"DSCPACK Options","Mat");
423: PetscOptionsInt("-mat_baij_dscpack_order","order_code: n
424: 1 = ND, 2 = Hybrid with Minimum Degree, 3 = Hybrid with Minimum Deficiency", 425: "None",
426: lu->order_code,&lu->order_code,PETSC_NULL);
428: PetscOptionsInt("-mat_baij_dscpack_scheme","scheme_code: n
429: 1 = standard factorization, 2 = factorization + selective inversion", 430: "None",
431: lu->scheme_code,&lu->scheme_code,PETSC_NULL);
432:
433: PetscOptionsEList("-mat_baij_dscpack_factor","factor_type","None",
434: ftype,2,ftype[0],buff,32,&flg);
435: while (flg) {
436: PetscStrcmp(buff,"LLT",&flg);
437: if (flg) {
438: lu->factor_type = DSC_LLT;
439: break;
440: }
441: PetscStrcmp(buff,"LDLT",&flg);
442: if (flg) {
443: lu->factor_type = DSC_LDLT;
444: break;
445: }
446: SETERRQ1(1,"Unknown factor type %s",buff);
447: }
449: PetscOptionsInt("-mat_baij_dscpack_stats","display stats: 0 = no display, 1 = display",
450: "None", lu->stat,&lu->stat,PETSC_NULL);
451:
452: PetscOptionsEList("-mat_baij_dscpack_LBLAS","BLAS level used in the local phase","None",
453: ltype,3,ltype[2],buff,32,&flg);
454: while (flg) {
455: PetscStrcmp(buff,"LBLAS1",&flg);
456: if (flg) {
457: lu->LBLASLevel = DSC_LBLAS1;
458: break;
459: }
460: PetscStrcmp(buff,"LBLAS2",&flg);
461: if (flg) {
462: lu->LBLASLevel = DSC_LBLAS2;
463: break;
464: }
465: PetscStrcmp(buff,"LBLAS3",&flg);
466: if (flg) {
467: lu->LBLASLevel = DSC_LBLAS3;
468: break;
469: }
470: SETERRQ1(1,"Unknown local phase BLAS level %s",buff);
471: }
473: PetscOptionsEList("-mat_baij_dscpack_DBLAS","BLAS level used in the distributed phase","None",
474: dtype,2,dtype[1],buff,32,&flg);
475: while (flg) {
476: PetscStrcmp(buff,"DBLAS1",&flg);
477: if (flg) {
478: lu->DBLASLevel = DSC_DBLAS1;
479: break;
480: }
481: PetscStrcmp(buff,"DBLAS2",&flg);
482: if (flg) {
483: lu->DBLASLevel = DSC_DBLAS2;
484: break;
485: }
486: SETERRQ1(1,"Unknown distributed phase BLAS level %s",buff);
487: }
489: PetscOptionsEnd();
490:
491: lu->flg = DIFFERENT_NONZERO_PATTERN;
492: return(0);
493: }
495: int MatUseDSCPACK_MPIBAIJ(Mat A)
496: {
498: A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIBAIJ_DSCPACK;
499: return(0);
500: }
502: int MatMPIBAIJFactorInfo_DSCPACK(Mat A,PetscViewer viewer)
503: {
504: Mat_MPIBAIJ_DSC *lu=(Mat_MPIBAIJ_DSC*)A->spptr;
505: int ierr;
506: char *s;
507:
509: /* check if matrix is dscpack type */
510: if (A->ops->solve != MatSolve_MPIBAIJ_DSCPACK) return(0);
512: PetscViewerASCIIPrintf(viewer,"DSCPACK run parameters:n");
514: switch (lu->order_code) {
515: case 1: s = "ND"; break;
516: case 2: s = "Hybrid with Minimum Degree"; break;
517: case 3: s = "Hybrid with Minimum Deficiency"; break;
518: }
519: PetscViewerASCIIPrintf(viewer," order_code: %s n",s);
521: switch (lu->scheme_code) {
522: case 1: s = "standard factorization"; break;
523: case 2: s = "factorization + selective inversion"; break;
524: }
525: PetscViewerASCIIPrintf(viewer," scheme_code: %s n",s);
527: switch (lu->stat) {
528: case 0: s = "NO"; break;
529: case 1: s = "YES"; break;
530: }
531: PetscViewerASCIIPrintf(viewer," display stats: %s n",s);
532:
533: if ( lu->factor_type == DSC_LLT) {
534: s = "LLT";
535: } else if ( lu->factor_type == DSC_LDLT){
536: s = "LDLT";
537: } else {
538: SETERRQ(1,"Unknown factor type");
539: }
540: PetscViewerASCIIPrintf(viewer," factor type: %s n",s);
542: if ( lu->LBLASLevel == DSC_LBLAS1) {
543: s = "BLAS1";
544: } else if ( lu->LBLASLevel == DSC_LBLAS2){
545: s = "BLAS2";
546: } else if ( lu->LBLASLevel == DSC_LBLAS3){
547: s = "BLAS3";
548: } else {
549: SETERRQ(1,"Unknown local phase BLAS level");
550: }
551: PetscViewerASCIIPrintf(viewer," local phase BLAS level: %s n",s);
553: if ( lu->DBLASLevel == DSC_DBLAS1) {
554: s = "BLAS1";
555: } else if ( lu->DBLASLevel == DSC_DBLAS2){
556: s = "BLAS2";
557: } else {
558: SETERRQ(1,"Unknown distributed phase BLAS level");
559: }
560: PetscViewerASCIIPrintf(viewer," distributed phase BLAS level: %s n",s);
561: return(0);
562: }
564: #else
566: int MatUseDSCPACK_MPIBAIJ(Mat A)
567: {
569: return(0);
570: }
572: #endif