Actual source code: dscpack.c
1: /*$Id: dscpack.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2: /*
3: Provides an interface to the DSCPACK (Domain-Separator Codes) sparse direct solver
4: */
6: #include src/mat/impls/baij/seq/baij.h
7: #include src/mat/impls/baij/mpi/mpibaij.h
9: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
11: EXTERN_C_BEGIN
12: #include "dscmain.h"
13: EXTERN_C_END
15: typedef struct {
16: DSC_Solver My_DSC_Solver;
17: int num_local_strucs, *local_struc_old_num,
18: num_local_cols, num_local_nonz,
19: *global_struc_new_col_num,
20: *global_struc_new_num, *global_struc_owner,
21: dsc_id,bs,*local_cols_old_num,*replication;
22: int order_code,scheme_code,factor_type, stat,
23: LBLASLevel,DBLASLevel,max_mem_allowed;
24: MatStructure flg;
25: IS my_cols,iden,iden_dsc;
26: Vec vec_dsc;
27: VecScatter scat;
28: } Mat_MPIBAIJ_DSC;
30: extern int MatDestroy_MPIBAIJ(Mat);
31: extern int MatDestroy_SeqBAIJ(Mat);
33: /* DSC function */
34: #undef __FUNCT__
36: void isort2(int size, int *list, int *index)
37: {
38: /* in increasing order */
39: /* index will contain indices such that */
40: /* list can be accessed in sorted order */
41: int i, j, x, y;
43: for (i=0; i<size; i++) index[i] =i;
45: for (i=1; i<size; i++){
46: y= index[i];
47: x=list[index[i]];
48: for (j=i-1; ((j>=0) && (x<list[index[j]])); j--)
49: index[j+1]=index[j];
50: index[j+1]=y;
51: }
52: }/*end isort2*/
54: #undef __FUNCT__
56: int BAIJtoMyANonz( int *AIndex, int *AStruct, int bs,
57: RealNumberType *ANonz, int NumLocalStructs,
58: int NumLocalNonz, int *GlobalStructNewColNum,
59: int *LocalStructOldNum,
60: int *LocalStructLocalNum,
61: RealNumberType **adr_MyANonz)
62: /*
63: Extract non-zero values of lower triangular part
64: of the permuted matrix that belong to this processor.
66: Only output parameter is adr_MyANonz -- is malloced and changed.
67: Rest are input parameters left unchanged.
69: When LocalStructLocalNum == PETSC_NULL,
70: AIndex, AStruct, and ANonz contain entire original matrix A
71: in PETSc SeqBAIJ format,
72: otherwise,
73: AIndex, AStruct, and ANonz are indeces for the submatrix
74: of A whose colomns (in increasing order) belong to this processor.
76: Other variables supply information on ownership of columns
77: and the new numbering in a fill-reducing permutation
79: This information is used to setup lower half of A nonzeroes
80: for columns owned by this processor
81: */
82: {
83: int i, j, k, iold,inew, jj, kk,ierr, bs2=bs*bs,
84: *idx, *NewColNum,
85: MyANonz_last, max_struct=0, struct_size;
86: RealNumberType *MyANonz;
90: /* loop: to find maximum number of subscripts over columns
91: assigned to this processor */
92: for (i=0; i <NumLocalStructs; i++) {
93: /* for each struct i (local) assigned to this processor */
94: if (LocalStructLocalNum){
95: iold = LocalStructLocalNum[i];
96: } else {
97: iold = LocalStructOldNum[i];
98: }
99:
100: struct_size = AIndex[iold+1] - AIndex[iold];
101: if ( max_struct <= struct_size) max_struct = struct_size;
102: }
104: /* allocate tmp arrays large enough to hold densest struct */
105: PetscMalloc((2*max_struct+1)*sizeof(int),&NewColNum);
106: idx = NewColNum + max_struct;
107:
108: PetscMalloc(NumLocalNonz*sizeof(RealNumberType),&MyANonz);
109: *adr_MyANonz = MyANonz;
111: /* loop to set up nonzeroes in MyANonz */
112: MyANonz_last = 0 ; /* points to first empty space in MyANonz */
113: for (i=0; i <NumLocalStructs; i++) {
115: /* for each struct i (local) assigned to this processor */
116: if (LocalStructLocalNum){
117: iold = LocalStructLocalNum[i];
118: } else {
119: iold = LocalStructOldNum[i];
120: }
122: struct_size = AIndex[iold+1] - AIndex[iold];
123: for (k=0, j=AIndex[iold]; j<AIndex[iold+1]; j++){
124: NewColNum[k] = GlobalStructNewColNum[AStruct[j]];
125: k++;
126: }
127: isort2(struct_size, NewColNum, idx);
128:
129: kk = AIndex[iold]*bs2; /* points to 1st element of iold block col in ANonz */
130: inew = GlobalStructNewColNum[LocalStructOldNum[i]];
132: for (jj = 0; jj < bs; jj++) {
133: for (j=0; j<struct_size; j++){
134: for ( k = 0; k<bs; k++){
135: if (NewColNum[idx[j]] + k >= inew)
136: MyANonz[MyANonz_last++] = ANonz[kk + idx[j]*bs2 + k*bs + jj];
137: }
138: }
139: inew++;
140: }
141: } /* end outer loop for i */
143: PetscFree(NewColNum);
144: if (MyANonz_last != NumLocalNonz)
145: SETERRQ2(1,"MyANonz_last %d != NumLocalNonz %dn",MyANonz_last, NumLocalNonz);
146: return(0);
147: }
149: #undef __FUNCT__
151: int MatDestroy_MPIBAIJ_DSCPACK(Mat A)
152: {
153: Mat_MPIBAIJ_DSC *lu=(Mat_MPIBAIJ_DSC*)A->spptr;
154: int ierr, size;
155:
157: MPI_Comm_size(A->comm,&size);
159: if (lu->dsc_id != -1) {
160: if(lu->stat) DSC_DoStats(lu->My_DSC_Solver);
161: DSC_FreeAll(lu->My_DSC_Solver);
162: DSC_Close0(lu->My_DSC_Solver);
164: PetscFree(lu->local_cols_old_num);
165: }
166: DSC_End(lu->My_DSC_Solver);
167:
168: ISDestroy(lu->my_cols);
169: PetscFree(lu->replication);
170: VecDestroy(lu->vec_dsc);
171: ISDestroy(lu->iden_dsc);
172: VecScatterDestroy(lu->scat);
173:
174: if (size >1) ISDestroy(lu->iden);
175: PetscFree(lu);
177: if (size == 1){
178: MatDestroy_SeqBAIJ(A);
179: } else {
180: MatDestroy_MPIBAIJ(A);
181: }
182:
183: return(0);
184: }
186: #undef __FUNCT__
188: int MatSolve_MPIBAIJ_DSCPACK(Mat A,Vec b,Vec x)
189: {
190: Mat_MPIBAIJ_DSC *lu= (Mat_MPIBAIJ_DSC*)A->spptr;
191: int ierr;
192: RealNumberType *solution_vec, *rhs_vec;
195: /* scatter b into seq vec_dsc */
196: if ( !lu->scat ) {
197: VecScatterCreate(b,lu->my_cols,lu->vec_dsc,lu->iden_dsc,&lu->scat);
198: }
199: VecScatterBegin(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
200: VecScatterEnd(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
202: if (lu->dsc_id != -1){
203: VecGetArray(lu->vec_dsc,&rhs_vec);
204: DSC_InputRhsLocalVec(lu->My_DSC_Solver, rhs_vec, lu->num_local_cols);
205: VecRestoreArray(lu->vec_dsc,&rhs_vec);
206:
207: DSC_Solve(lu->My_DSC_Solver);
208: if (ierr != DSC_NO_ERROR) {
209: DSC_ErrorDisplay(lu->My_DSC_Solver);
210: SETERRQ(1,"Error in calling DSC_Solve");
211: }
213: /* get the permuted local solution */
214: VecGetArray(lu->vec_dsc,&solution_vec);
215: DSC_GetLocalSolution(lu->My_DSC_Solver,solution_vec, lu->num_local_cols);
216: VecRestoreArray(lu->vec_dsc,&solution_vec);
218: } /* end of if (lu->dsc_id != -1) */
220: /* put permuted local solution solution_vec into x in the original order */
221: VecScatterBegin(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
222: VecScatterEnd(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
224: return(0);
225: }
227: #undef __FUNCT__
229: int MatCholeskyFactorNumeric_MPIBAIJ_DSCPACK(Mat A,Mat *F)
230: {
231: Mat_SeqBAIJ *a_seq;
232: Mat_MPIBAIJ_DSC *lu=(Mat_MPIBAIJ_DSC*)(*F)->spptr;
233: Mat *tseq,A_seq;
234: RealNumberType *my_a_nonz;
235: int ierr, M=A->M, Mbs=M/lu->bs, size,
236: max_mem_estimate, max_single_malloc_blk,
237: number_of_procs,i,j,next,iold,
238: *idx,*iidx,*itmp;
239: IS my_cols_sorted;
240:
242: MPI_Comm_size(A->comm,&size);
243:
244: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
246: /* convert A to A_seq */
247: if (size > 1) {
248: ISCreateStride(PETSC_COMM_SELF,M,0,1,&lu->iden);
249: MatGetSubMatrices(A,1,&lu->iden,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
250:
251: A_seq = *tseq;
252: PetscFree(tseq);
253: a_seq = (Mat_SeqBAIJ*)A_seq->data;
254: } else {
255: a_seq = (Mat_SeqBAIJ*)A->data;
256: }
257:
258: PetscMalloc(Mbs*sizeof(int),&lu->replication);
259: for (i=0; i<Mbs; i++) lu->replication[i] = lu->bs;
261: number_of_procs = DSC_Analyze(Mbs, a_seq->i, a_seq->j, lu->replication);
262:
263: i = size;
264: if ( number_of_procs < i ) i = number_of_procs;
265: number_of_procs = 1;
266: while ( i > 1 ){
267: number_of_procs *= 2; i /= 2;
268: }
270: /* DSC_Solver starts */
271: lu->My_DSC_Solver = DSC_Begin();
272: DSC_Open0( lu->My_DSC_Solver, number_of_procs, &lu->dsc_id, PETSC_COMM_WORLD );
274: if (lu->dsc_id != -1) {
275: DSC_Order(lu->My_DSC_Solver,lu->order_code,Mbs,a_seq->i,a_seq->j,lu->replication,
276: &M,&lu->num_local_strucs,
277: &lu->num_local_cols, &lu->num_local_nonz, &lu->global_struc_new_col_num,
278: &lu->global_struc_new_num, &lu->global_struc_owner,
279: &lu->local_struc_old_num);
280: if (ierr != DSC_NO_ERROR) {
281: DSC_ErrorDisplay(lu->My_DSC_Solver);
282: SETERRQ(1,"Error when use DSC_Order()");
283: }
285: DSC_SFactor(lu->My_DSC_Solver,&max_mem_estimate,&max_single_malloc_blk,
286: lu->max_mem_allowed, lu->LBLASLevel, lu->DBLASLevel);
287: if (ierr != DSC_NO_ERROR) {
288: DSC_ErrorDisplay(lu->My_DSC_Solver);
289: SETERRQ(1,"Error when use DSC_Order");
290: }
292: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
293: lu->num_local_strucs, lu->num_local_nonz,
294: lu->global_struc_new_col_num,
295: lu->local_struc_old_num,
296: PETSC_NULL,
297: &my_a_nonz);
298: if (ierr <0) {
299: DSC_ErrorDisplay(lu->My_DSC_Solver);
300: SETERRQ1(1,"Error setting local nonzeroes at processor %d n", lu->dsc_id);
301: }
303: /* get local_cols_old_num and IS my_cols to be used later */
304: PetscMalloc(lu->num_local_cols*sizeof(int),&lu->local_cols_old_num);
305: for (next = 0, i=0; i<lu->num_local_strucs; i++){
306: iold = lu->bs*lu->local_struc_old_num[i];
307: for (j=0; j<lu->bs; j++)
308: lu->local_cols_old_num[next++] = iold++;
309: }
310: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
311:
312: } else { /* lu->dsc_id == -1 */
313: lu->num_local_cols = 0;
314: lu->local_cols_old_num = 0;
315: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
316: }
317: /* generate vec_dsc and iden_dsc to be used later */
318: VecCreateSeq(PETSC_COMM_SELF,lu->num_local_cols,&lu->vec_dsc);
319: ISCreateStride(PETSC_COMM_SELF,lu->num_local_cols,0,1,&lu->iden_dsc);
320: lu->scat = PETSC_NULL;
322: if ( size>1 ) {MatDestroy(A_seq); }
324: } else { /* use previously computed symbolic factor */
325: /* convert A to my A_seq */
326: if (size > 1) {
327: if (lu->dsc_id == -1) {
328: itmp = 0;
329: } else {
330: PetscMalloc(2*lu->num_local_strucs*sizeof(int),&idx);
331: iidx = idx + lu->num_local_strucs;
332: PetscMalloc(lu->num_local_cols*sizeof(int),&itmp);
333:
334: isort2(lu->num_local_strucs, lu->local_struc_old_num, idx);
335: for (next=0, i=0; i< lu->num_local_strucs; i++) {
336: iold = lu->bs*lu->local_struc_old_num[idx[i]];
337: for (j=0; j<lu->bs; j++){
338: itmp[next++] = iold++; /* sorted local_cols_old_num */
339: }
340: }
341: for (i=0; i< lu->num_local_strucs; i++) {
342: iidx[idx[i]] = i; /* inverse of idx */
343: }
344: } /* end of (lu->dsc_id == -1) */
345: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,itmp,&my_cols_sorted);
346: MatGetSubMatrices(A,1,&my_cols_sorted,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
347: ISDestroy(my_cols_sorted);
348:
349: A_seq = *tseq;
350: PetscFree(tseq);
351:
352: if (lu->dsc_id != -1) {
353: DSC_ReFactorInitialize(lu->My_DSC_Solver);
355: a_seq = (Mat_SeqBAIJ*)A_seq->data;
356: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
357: lu->num_local_strucs, lu->num_local_nonz,
358: lu->global_struc_new_col_num,
359: lu->local_struc_old_num,
360: iidx,
361: &my_a_nonz);
362: if (ierr <0) {
363: DSC_ErrorDisplay(lu->My_DSC_Solver);
364: SETERRQ1(1,"Error setting local nonzeroes at processor %d n", lu->dsc_id);
365: }
366:
367: PetscFree(idx);
368: PetscFree(itmp);
369: } /* end of if(lu->dsc_id != -1) */
370: } else { /* size == 1 */
371: a_seq = (Mat_SeqBAIJ*)A->data;
372:
373: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
374: lu->num_local_strucs, lu->num_local_nonz,
375: lu->global_struc_new_col_num,
376: lu->local_struc_old_num,
377: PETSC_NULL,
378: &my_a_nonz);
379: if (ierr <0) {
380: DSC_ErrorDisplay(lu->My_DSC_Solver);
381: SETERRQ1(1,"Error setting local nonzeroes at processor %d n", lu->dsc_id);
382: }
383: }
384: if ( size>1 ) {MatDestroy(A_seq); }
385: }
386:
387: if (lu->dsc_id != -1) {
388: DSC_NFactor(lu->My_DSC_Solver, lu->scheme_code, my_a_nonz, lu->factor_type, lu->LBLASLevel, lu->DBLASLevel);
389: PetscFree(my_a_nonz);
390: }
391:
392: (*F)->assembled = PETSC_TRUE;
393: lu->flg = SAME_NONZERO_PATTERN;
395: return(0);
396: }
398: /* Note the Petsc permutation r is ignored */
399: #undef __FUNCT__
401: int MatCholeskyFactorSymbolic_MPIBAIJ_DSCPACK(Mat A,IS r,PetscReal f,Mat *F)
402: {
403: Mat_MPIBAIJ_DSC *lu;
404: int ierr,size;
405: PetscTruth flg;
406: char buff[32], *ftype[] = {"LLT","LDLT"},
407: *ltype[] = {"LBLAS1","LBLAS2","LBLAS3"},
408: *dtype[] = {"DBLAS1","DBLAS2"};
411: PetscNew(Mat_MPIBAIJ_DSC,&lu);
413: /* Create the factorization matrix F */
414: MatGetBlockSize(A,&lu->bs);
415: MatCreateMPIBAIJ(A->comm,lu->bs,A->m,A->n,A->M,A->N,0,PETSC_NULL,0,PETSC_NULL,F);
416:
417: (*F)->spptr = (Mat_MPIBAIJ_DSC*)lu;
418: (*F)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIBAIJ_DSCPACK;
419: (*F)->ops->solve = MatSolve_MPIBAIJ_DSCPACK;
420: (*F)->ops->destroy = MatDestroy_MPIBAIJ_DSCPACK;
421: (*F)->factor = FACTOR_CHOLESKY;
423: /* Set the default input options */
424: lu->order_code = 2;
425: lu->scheme_code = 1;
426: lu->factor_type = 1;
427: lu->stat = 0; /* do not display stats */
428: lu->LBLASLevel = DSC_LBLAS3;
429: lu->DBLASLevel = DSC_DBLAS2;
430: lu->max_mem_allowed = 256;
432: /* Get the runtime input options */
433: PetscOptionsBegin(A->comm,A->prefix,"DSCPACK Options","Mat");
435: PetscOptionsInt("-mat_dscpack_order","order_code: n
436: 1 = ND, 2 = Hybrid with Minimum Degree, 3 = Hybrid with Minimum Deficiency", 437: "None",
438: lu->order_code,&lu->order_code,PETSC_NULL);
440: PetscOptionsInt("-mat_dscpack_scheme","scheme_code: n
441: 1 = standard factorization, 2 = factorization + selective inversion", 442: "None",
443: lu->scheme_code,&lu->scheme_code,PETSC_NULL);
444:
445: PetscOptionsEList("-mat_dscpack_factor","factor_type","None",
446: ftype,2,ftype[0],buff,32,&flg);
447: while (flg) {
448: PetscStrcmp(buff,"LLT",&flg);
449: if (flg) {
450: lu->factor_type = DSC_LLT;
451: break;
452: }
453: PetscStrcmp(buff,"LDLT",&flg);
454: if (flg) {
455: lu->factor_type = DSC_LDLT;
456: break;
457: }
458: SETERRQ1(1,"Unknown factor type %s",buff);
459: }
460: PetscOptionsInt("-mat_dscpack_MaxMemAllowed","", 461: "None",
462: lu->max_mem_allowed,&lu->max_mem_allowed,PETSC_NULL);
464: PetscOptionsInt("-mat_dscpack_stats","display stats: 0 = no display, 1 = display",
465: "None", lu->stat,&lu->stat,PETSC_NULL);
466:
467: PetscOptionsEList("-mat_dscpack_LBLAS","BLAS level used in the local phase","None",
468: ltype,3,ltype[2],buff,32,&flg);
469: while (flg) {
470: PetscStrcmp(buff,"LBLAS1",&flg);
471: if (flg) {
472: lu->LBLASLevel = DSC_LBLAS1;
473: break;
474: }
475: PetscStrcmp(buff,"LBLAS2",&flg);
476: if (flg) {
477: lu->LBLASLevel = DSC_LBLAS2;
478: break;
479: }
480: PetscStrcmp(buff,"LBLAS3",&flg);
481: if (flg) {
482: lu->LBLASLevel = DSC_LBLAS3;
483: break;
484: }
485: SETERRQ1(1,"Unknown local phase BLAS level %s",buff);
486: }
488: PetscOptionsEList("-mat_dscpack_DBLAS","BLAS level used in the distributed phase","None",
489: dtype,2,dtype[1],buff,32,&flg);
490: while (flg) {
491: PetscStrcmp(buff,"DBLAS1",&flg);
492: if (flg) {
493: lu->DBLASLevel = DSC_DBLAS1;
494: break;
495: }
496: PetscStrcmp(buff,"DBLAS2",&flg);
497: if (flg) {
498: lu->DBLASLevel = DSC_DBLAS2;
499: break;
500: }
501: SETERRQ1(1,"Unknown distributed phase BLAS level %s",buff);
502: }
504: PetscOptionsEnd();
505:
506: lu->flg = DIFFERENT_NONZERO_PATTERN;
507: return(0);
508: }
510: #undef __FUNCT__
512: int MatUseDSCPACK_MPIBAIJ(Mat A)
513: {
515: A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIBAIJ_DSCPACK;
516: return(0);
517: }
519: #undef __FUNCT__
521: int MatMPIBAIJFactorInfo_DSCPACK(Mat A,PetscViewer viewer)
522: {
523: Mat_MPIBAIJ_DSC *lu=(Mat_MPIBAIJ_DSC*)A->spptr;
524: int ierr;
525: char *s;
526:
528: /* check if matrix is dscpack type */
529: if (A->ops->solve != MatSolve_MPIBAIJ_DSCPACK) return(0);
531: PetscViewerASCIIPrintf(viewer,"DSCPACK run parameters:n");
533: switch (lu->order_code) {
534: case 1: s = "ND"; break;
535: case 2: s = "Hybrid with Minimum Degree"; break;
536: case 3: s = "Hybrid with Minimum Deficiency"; break;
537: }
538: PetscViewerASCIIPrintf(viewer," order_code: %s n",s);
540: switch (lu->scheme_code) {
541: case 1: s = "standard factorization"; break;
542: case 2: s = "factorization + selective inversion"; break;
543: }
544: PetscViewerASCIIPrintf(viewer," scheme_code: %s n",s);
546: switch (lu->stat) {
547: case 0: s = "NO"; break;
548: case 1: s = "YES"; break;
549: }
550: PetscViewerASCIIPrintf(viewer," display stats: %s n",s);
551:
552: if ( lu->factor_type == DSC_LLT) {
553: s = "LLT";
554: } else if ( lu->factor_type == DSC_LDLT){
555: s = "LDLT";
556: } else {
557: SETERRQ(1,"Unknown factor type");
558: }
559: PetscViewerASCIIPrintf(viewer," factor type: %s n",s);
561: if ( lu->LBLASLevel == DSC_LBLAS1) {
562: s = "BLAS1";
563: } else if ( lu->LBLASLevel == DSC_LBLAS2){
564: s = "BLAS2";
565: } else if ( lu->LBLASLevel == DSC_LBLAS3){
566: s = "BLAS3";
567: } else {
568: SETERRQ(1,"Unknown local phase BLAS level");
569: }
570: PetscViewerASCIIPrintf(viewer," local phase BLAS level: %s n",s);
572: if ( lu->DBLASLevel == DSC_DBLAS1) {
573: s = "BLAS1";
574: } else if ( lu->DBLASLevel == DSC_DBLAS2){
575: s = "BLAS2";
576: } else {
577: SETERRQ(1,"Unknown distributed phase BLAS level");
578: }
579: PetscViewerASCIIPrintf(viewer," distributed phase BLAS level: %s n",s);
580: return(0);
581: }
583: #else
585: #undef __FUNCT__
587: int MatUseDSCPACK_MPIBAIJ(Mat A)
588: {
590: return(0);
591: }
593: #endif