Actual source code: superlu_dist.c
1: /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2: /*
3: Provides an interface to the SuperLU_DIST sparse solver
4: */
6: #include src/mat/impls/aij/seq/aij.h
7: #include src/mat/impls/aij/mpi/mpiaij.h
8: #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
9: #include "stdlib.h"
10: #endif
12: #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
14: EXTERN_C_BEGIN
15: #if defined(PETSC_USE_COMPLEX)
16: #include "superlu_zdefs.h"
17: #else
18: #include "superlu_ddefs.h"
19: #endif
20: EXTERN_C_END
22: typedef struct {
23: int nprow,npcol;
24: gridinfo_t grid;
25: superlu_options_t options;
26: SuperMatrix A_sup;
27: ScalePermstruct_t ScalePermstruct;
28: LUstruct_t LUstruct;
29: int StatPrint;
30: } Mat_MPIAIJ_SuperLU_DIST;
32: extern int MatDestroy_MPIAIJ(Mat);
33: extern int MatDestroy_SeqAIJ(Mat);
35: #if !defined(PETSC_HAVE_SUPERLU)
36: /* SuperLU function: Convert a row compressed storage into a column compressed storage. */
37: #if !defined(PETSC_USE_COMPLEX)
38: #undef __FUNCT__
40: void dCompRow_to_CompCol(int m, int n, int nnz,
41: double *a, int *colind, int *rowptr,
42: double **at, int_t **rowind, int_t **colptr)
43: {
44: int i, j, col, relpos, *marker;
46: /* Allocate storage for another copy of the matrix. */
47: *at = (double *) doubleMalloc_dist(nnz);
48: *rowind = (int *) intMalloc_dist(nnz);
49: *colptr = (int *) intMalloc_dist(n+1);
50: marker = (int *) intCalloc_dist(n);
52: /* Get counts of each column of A, and set up column pointers */
53: for (i = 0; i < m; ++i)
54: for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
55: (*colptr)[0] = 0;
56: for (j = 0; j < n; ++j) {
57: (*colptr)[j+1] = (*colptr)[j] + marker[j];
58: marker[j] = (*colptr)[j];
59: }
61: /* Transfer the matrix into the compressed column storage. */
62: for (i = 0; i < m; ++i) {
63: for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
64: col = colind[j];
65: relpos = marker[col];
66: (*rowind)[relpos] = i;
67: (*at)[relpos] = a[j];
68: ++marker[col];
69: }
70: }
72: SUPERLU_FREE(marker);
73: }
74: #else
75: void zCompRow_to_CompCol(int m, int n, int nnz,
76: doublecomplex *a, int *colind, int *rowptr,
77: doublecomplex **at, int **rowind, int **colptr)
78: {
79: register int i, j, col, relpos;
80: int *marker;
81:
82: /* Allocate storage for another copy of the matrix. */
83: *at = (doublecomplex *) doublecomplexMalloc_dist(nnz);
84: *rowind = (int *) intMalloc_dist(nnz);
85: *colptr = (int *) intMalloc_dist(n+1);
86: marker = (int *) intCalloc_(n);
87:
88: /* Get counts of each column of A, and set up column pointers */
89: for (i = 0; i < m; ++i)
90: for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
91: (*colptr)[0] = 0;
92: for (j = 0; j < n; ++j) {
93: (*colptr)[j+1] = (*colptr)[j] + marker[j];
94: marker[j] = (*colptr)[j];
95: }
96:
97: /* Transfer the matrix into the compressed column storage. */
98: for (i = 0; i < m; ++i) {
99: for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
100: col = colind[j];
101: relpos = marker[col];
102: (*rowind)[relpos] = i;
103: (*at)[relpos] = a[j];
104: ++marker[col];
105: }
106: }
108: SUPERLU_FREE(marker);
109: }
110: #endif
111: #else
112: EXTERN_C_BEGIN
113: #if defined(PETSC_USE_COMPLEX)
114: extern void zCompRow_to_CompCol(int,int,int,doublecomplex*,int*,int*,doublecomplex**,int**,int**);
115: #else
116: extern void dCompRow_to_CompCol(int,int,int,double*, int*,int*,double**,int_t**,int_t**);
117: #endif
118: EXTERN_C_END
119: #endif /* PETSC_HAVE_SUPERLU*/
121: #undef __FUNCT__
123: int MatDestroy_MPIAIJ_SuperLU_DIST(Mat A)
124: {
125: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
126: Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
127: int ierr, size=a->size;
128:
130: /* Deallocate SuperLU_DIST storage */
131: #if defined(PETSC_USE_COMPLEX)
132: Destroy_CompCol_Matrix(&lu->A_sup);
133: #else
134: Destroy_CompCol_Matrix_dist(&lu->A_sup);
135: #endif
136: Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
137: ScalePermstructFree(&lu->ScalePermstruct);
138: LUstructFree(&lu->LUstruct);
140: /* Release the SuperLU_DIST process grid. */
141: superlu_gridexit(&lu->grid);
143: PetscFree(lu);
144:
145: if (size == 1){
146: MatDestroy_SeqAIJ(A);
147: } else {
148: MatDestroy_MPIAIJ(A);
149: }
150:
151: return(0);
152: }
154: #undef __FUNCT__
156: int MatSolve_MPIAIJ_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
157: {
158: Mat_MPIAIJ *aa = (Mat_MPIAIJ*)A->data;
159: Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
160: int ierr, size=aa->size;
161: int m=A->M, N=A->N;
162: superlu_options_t options=lu->options;
163: SuperLUStat_t stat;
164: double berr[1];
166: PetscScalar *bptr;
168: int info, nrhs=1;
169: Vec x_seq;
170: IS iden;
171: VecScatter scat;
172: PetscLogDouble time0,time,time_min,time_max;
173:
175: if (size > 1) { /* convert mpi vector b to seq vector x_seq */
176: VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
177: ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
178: VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
179: ISDestroy(iden);
181: VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
182: VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
183: VecGetArray(x_seq,&bptr);
184: } else {
185: VecCopy(b_mpi,x);
186: VecGetArray(x,&bptr);
187: }
188:
189: options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
191: PStatInit(&stat); /* Initialize the statistics variables. */
192: if (lu->StatPrint) {
193: MPI_Barrier(A->comm); /* to be removed */
194: PetscGetTime(&time0); /* to be removed */
195: }
196: #if defined(PETSC_USE_COMPLEX)
197: pzgssvx_ABglobal(&options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
198: &lu->grid, &lu->LUstruct, berr, &stat, &info);
199: #else
200: pdgssvx_ABglobal(&options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
201: &lu->grid, &lu->LUstruct, berr, &stat, &info);
202: #endif
203: if (lu->StatPrint) {
204: PetscGetTime(&time); /* to be removed */
205: PStatPrint(&stat, &lu->grid); /* Print the statistics. */
206: }
207: PStatFree(&stat);
208:
209: if (size > 1) { /* convert seq x to mpi x */
210: VecRestoreArray(x_seq,&bptr);
211: VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
212: VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
213: VecScatterDestroy(scat);
214: VecDestroy(x_seq);
215: } else {
216: VecRestoreArray(x,&bptr);
217: }
218: if (lu->StatPrint) {
219: time0 = time - time0;
220: MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
221: MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
222: MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
223: time = time/size; /* average time */
224: PetscPrintf(A->comm, " Time for superlu_dist solve (max/min/avg): %g / %g / %gnn",time_max,time_min,time);
225: }
227: return(0);
228: }
230: #undef __FUNCT__
232: int MatLUFactorNumeric_MPIAIJ_SuperLU_DIST(Mat A,Mat *F)
233: {
234: Mat_MPIAIJ *fac = (Mat_MPIAIJ*)(*F)->data;
235: Mat *tseq,A_seq = PETSC_NULL;
236: Mat_SeqAIJ *aa;
237: Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)(*F)->spptr;
238: int M=A->M,N=A->N,info,ierr,size=fac->size,i;
239: SuperLUStat_t stat;
240: double *berr=0;
241: #if defined(PETSC_USE_COMPLEX)
242: doublecomplex *a,*bptr=0;
243: #else
244: double *a,*bptr=0;
245: #endif
246: int_t *asub, *xa;
247: SuperMatrix A_sup;
248: IS isrow;
249: PetscLogDouble time0[2],time[2],time_min[2],time_max[2];
252: if (lu->StatPrint) {
253: MPI_Barrier(A->comm);
254: PetscGetTime(&time0[0]);
255: }
257: if (size > 1) { /* convert mpi A to seq mat A */
258: ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
259: MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
260: ISDestroy(isrow);
261:
262: A_seq = *tseq;
263: PetscFree(tseq);
264: aa = (Mat_SeqAIJ*)A_seq->data;
265: } else {
266: aa = (Mat_SeqAIJ*)A->data;
267: }
269: /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
270: #if defined(PETSC_USE_COMPLEX)
271: zallocateA_dist(N, aa->nz, &a, &asub, &xa);
272: zCompRow_to_CompCol(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&a,&asub, &xa);
273: #else
274: dallocateA_dist(N, aa->nz, &a, &asub, &xa);
275: dCompRow_to_CompCol(M,N,aa->nz,aa->a,aa->j,aa->i,&a, &asub, &xa);
276: #endif
277: if (lu->StatPrint) {
278: PetscGetTime(&time[0]);
279: time0[0] = time[0] - time0[0];
280: }
282: /* Create compressed column matrix A_sup. */
283: #if defined(PETSC_USE_COMPLEX)
284: zCreate_CompCol_Matrix(&A_sup, M, N, aa->nz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE);
285: #else
286: dCreate_CompCol_Matrix_dist(&A_sup, M, N, aa->nz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
287: #endif
288: /* Factor the matrix. */
289: PStatInit(&stat); /* Initialize the statistics variables. */
291: if (lu->StatPrint) {
292: MPI_Barrier(A->comm);
293: PetscGetTime(&time0[1]);
294: }
295: #if defined(PETSC_USE_COMPLEX)
296: pzgssvx_ABglobal(&lu->options, &A_sup, &lu->ScalePermstruct, bptr, M, 0,
297: &lu->grid, &lu->LUstruct, berr, &stat, &info);
298: #else
299: pdgssvx_ABglobal(&lu->options, &A_sup, &lu->ScalePermstruct, bptr, M, 0,
300: &lu->grid, &lu->LUstruct, berr, &stat, &info);
301: #endif
302: if (lu->StatPrint) {
303: PetscGetTime(&time[1]); /* to be removed */
304: time0[1] = time[1] - time0[1];
305: if (lu->StatPrint) PStatPrint(&stat, &lu->grid); /* Print the statistics. */
306: }
307: PStatFree(&stat);
309: lu->A_sup = A_sup;
310: lu->options.Fact = SamePattern; /* Sparsity pattern of A and perm_c can be reused. */
311: if (size > 1){
312: MatDestroy(A_seq);
313: }
315: if (lu->StatPrint) {
316: MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm);
317: MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm);
318: MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm);
319: for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */
320: PetscPrintf(A->comm, " Time for mat conversion (max/min/avg): %g / %g / %gn",time_max[0],time_min[0],time[0]);
321: PetscPrintf(A->comm, " Time for superlu_dist fact (max/min/avg): %g / %g / %gnn",time_max[1],time_min[1],time[1]);
322: }
323: (*F)->assembled = PETSC_TRUE;
324: return(0);
325: }
327: /* Note the Petsc r and c permutations are ignored */
328: #undef __FUNCT__
330: int MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST(Mat A,IS r,IS c,MatLUInfo *info,Mat *F)
331: {
332: Mat_MPIAIJ *fac;
333: Mat_MPIAIJ_SuperLU_DIST *lu;
334: int ierr,M=A->M,N=A->N,size;
335: gridinfo_t grid;
336: superlu_options_t options;
337: ScalePermstruct_t ScalePermstruct;
338: LUstruct_t LUstruct;
339: char buff[32];
340: PetscTruth flg;
341: char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
342: char *prtype[] = {"LargeDiag","NATURAL"};
344:
345: PetscNew(Mat_MPIAIJ_SuperLU_DIST,&lu);
347: /* Create the factorization matrix F */
348: MatCreateMPIAIJ(A->comm,A->m,A->n,M,N,0,PETSC_NULL,0,PETSC_NULL,F);
350: (*F)->ops->lufactornumeric = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
351: (*F)->ops->solve = MatSolve_MPIAIJ_SuperLU_DIST;
352: (*F)->ops->destroy = MatDestroy_MPIAIJ_SuperLU_DIST;
353: (*F)->factor = FACTOR_LU;
354: (*F)->spptr = (void*)lu;
355: fac = (Mat_MPIAIJ*)(*F)->data;
357: /* Set the input options */
358: set_default_options(&options);
360: MPI_Comm_size(A->comm,&size);
361: lu->nprow = size/2; /* Default process rows. */
362: if (lu->nprow == 0) lu->nprow = 1;
363: lu->npcol = size/lu->nprow; /* Default process columns. */
365: PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");
366:
367: PetscOptionsInt("-mat_aij_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
368: PetscOptionsInt("-mat_aij_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
369: if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
370:
371: PetscOptionsLogical("-mat_aij_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
372: if (!flg) {
373: options.Equil = NO;
374: }
376: PetscOptionsEList("-mat_aij_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);
377: while (flg) {
378: PetscStrcmp(buff,"LargeDiag",&flg);
379: if (flg) {
380: options.RowPerm = LargeDiag;
381: break;
382: }
383: PetscStrcmp(buff,"NATURAL",&flg);
384: if (flg) {
385: options.RowPerm = NOROWPERM;
386: break;
387: }
388: SETERRQ1(1,"Unknown row permutation %s",buff);
389: }
391: PetscOptionsEList("-mat_aij_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);
392: while (flg) {
393: PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);
394: if (flg) {
395: options.ColPerm = MMD_AT_PLUS_A;
396: break;
397: }
398: PetscStrcmp(buff,"NATURAL",&flg);
399: if (flg) {
400: options.ColPerm = NATURAL;
401: break;
402: }
403: PetscStrcmp(buff,"MMD_ATA",&flg);
404: if (flg) {
405: options.ColPerm = MMD_ATA;
406: break;
407: }
408: PetscStrcmp(buff,"COLAMD",&flg);
409: if (flg) {
410: options.ColPerm = COLAMD;
411: break;
412: }
413: SETERRQ1(1,"Unknown column permutation %s",buff);
414: }
416: PetscOptionsLogical("-mat_aij_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
417: if (!flg) {
418: options.ReplaceTinyPivot = NO;
419: }
421: options.IterRefine = NOREFINE;
422: PetscOptionsLogical("-mat_aij_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
423: if (flg) {
424: options.IterRefine = DOUBLE;
425: }
427: if (PetscLogPrintInfo) {
428: lu->StatPrint = (int)PETSC_TRUE;
429: } else {
430: lu->StatPrint = (int)PETSC_FALSE;
431: }
432: PetscOptionsLogical("-mat_aij_superlu_dist_statprint","Print factorization information","None",
433: (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);
434: PetscOptionsEnd();
436: /* Initialize the SuperLU process grid. */
437: superlu_gridinit(A->comm, lu->nprow, lu->npcol, &grid);
439: /* Initialize ScalePermstruct and LUstruct. */
440: ScalePermstructInit(M, N, &ScalePermstruct);
441: LUstructInit(M, N, &LUstruct);
443: lu->ScalePermstruct = ScalePermstruct;
444: lu->LUstruct = LUstruct;
445: lu->options = options;
446: lu->grid = grid;
447: fac->size = size;
449: return(0);
450: }
452: #undef __FUNCT__
454: int MatUseSuperLU_DIST_MPIAIJ(Mat A)
455: {
457: A->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST;
458: A->ops->lufactornumeric = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
459: return(0);
460: }
462: int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer)
463: {
464: Mat_MPIAIJ_SuperLU_DIST *lu= (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
465: superlu_options_t options;
466: int ierr;
467: char *colperm;
470: /* check if matrix is superlu_dist type */
471: if (A->ops->solve != MatSolve_MPIAIJ_SuperLU_DIST) return(0);
473: options = lu->options;
474: PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:n");
475: PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s n",(options.Equil != NO) ? "true": "false");
476: PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s n",(options.ReplaceTinyPivot != NO) ? "true": "false");
477: PetscViewerASCIIPrintf(viewer," Use iterative refinement %s n",(options.IterRefine == DOUBLE) ? "true": "false");
478: PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d n",lu->nprow,lu->npcol);
479: PetscViewerASCIIPrintf(viewer," Row permutation %s n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
480: if (options.ColPerm == NATURAL) {
481: colperm = "NATURAL";
482: } else if (options.ColPerm == MMD_AT_PLUS_A) {
483: colperm = "MMD_AT_PLUS_A";
484: } else if (options.ColPerm == MMD_ATA) {
485: colperm = "MMD_ATA";
486: } else if (options.ColPerm == COLAMD) {
487: colperm = "COLAMD";
488: } else {
489: SETERRQ(1,"Unknown column permutation");
490: }
491: PetscViewerASCIIPrintf(viewer," Column permutation %s n",colperm);
492: return(0);
493: }
495: #else
497: #undef __FUNCT__
499: int MatUseSuperLU_DIST_MPIAIJ(Mat A)
500: {
502: return(0);
503: }
505: #endif