Actual source code: superlu.c
1: /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
3: /*
4: Provides an interface to the SuperLU sparse solver
5: Modified for SuperLU 2.0 by Matthew Knepley
6: */
8: #include src/mat/impls/aij/seq/aij.h
10: #if defined(PETSC_HAVE_SUPERLU) && !defined(PETSC_USE_SINGLE)
11: EXTERN_C_BEGIN
12: #if defined(PETSC_USE_COMPLEX)
13: #include "zsp_defs.h"
14: #else
15: #include "dsp_defs.h"
16: #endif
17: #include "util.h"
18: EXTERN_C_END
20: typedef struct {
21: SuperMatrix A,B,AC,L,U;
22: int *perm_r,*perm_c,ispec,relax,panel_size;
23: double pivot_threshold;
24: NCformat *store;
25: MatStructure flg;
26: PetscTruth SuperluMatOdering;
27: } Mat_SeqAIJ_SuperLU;
30: extern int MatDestroy_SeqAIJ(Mat);
32: #undef __FUNCT__
34: int MatDestroy_SeqAIJ_SuperLU(Mat A)
35: {
36: Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
37: int ierr;
40: if (--A->refct > 0)return(0);
41: /* We have to free the global data or SuperLU crashes (sucky design)*/
42: /* Since we don't know if more solves on other matrices may be done
43: we cannot free the yucky SuperLU global data
44: StatFree();
45: */
47: /* Free the SuperLU datastructures */
48: Destroy_CompCol_Permuted(&lu->AC);
49: Destroy_SuperNode_Matrix(&lu->L);
50: Destroy_CompCol_Matrix(&lu->U);
51: PetscFree(lu->B.Store);
52: PetscFree(lu->A.Store);
53: PetscFree(lu->perm_r);
54: PetscFree(lu->perm_c);
55: PetscFree(lu);
56: MatDestroy_SeqAIJ(A);
57: return(0);
58: }
60: #include src/mat/impls/dense/seq/dense.h
61: #undef __FUNCT__
63: int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat)
64: {
65: Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
66: int numRows = A->m,numCols = A->n;
67: SCformat *Lstore;
68: int numNullCols,size;
69: #if defined(PETSC_USE_COMPLEX)
70: doublecomplex *nullVals,*workVals;
71: #else
72: PetscScalar *nullVals,*workVals;
73: #endif
74: int row,newRow,col,newCol,block,b,ierr;
79: if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix");
80: numNullCols = numCols - numRows;
81: if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems");
82: /* Create the null matrix */
83: MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);
84: if (numNullCols == 0) {
85: MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);
86: MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);
87: return(0);
88: }
89: #if defined(PETSC_USE_COMPLEX)
90: nullVals = (doublecomplex*)((Mat_SeqDense*)(*nullMat)->data)->v;
91: #else
92: nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v;
93: #endif
94: /* Copy in the columns */
95: Lstore = (SCformat*)lu->L.Store;
96: for(block = 0; block <= Lstore->nsuper; block++) {
97: newRow = Lstore->sup_to_col[block];
98: size = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block];
99: for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) {
100: newCol = Lstore->rowind[col];
101: if (newCol >= numRows) {
102: for(b = 0; b < size; b++)
103: #if defined(PETSC_USE_COMPLEX)
104: nullVals[(newCol-numRows)*numRows+newRow+b] = ((doublecomplex*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
105: #else
106: nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
107: #endif
108: }
109: }
110: }
111: /* Permute rhs to form P^T_c B */
112: PetscMalloc(numRows*sizeof(double),&workVals);
113: for(b = 0; b < numNullCols; b++) {
114: for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row];
115: for(row = 0; row < numRows; row++) nullVals[b*numRows+row] = workVals[row];
116: }
117: /* Backward solve the upper triangle A x = b */
118: for(b = 0; b < numNullCols; b++) {
119: #if defined(PETSC_USE_COMPLEX)
120: sp_ztrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr);
121: #else
122: sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr);
123: #endif
124: if (ierr < 0)
125: SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr);
126: }
127: PetscFree(workVals);
129: MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);
130: MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);
131: return(0);
132: }
134: #undef __FUNCT__
136: int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x)
137: {
138: Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
139: PetscScalar *array;
140: int m,ierr;
143: VecGetLocalSize(b,&m);
144: VecCopy(b,x);
145: VecGetArray(x,&array);
146: /* Create the Rhs */
147: lu->B.Stype = SLU_DN;
148: lu->B.Mtype = SLU_GE;
149: lu->B.nrow = m;
150: lu->B.ncol = 1;
151: ((DNformat*)lu->B.Store)->lda = m;
152: ((DNformat*)lu->B.Store)->nzval = array;
153: #if defined(PETSC_USE_COMPLEX)
154: lu->B.Dtype = SLU_Z;
155: zgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr);
156: #else
157: lu->B.Dtype = SLU_D;
158: dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr);
159: #endif
160: if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
161: VecRestoreArray(x,&array);
162: return(0);
163: }
165: static int StatInitCalled = 0;
167: #undef __FUNCT__
169: int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F)
170: {
171: Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(A)->data;
172: Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)(*F)->spptr;
173: int *etree,i,ierr;
174: PetscTruth flag;
177: /* Create the SuperMatrix for A^T:
178: Since SuperLU only likes column-oriented matrices,we pass it the transpose,
179: and then solve A^T X = B in MatSolve().
180: */
181:
182: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numerical factorization */
183: lu->A.Stype = SLU_NC;
184: #if defined(PETSC_USE_COMPLEX)
185: lu->A.Dtype = SLU_Z;
186: #else
187: lu->A.Dtype = SLU_D;
188: #endif
189: lu->A.Mtype = SLU_GE;
190: lu->A.nrow = A->n;
191: lu->A.ncol = A->m;
192:
193: PetscMalloc(sizeof(NCformat),&lu->store);
194: PetscMalloc(sizeof(DNformat),&lu->B.Store);
195: }
196: lu->store->nnz = aa->nz;
197: lu->store->colptr = aa->i;
198: lu->store->rowind = aa->j;
199: lu->store->nzval = aa->a;
200: lu->A.Store = lu->store;
201:
202: /* Shift indices down */
203: if (aa->indexshift) {
204: for(i = 0; i < A->m+1; i++) aa->i[i]--;
205: for(i = 0; i < aa->nz; i++) aa->j[i]--;
206: }
207:
208: /* Set SuperLU options */
209: lu->relax = sp_ienv(2);
210: lu->panel_size = sp_ienv(1);
211: /* We have to initialize global data or SuperLU crashes (sucky design) */
212: if (!StatInitCalled) {
213: StatInit(lu->panel_size,lu->relax);
214: }
215: StatInitCalled++;
217: PetscOptionsBegin(A->comm,A->prefix,"SuperLU Options","Mat");
218: /* use SuperLU mat ordeing */
219: PetscOptionsInt("-mat_superlu_ordering","SuperLU ordering type (one of 0, 1, 2, 3)n 0: natural ordering;n 1: MMD applied to A'*A;n 2: MMD applied to A'+A;n 3: COLAMD, approximate minimum degree column ordering","None",lu->ispec,&lu->ispec,&flag);
220: if (flag) {
221: get_perm_c(lu->ispec, &lu->A, lu->perm_c);
222: lu->SuperluMatOdering = PETSC_TRUE;
223: }
224: PetscOptionsEnd();
226: /* Create the elimination tree */
227: PetscMalloc(A->n*sizeof(int),&etree);
228: sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC);
229: /* Factor the matrix */
230: #if defined(PETSC_USE_COMPLEX)
231: zgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
232: #else
233: dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
234: #endif
235: if (ierr < 0) {
236: SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
237: } else if (ierr > 0) {
238: if (ierr <= A->m) {
239: SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr);
240: } else {
241: SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m);
242: }
243: }
245: /* Shift indices up */
246: if (aa->indexshift) {
247: for (i = 0; i < A->n+1; i++) aa->i[i]++;
248: for (i = 0; i < aa->nz; i++) aa->j[i]++;
249: }
251: /* Cleanup */
252: PetscFree(etree);
254: lu->flg = SAME_NONZERO_PATTERN;
255: return(0);
256: }
258: /*
259: Note the r permutation is ignored
260: */
261: #undef __FUNCT__
263: int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatLUInfo *info,Mat *F)
264: {
265: Mat B;
266: Mat_SeqAIJ_SuperLU *lu;
267: int ierr,*ca;
270:
271: ierr = MatCreateSeqAIJ(A->comm,A->m,A->n,0,PETSC_NULL,F);
272: B = *F;
273: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_SuperLU;
274: B->ops->solve = MatSolve_SeqAIJ_SuperLU;
275: B->ops->destroy = MatDestroy_SeqAIJ_SuperLU;
276: B->factor = FACTOR_LU;
277: (*F)->assembled = PETSC_TRUE; /* required by -sles_view */
278:
279: ierr = PetscNew(Mat_SeqAIJ_SuperLU,&lu);
280: B->spptr = (void*)lu;
281: PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU",
282: (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);
284: /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */
285: PetscMalloc(A->n*sizeof(int),&lu->perm_r);
286: PetscMalloc(A->m*sizeof(int),&lu->perm_c);
288: /* use PETSc mat ordering */
289: ISGetIndices(c,&ca);
290: PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));
291: ISRestoreIndices(c,&ca);
292: lu->SuperluMatOdering = PETSC_FALSE;
294: lu->pivot_threshold = info->dtcol;
295: PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU));
297: lu->flg = DIFFERENT_NONZERO_PATTERN;
298: return(0);
299: }
301: #undef __FUNCT__
303: int MatUseSuperLU_SeqAIJ(Mat A)
304: {
305: PetscTruth flg;
306: int ierr;
310: PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
311: if (!flg) return(0);
313: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU;
315: return(0);
316: }
318: /* used by -sles_view */
319: #undef __FUNCT__
321: int MatSeqAIJFactorInfo_SuperLU(Mat A,PetscViewer viewer)
322: {
323: Mat_SeqAIJ_SuperLU *lu= (Mat_SeqAIJ_SuperLU*)A->spptr;
324: int ierr;
326: /* check if matrix is SuperLU type */
327: if (A->ops->solve != MatSolve_SeqAIJ_SuperLU) return(0);
329: PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:n");
330: if(lu->SuperluMatOdering) PetscViewerASCIIPrintf(viewer," SuperLU mat ordering: %dn",lu->ispec);
332: return(0);
333: }
335: #else
337: #undef __FUNCT__
339: int MatUseSuperLU_SeqAIJ(Mat A)
340: {
342: return(0);
343: }
345: #endif