Actual source code: superlu.c

  1: /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/

  3: /* 
  4:         Provides an interface to the SuperLU sparse solver
  5:           Modified for SuperLU 2.0 by Matthew Knepley
  6: */

 8:  #include src/mat/impls/aij/seq/aij.h

 10: #if defined(PETSC_HAVE_SUPERLU) && !defined(PETSC_USE_SINGLE)
 11: EXTERN_C_BEGIN
 12: #if defined(PETSC_USE_COMPLEX)
 13: #include "zsp_defs.h"
 14: #else
 15: #include "dsp_defs.h"
 16: #endif  
 17: #include "util.h"
 18: EXTERN_C_END

 20: typedef struct {
 21:   SuperMatrix  A,B,AC,L,U;
 22:   int          *perm_r,*perm_c,ispec,relax,panel_size;
 23:   double       pivot_threshold;
 24:   NCformat     *store;
 25:   MatStructure flg;
 26:   PetscTruth   SuperluMatOdering;
 27: } Mat_SeqAIJ_SuperLU;


 30: extern int MatDestroy_SeqAIJ(Mat);

 32: #undef __FUNCT__  
 34: int MatDestroy_SeqAIJ_SuperLU(Mat A)
 35: {
 36:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
 37:   int                ierr;

 40:   if (--A->refct > 0)return(0);
 41:   /* We have to free the global data or SuperLU crashes (sucky design)*/
 42:   /* Since we don't know if more solves on other matrices may be done
 43:      we cannot free the yucky SuperLU global data
 44:     StatFree(); 
 45:   */

 47:   /* Free the SuperLU datastructures */
 48:   Destroy_CompCol_Permuted(&lu->AC);
 49:   Destroy_SuperNode_Matrix(&lu->L);
 50:   Destroy_CompCol_Matrix(&lu->U);
 51:   PetscFree(lu->B.Store);
 52:   PetscFree(lu->A.Store);
 53:   PetscFree(lu->perm_r);
 54:   PetscFree(lu->perm_c);
 55:   PetscFree(lu);
 56:   MatDestroy_SeqAIJ(A);
 57:   return(0);
 58: }

 60:  #include src/mat/impls/dense/seq/dense.h
 61: #undef __FUNCT__  
 63: int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat)
 64: {
 65:   Mat_SeqAIJ_SuperLU  *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
 66:   int                 numRows = A->m,numCols = A->n;
 67:   SCformat            *Lstore;
 68:   int                 numNullCols,size;
 69: #if defined(PETSC_USE_COMPLEX)
 70:   doublecomplex       *nullVals,*workVals;
 71: #else
 72:   PetscScalar         *nullVals,*workVals;
 73: #endif
 74:   int                 row,newRow,col,newCol,block,b,ierr;

 79:   if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix");
 80:   numNullCols = numCols - numRows;
 81:   if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems");
 82:   /* Create the null matrix */
 83:   MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);
 84:   if (numNullCols == 0) {
 85:     MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);
 86:     MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);
 87:     return(0);
 88:   }
 89: #if defined(PETSC_USE_COMPLEX)
 90:   nullVals = (doublecomplex*)((Mat_SeqDense*)(*nullMat)->data)->v;
 91: #else
 92:   nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v;
 93: #endif
 94:   /* Copy in the columns */
 95:   Lstore = (SCformat*)lu->L.Store;
 96:   for(block = 0; block <= Lstore->nsuper; block++) {
 97:     newRow = Lstore->sup_to_col[block];
 98:     size   = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block];
 99:     for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) {
100:       newCol = Lstore->rowind[col];
101:       if (newCol >= numRows) {
102:         for(b = 0; b < size; b++)
103: #if defined(PETSC_USE_COMPLEX)
104:           nullVals[(newCol-numRows)*numRows+newRow+b] = ((doublecomplex*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
105: #else
106:           nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
107: #endif
108:       }
109:     }
110:   }
111:   /* Permute rhs to form P^T_c B */
112:   PetscMalloc(numRows*sizeof(double),&workVals);
113:   for(b = 0; b < numNullCols; b++) {
114:     for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row];
115:     for(row = 0; row < numRows; row++) nullVals[b*numRows+row]   = workVals[row];
116:   }
117:   /* Backward solve the upper triangle A x = b */
118:   for(b = 0; b < numNullCols; b++) {
119: #if defined(PETSC_USE_COMPLEX)
120:     sp_ztrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr);
121: #else
122:     sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr);
123: #endif
124:     if (ierr < 0)
125:       SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr);
126:   }
127:   PetscFree(workVals);

129:   MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);
130:   MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);
131:   return(0);
132: }

134: #undef __FUNCT__  
136: int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x)
137: {
138:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
139:   PetscScalar        *array;
140:   int                m,ierr;

143:   VecGetLocalSize(b,&m);
144:   VecCopy(b,x);
145:   VecGetArray(x,&array);
146:   /* Create the Rhs */
147:   lu->B.Stype        = SLU_DN;
148:   lu->B.Mtype        = SLU_GE;
149:   lu->B.nrow         = m;
150:   lu->B.ncol         = 1;
151:   ((DNformat*)lu->B.Store)->lda   = m;
152:   ((DNformat*)lu->B.Store)->nzval = array;
153: #if defined(PETSC_USE_COMPLEX)
154:   lu->B.Dtype        = SLU_Z;
155:   zgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr);
156: #else
157:   lu->B.Dtype        = SLU_D;
158:   dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr);
159: #endif
160:   if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
161:   VecRestoreArray(x,&array);
162:   return(0);
163: }

165: static int StatInitCalled = 0;

167: #undef __FUNCT__  
169: int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F)
170: {
171:   Mat_SeqAIJ         *aa = (Mat_SeqAIJ*)(A)->data;
172:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)(*F)->spptr;
173:   int                *etree,i,ierr;
174:   PetscTruth         flag;

177:   /* Create the SuperMatrix for A^T:
178:        Since SuperLU only likes column-oriented matrices,we pass it the transpose,
179:        and then solve A^T X = B in MatSolve().
180:   */
181: 
182:   if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numerical factorization */
183:     lu->A.Stype   = SLU_NC;
184: #if defined(PETSC_USE_COMPLEX)
185:     lu->A.Dtype   = SLU_Z;
186: #else
187:     lu->A.Dtype   = SLU_D;
188: #endif
189:     lu->A.Mtype   = SLU_GE;
190:     lu->A.nrow    = A->n;
191:     lu->A.ncol    = A->m;
192: 
193:     PetscMalloc(sizeof(NCformat),&lu->store);
194:     PetscMalloc(sizeof(DNformat),&lu->B.Store);
195:   }
196:   lu->store->nnz    = aa->nz;
197:   lu->store->colptr = aa->i;
198:   lu->store->rowind = aa->j;
199:   lu->store->nzval  = aa->a;
200:   lu->A.Store       = lu->store;
201: 
202:   /* Shift indices down */
203:   if (aa->indexshift) {
204:     for(i = 0; i < A->m+1; i++) aa->i[i]--;
205:     for(i = 0; i < aa->nz; i++) aa->j[i]--;
206:   }
207: 
208:   /* Set SuperLU options */
209:   lu->relax      = sp_ienv(2);
210:   lu->panel_size = sp_ienv(1);
211:   /* We have to initialize global data or SuperLU crashes (sucky design) */
212:   if (!StatInitCalled) {
213:     StatInit(lu->panel_size,lu->relax);
214:   }
215:   StatInitCalled++;

217:   PetscOptionsBegin(A->comm,A->prefix,"SuperLU Options","Mat");
218:   /* use SuperLU mat ordeing */
219:   PetscOptionsInt("-mat_superlu_ordering","SuperLU ordering type (one of 0, 1, 2, 3)n   0: natural ordering;n   1: MMD applied to A'*A;n   2: MMD applied to A'+A;n   3: COLAMD, approximate minimum degree column ordering","None",lu->ispec,&lu->ispec,&flag);
220:   if (flag) {
221:     get_perm_c(lu->ispec, &lu->A, lu->perm_c);
222:     lu->SuperluMatOdering = PETSC_TRUE;
223:   }
224:   PetscOptionsEnd();

226:   /* Create the elimination tree */
227:   PetscMalloc(A->n*sizeof(int),&etree);
228:   sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC);
229:   /* Factor the matrix */
230: #if defined(PETSC_USE_COMPLEX)
231:   zgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
232: #else
233:   dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
234: #endif
235:   if (ierr < 0) {
236:     SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
237:   } else if (ierr > 0) {
238:     if (ierr <= A->m) {
239:       SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr);
240:     } else {
241:       SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m);
242:     }
243:   }

245:   /* Shift indices up */
246:   if (aa->indexshift) {
247:     for (i = 0; i < A->n+1; i++)  aa->i[i]++;
248:     for (i = 0; i < aa->nz; i++)  aa->j[i]++;
249:   }

251:   /* Cleanup */
252:   PetscFree(etree);

254:   lu->flg = SAME_NONZERO_PATTERN;
255:   return(0);
256: }

258: /*
259:    Note the r permutation is ignored
260: */
261: #undef __FUNCT__  
263: int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatLUInfo *info,Mat *F)
264: {
265:   Mat                 B;
266:   Mat_SeqAIJ_SuperLU  *lu;
267:   int                 ierr,*ca;

270: 
271:   ierr            = MatCreateSeqAIJ(A->comm,A->m,A->n,0,PETSC_NULL,F);
272:   B               = *F;
273:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_SuperLU;
274:   B->ops->solve           = MatSolve_SeqAIJ_SuperLU;
275:   B->ops->destroy         = MatDestroy_SeqAIJ_SuperLU;
276:   B->factor               = FACTOR_LU;
277:   (*F)->assembled         = PETSC_TRUE;  /* required by -sles_view */
278: 
279:   ierr            = PetscNew(Mat_SeqAIJ_SuperLU,&lu);
280:   B->spptr        = (void*)lu;
281:   PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU",
282:                                     (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);

284:   /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */
285:   PetscMalloc(A->n*sizeof(int),&lu->perm_r);
286:   PetscMalloc(A->m*sizeof(int),&lu->perm_c);

288:   /* use PETSc mat ordering */
289:   ISGetIndices(c,&ca);
290:   PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));
291:   ISRestoreIndices(c,&ca);
292:   lu->SuperluMatOdering = PETSC_FALSE;

294:   lu->pivot_threshold = info->dtcol;
295:   PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU));

297:   lu->flg = DIFFERENT_NONZERO_PATTERN;
298:   return(0);
299: }

301: #undef __FUNCT__  
303: int MatUseSuperLU_SeqAIJ(Mat A)
304: {
305:   PetscTruth flg;
306:   int        ierr;

310:   PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
311:   if (!flg) return(0);

313:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU;

315:   return(0);
316: }

318: /* used by -sles_view */
319: #undef __FUNCT__  
321: int MatSeqAIJFactorInfo_SuperLU(Mat A,PetscViewer viewer)
322: {
323:   Mat_SeqAIJ_SuperLU      *lu= (Mat_SeqAIJ_SuperLU*)A->spptr;
324:   int                     ierr;
326:   /* check if matrix is SuperLU type */
327:   if (A->ops->solve != MatSolve_SeqAIJ_SuperLU) return(0);

329:   PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:n");
330:   if(lu->SuperluMatOdering) PetscViewerASCIIPrintf(viewer,"  SuperLU mat ordering: %dn",lu->ispec);

332:   return(0);
333: }

335: #else

337: #undef __FUNCT__  
339: int MatUseSuperLU_SeqAIJ(Mat A)
340: {
342:   return(0);
343: }

345: #endif