Actual source code: superlu.c

  1: /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/

  3: /* 
  4:         Provides an interface to the SuperLU sparse solver
  5:           Modified for SuperLU 2.0 by Matthew Knepley

  7: */

 9:  #include src/mat/impls/aij/seq/aij.h

 11: #if defined(PETSC_HAVE_SUPERLU) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
 12: EXTERN_C_BEGIN
 13: #include "dsp_defs.h"
 14: #include "util.h"
 15: EXTERN_C_END


 18: typedef struct {
 19:   SuperMatrix A;
 20:   SuperMatrix B;
 21:   SuperMatrix AC;
 22:   SuperMatrix L;
 23:   SuperMatrix U;
 24:   int        *perm_r;
 25:   int        *perm_c;
 26:   int         relax;
 27:   int         panel_size;
 28:   double      pivot_threshold;
 29: } Mat_SeqAIJ_SuperLU;


 32: extern int MatDestroy_SeqAIJ(Mat);

 34: extern int MatDestroy_SeqAIJ_SuperLU(Mat A)
 35: {
 36:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data;
 37:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)a->spptr;
 38:   int                ierr;

 41:   if (--A->refct > 0)return(0);
 42:   /* We have to free the global data or SuperLU crashes (sucky design)*/
 43:   StatFree();
 44:   /* Free the SuperLU datastructures */
 45:   Destroy_CompCol_Permuted(&lu->AC);
 46:   Destroy_SuperNode_Matrix(&lu->L);
 47:   Destroy_CompCol_Matrix(&lu->U);
 48:   PetscFree(lu->B.Store);
 49:   PetscFree(lu->A.Store);
 50:   PetscFree(lu->perm_r);
 51:   PetscFree(lu->perm_c);
 52:   PetscFree(lu);
 53:   MatDestroy_SeqAIJ(A);
 54:   return(0);
 55: }

 57:  #include src/mat/impls/dense/seq/dense.h
 58: int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat)
 59: {
 60:   Mat_SeqAIJ         *a       = (Mat_SeqAIJ*)A->data;
 61:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)a->spptr;
 62:   int                 numRows = A->m;
 63:   int                 numCols = A->n;
 64:   SCformat           *Lstore;
 65:   int                 numNullCols,size;
 66:   PetscScalar             *nullVals,*workVals;
 67:   int                 row,newRow,col,newCol,block,b;
 68:   int                 ierr;

 73:   if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix");
 74:   numNullCols = numCols - numRows;
 75:   if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems");
 76:   /* Create the null matrix */
 77:   MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);
 78:   if (numNullCols == 0) {
 79:     MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);
 80:     MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);
 81:     return(0);
 82:   }
 83:   nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v;
 84:   /* Copy in the columns */
 85:   Lstore = (SCformat*)lu->L.Store;
 86:   for(block = 0; block <= Lstore->nsuper; block++) {
 87:     newRow = Lstore->sup_to_col[block];
 88:     size   = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block];
 89:     for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) {
 90:       newCol = Lstore->rowind[col];
 91:       if (newCol >= numRows) {
 92:         for(b = 0; b < size; b++)
 93:           nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
 94:       }
 95:     }
 96:   }
 97:   /* Permute rhs to form P^T_c B */
 98:   PetscMalloc(numRows*sizeof(double),&workVals);
 99:   for(b = 0; b < numNullCols; b++) {
100:     for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row];
101:     for(row = 0; row < numRows; row++) nullVals[b*numRows+row]   = workVals[row];
102:   }
103:   /* Backward solve the upper triangle A x = b */
104:   for(b = 0; b < numNullCols; b++) {
105:     sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr);
106:     if (ierr < 0)
107:       SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr);
108:   }
109:   PetscFree(workVals);

111:   MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);
112:   MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);
113:   return(0);
114: }

116: extern int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x)
117: {
118:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data;
119:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)a->spptr;
120:   PetscScalar             *array;
121:   int                 m;
122:   int                 ierr;

125:   VecGetLocalSize(b,&m);
126:   VecCopy(b,x);
127:   VecGetArray(x,&array);
128:   /* Create the Rhs */
129:   lu->B.Stype        = DN;
130:   lu->B.Dtype        = _D;
131:   lu->B.Mtype        = GE;
132:   lu->B.nrow         = m;
133:   lu->B.ncol         = 1;
134:   ((DNformat*)lu->B.Store)->lda   = m;
135:   ((DNformat*)lu->B.Store)->nzval = array;
136:   dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr);
137:   if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
138:   VecRestoreArray(x,&array);
139:   return(0);
140: }

142: /*
143:    Note the r permutation is ignored
144: */
145: extern int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatLUInfo *info,Mat *F)
146: {
147:   Mat_SeqAIJ         *b;
148:   Mat                 B;
149:   Mat_SeqAIJ_SuperLU *lu;
150:   int                 ierr,*ca;

153:   ierr            = MatCreateSeqAIJ(A->comm,A->m,A->n,0,PETSC_NULL,F);
154:   B               = *F;
155:   B->ops->solve   = MatSolve_SeqAIJ_SuperLU;
156:   B->ops->destroy = MatDestroy_SeqAIJ_SuperLU;
157:   B->factor       = FACTOR_LU;
158:   b               = (Mat_SeqAIJ*)B->data;
159:   ierr            = PetscNew(Mat_SeqAIJ_SuperLU,&lu);
160:   b->spptr        = (void*)lu;
161:   PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU",
162:                                     (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);

164:   /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */
165:   PetscMalloc(A->n*sizeof(int),&lu->perm_r);
166:   PetscMalloc(A->m*sizeof(int),&lu->perm_c);
167:   ISGetIndices(c,&ca);
168:   PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));
169:   ISRestoreIndices(c,&ca);
170: 
171:   if (info) {
172:     lu->pivot_threshold = info->dtcol;
173:   } else {
174:     lu->pivot_threshold = 0.0; /* no pivoting */
175:   }

177:   PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU));
178:   return(0);
179: }

181: extern int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F)
182: {
183:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)(*F)->data;
184:   Mat_SeqAIJ         *aa = (Mat_SeqAIJ*)(A)->data;
185:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)a->spptr;
186:   NCformat           *store;
187:   int                *etree,i,ierr;

190:   /* Create the SuperMatrix for A^T:

192:        Since SuperLU only likes column-oriented matrices,we pass it the transpose,
193:        and then solve A^T X = B in MatSolve().
194:   */
195:   lu->A.Stype   = NC;
196:   lu->A.Dtype   = _D;
197:   lu->A.Mtype   = GE;
198:   lu->A.nrow    = A->n;
199:   lu->A.ncol    = A->m;
200:   ierr          = PetscMalloc(sizeof(NCformat),&store);
201:   store->nnz    = aa->nz;
202:   store->colptr = aa->i;
203:   store->rowind = aa->j;
204:   store->nzval  = aa->a;
205:   lu->A.Store   = store;
206:   ierr          = PetscMalloc(sizeof(DNformat),&lu->B.Store);

208:   /* Shift indices down */
209:   if (aa->indexshift) {
210:     for(i = 0; i < A->m+1; i++) aa->i[i]--;
211:     for(i = 0; i < aa->nz; i++) aa->j[i]--;
212:   }
213: 
214:   /* Set SuperLU options */
215:   lu->relax      = sp_ienv(2);
216:   lu->panel_size = sp_ienv(1);
217:   ierr           = PetscMalloc(A->n*sizeof(int),&etree);
218:   /* We have to initialize global data or SuperLU crashes (sucky design) */
219:   StatInit(lu->panel_size,lu->relax);

221:   /* Create the elimination tree */
222:   sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC);
223:   /* Factor the matrix */
224:   dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
225:   if (ierr < 0) {
226:     SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
227:   } else if (ierr > 0) {
228:     if (ierr <= A->m) {
229:       SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr);
230:     } else {
231:       SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m);
232:     }
233:   }

235:   /* Shift indices up */
236:   if (aa->indexshift) {
237:     for (i = 0; i < A->n+1; i++)  aa->i[i]++;
238:     for (i = 0; i < aa->nz; i++)  aa->j[i]++;
239:   }

241:   /* Cleanup */
242:   PetscFree(etree);
243:   return(0);
244: }


247: int MatUseSuperLU_SeqAIJ(Mat A)
248: {
249:   PetscTruth flg;
250:   int        ierr;

254:   PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
255:   if (!flg) return(0);

257:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU;
258:   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ_SuperLU;
259:   A->ops->solve            = MatSolve_SeqAIJ_SuperLU;

261:   return(0);
262: }

264: #else

266: int MatUseSuperLU_SeqAIJ(Mat A)
267: {
269:   return(0);
270: }

272: #endif