Actual source code: superlu_dist.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:         Provides an interface to the SuperLU_DIST_2.0 sparse solver
  5: */

  7: #include "src/mat/impls/aij/seq/aij.h"
  8: #include "src/mat/impls/aij/mpi/mpiaij.h"
  9: #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
 10: #include "stdlib.h"
 11: #endif

 14: #if defined(PETSC_USE_COMPLEX)
 15: #include "superlu_zdefs.h"
 16: #else
 17: #include "superlu_ddefs.h"
 18: #endif

 21: typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;

 23: typedef struct {
 24:   int_t                   nprow,npcol,*row,*col;
 25:   gridinfo_t              grid;
 26:   superlu_options_t       options;
 27:   SuperMatrix             A_sup;
 28:   ScalePermstruct_t       ScalePermstruct;
 29:   LUstruct_t              LUstruct;
 30:   int                     StatPrint;
 31:   int                     MatInputMode;
 32:   SOLVEstruct_t           SOLVEstruct;
 33:   fact_t                  FactPattern;
 34:   MPI_Comm                comm_superlu;
 35: #if defined(PETSC_USE_COMPLEX)
 36:   doublecomplex           *val;
 37: #else
 38:   double                  *val;
 39: #endif

 41:   /* A few function pointers for inheritance */
 42:   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
 43:   PetscErrorCode (*MatView)(Mat,PetscViewer);
 44:   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
 45:   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
 46:   PetscErrorCode (*MatDestroy)(Mat);

 48:   /* Flag to clean up (non-global) SuperLU objects during Destroy */
 49:   PetscTruth CleanUpSuperLU_Dist;
 50: } Mat_SuperLU_DIST;

 52: EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);

 65: PetscErrorCode  MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
 66: {
 67:   PetscErrorCode   ierr;
 68:   Mat              B=*newmat;
 69:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

 72:   if (reuse == MAT_INITIAL_MATRIX) {
 73:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 74:   }
 75:   /* Reset the original function pointers */
 76:   B->ops->duplicate        = lu->MatDuplicate;
 77:   B->ops->view             = lu->MatView;
 78:   B->ops->assemblyend      = lu->MatAssemblyEnd;
 79:   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
 80:   B->ops->destroy          = lu->MatDestroy;
 81:   PetscFree(lu);
 82:   A->spptr = PETSC_NULL;

 84:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);
 85:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);
 86:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);
 87:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);

 89:   PetscObjectChangeTypeName((PetscObject)B,type);
 90:   *newmat = B;
 91:   return(0);
 92: }

 97: PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
 98: {
 99:   PetscErrorCode   ierr;
100:   PetscMPIInt      size;
101:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
102: 
104:   if (lu->CleanUpSuperLU_Dist) {
105:     /* Deallocate SuperLU_DIST storage */
106:     if (lu->MatInputMode == GLOBAL) {
107:       Destroy_CompCol_Matrix_dist(&lu->A_sup);
108:     } else {
109:       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
110:       if ( lu->options.SolveInitialized ) {
111: #if defined(PETSC_USE_COMPLEX)
112:         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
113: #else
114:         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
115: #endif
116:       }
117:     }
118:     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
119:     ScalePermstructFree(&lu->ScalePermstruct);
120:     LUstructFree(&lu->LUstruct);
121: 
122:     /* Release the SuperLU_DIST process grid. */
123:     superlu_gridexit(&lu->grid);
124: 
125:     MPI_Comm_free(&(lu->comm_superlu));
126:   }

128:   MPI_Comm_size(((PetscObject)A)->comm,&size);
129:   if (size == 1) {
130:     MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);
131:   } else {
132:     MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);
133:   }
134:   (*A->ops->destroy)(A);
135:   return(0);
136: }

140: PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
141: {
142:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
143:   PetscErrorCode   ierr;
144:   PetscMPIInt      size;
145:   PetscInt         m=A->rmap.N, N=A->cmap.N;
146:   SuperLUStat_t    stat;
147:   double           berr[1];
148:   PetscScalar      *bptr;
149:   PetscInt         info, nrhs=1;
150:   Vec              x_seq;
151:   IS               iden;
152:   VecScatter       scat;
153: 
155:   MPI_Comm_size(((PetscObject)A)->comm,&size);
156:   if (size > 1) {
157:     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
158:       VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
159:       ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
160:       VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
161:       ISDestroy(iden);

163:       VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
164:       VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
165:       VecGetArray(x_seq,&bptr);
166:     } else { /* distributed mat input */
167:       VecCopy(b_mpi,x);
168:       VecGetArray(x,&bptr);
169:     }
170:   } else { /* size == 1 */
171:     VecCopy(b_mpi,x);
172:     VecGetArray(x,&bptr);
173:   }
174: 
175:   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");

177:   PStatInit(&stat);        /* Initialize the statistics variables. */
178:   if (lu->MatInputMode == GLOBAL) {
179: #if defined(PETSC_USE_COMPLEX)
180:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
181:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
182: #else
183:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
184:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
185: #endif 
186:   } else { /* distributed mat input */
187: #if defined(PETSC_USE_COMPLEX)
188:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
189:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
190:     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
191: #else
192:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
193:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
194:     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
195: #endif
196:   }
197:   if (lu->options.PrintStat) {
198:      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
199:   }
200:   PStatFree(&stat);
201: 
202:   if (size > 1) {
203:     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
204:       VecRestoreArray(x_seq,&bptr);
205:       VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
206:       VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
207:       VecScatterDestroy(scat);
208:       VecDestroy(x_seq);
209:     } else {
210:       VecRestoreArray(x,&bptr);
211:     }
212:   } else {
213:     VecRestoreArray(x,&bptr);
214:   }
215:   return(0);
216: }

220: PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
221: {
222:   Mat              *tseq,A_seq = PETSC_NULL;
223:   Mat_SeqAIJ       *aa,*bb;
224:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
225:   PetscErrorCode   ierr;
226:   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
227:                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
228:   PetscMPIInt      size,rank;
229:   SuperLUStat_t    stat;
230:   double           *berr=0;
231:   IS               isrow;
232:   PetscLogDouble   time0,time,time_min,time_max;
233:   Mat              F_diag=PETSC_NULL;
234: #if defined(PETSC_USE_COMPLEX)
235:   doublecomplex    *av, *bv;
236: #else
237:   double           *av, *bv;
238: #endif

241:   MPI_Comm_size(((PetscObject)A)->comm,&size);
242:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);
243: 
244:   if (lu->options.PrintStat) { /* collect time for mat conversion */
245:     MPI_Barrier(((PetscObject)A)->comm);
246:     PetscGetTime(&time0);
247:   }

249:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
250:     if (size > 1) { /* convert mpi A to seq mat A */
251:       ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
252:       MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
253:       ISDestroy(isrow);
254: 
255:       A_seq = *tseq;
256:       PetscFree(tseq);
257:       aa =  (Mat_SeqAIJ*)A_seq->data;
258:     } else {
259:       aa =  (Mat_SeqAIJ*)A->data;
260:     }

262:     /* Convert Petsc NR matrix to SuperLU_DIST NC. 
263:        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
264:     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
265:       if (lu->FactPattern == SamePattern_SameRowPerm){
266:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
267:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
268:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
269:       } else {
270:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
271:         Destroy_LU(N, &lu->grid, &lu->LUstruct);
272:         lu->options.Fact = SamePattern;
273:       }
274:     }
275: #if defined(PETSC_USE_COMPLEX)
276:     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
277: #else
278:     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
279: #endif

281:     /* Create compressed column matrix A_sup. */
282: #if defined(PETSC_USE_COMPLEX)
283:     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
284: #else
285:     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
286: #endif
287:   } else { /* distributed mat input */
288:     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
289:     aa=(Mat_SeqAIJ*)(mat->A)->data;
290:     bb=(Mat_SeqAIJ*)(mat->B)->data;
291:     ai=aa->i; aj=aa->j;
292:     bi=bb->i; bj=bb->j;
293: #if defined(PETSC_USE_COMPLEX)
294:     av=(doublecomplex*)aa->a;
295:     bv=(doublecomplex*)bb->a;
296: #else
297:     av=aa->a;
298:     bv=bb->a;
299: #endif
300:     rstart = A->rmap.rstart;
301:     nz     = aa->nz + bb->nz;
302:     garray = mat->garray;
303: 
304:     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
305: #if defined(PETSC_USE_COMPLEX)
306:       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
307: #else
308:       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
309: #endif
310:     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
311:       if (lu->FactPattern == SamePattern_SameRowPerm){
312:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
313:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
314:       } else {
315:         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
316:         lu->options.Fact = SamePattern;
317:       }
318:     }
319:     nz = 0; irow = rstart;
320:     for ( i=0; i<m; i++ ) {
321:       lu->row[i] = nz;
322:       countA = ai[i+1] - ai[i];
323:       countB = bi[i+1] - bi[i];
324:       ajj = aj + ai[i];  /* ptr to the beginning of this row */
325:       bjj = bj + bi[i];

327:       /* B part, smaller col index */
328:       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
329:       jB = 0;
330:       for (j=0; j<countB; j++){
331:         jcol = garray[bjj[j]];
332:         if (jcol > colA_start) {
333:           jB = j;
334:           break;
335:         }
336:         lu->col[nz] = jcol;
337:         lu->val[nz++] = *bv++;
338:         if (j==countB-1) jB = countB;
339:       }

341:       /* A part */
342:       for (j=0; j<countA; j++){
343:         lu->col[nz] = rstart + ajj[j];
344:         lu->val[nz++] = *av++;
345:       }

347:       /* B part, larger col index */
348:       for (j=jB; j<countB; j++){
349:         lu->col[nz] = garray[bjj[j]];
350:         lu->val[nz++] = *bv++;
351:       }
352:     }
353:     lu->row[m] = nz;
354: #if defined(PETSC_USE_COMPLEX)
355:     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
356:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
357: #else
358:     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
359:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
360: #endif
361:   }
362:   if (lu->options.PrintStat) {
363:     PetscGetTime(&time);
364:     time0 = time - time0;
365:   }

367:   /* Factor the matrix. */
368:   PStatInit(&stat);   /* Initialize the statistics variables. */

370:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
371: #if defined(PETSC_USE_COMPLEX)
372:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
373:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
374: #else
375:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
376:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
377: #endif 
378:   } else { /* distributed mat input */
379: #if defined(PETSC_USE_COMPLEX)
380:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
381:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
382:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
383: #else
384:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
385:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
386:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
387: #endif
388:   }

390:   if (lu->MatInputMode == GLOBAL && size > 1){
391:     MatDestroy(A_seq);
392:   }

394:   if (lu->options.PrintStat) {
395:     if (size > 1){
396:       MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
397:       MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
398:       MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
399:       time = time/size; /* average time */
400:       if (!rank) {
401:         PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);
402:       }
403:     } else {
404:       PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);
405:     }
406:     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
407:   }
408:   PStatFree(&stat);
409:   if (size > 1){
410:     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
411:     F_diag->assembled = PETSC_TRUE;
412:   }
413:   (*F)->assembled  = PETSC_TRUE;
414:   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
415:   return(0);
416: }

418: /* Note the Petsc r and c permutations are ignored */
421: PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
422: {
423:   Mat               B;
424:   Mat_SuperLU_DIST  *lu;
425:   PetscErrorCode    ierr;
426:   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
427:   PetscMPIInt       size;
428:   superlu_options_t options;
429:   PetscTruth        flg;
430:   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
431:   const char        *prtype[] = {"LargeDiag","NATURAL"};
432:   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

435:   /* Create the factorization matrix */
436:   MatCreate(((PetscObject)A)->comm,&B);
437:   MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);
438:   MatSetType(B,((PetscObject)A)->type_name);
439:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
440:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

442:   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
443:   B->ops->solve            = MatSolve_SuperLU_DIST;
444:   B->factor                = FACTOR_LU;

446:   lu = (Mat_SuperLU_DIST*)(B->spptr);

448:   /*   Set the default input options:
449:         options.Fact = DOFACT;
450:         options.Equil = YES;
451:         options.ColPerm = MMD_AT_PLUS_A;
452:         options.RowPerm = LargeDiag;
453:         options.ReplaceTinyPivot = YES;
454:         options.Trans = NOTRANS;
455:         options.IterRefine = DOUBLE;
456:         options.SolveInitialized = NO;
457:         options.RefineInitialized = NO;
458:         options.PrintStat = YES;
459:   */
460:   set_default_options_dist(&options);

462:   MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));
463:   MPI_Comm_size(((PetscObject)A)->comm,&size);
464:   /* Default num of process columns and rows */
465:   lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size));
466:   if (!lu->npcol) lu->npcol = 1;
467:   while (lu->npcol > 0) {
468:     lu->nprow = (PetscMPIInt)(size/lu->npcol);
469:     if (size == lu->nprow * lu->npcol) break;
470:     lu->npcol --;
471:   }
472: 
473:   PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
474:     PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
475:     PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
476:     if (size != lu->nprow * lu->npcol)
477:       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
478: 
479:     lu->MatInputMode = DISTRIBUTED;
480:     PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);
481:     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

483:     PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
484:     if (!flg) {
485:       options.Equil = NO;
486:     }

488:     PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);
489:     if (flg) {
490:       switch (indx) {
491:       case 0:
492:         options.RowPerm = LargeDiag;
493:         break;
494:       case 1:
495:         options.RowPerm = NOROWPERM;
496:         break;
497:       }
498:     }

500:     PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);
501:     if (flg) {
502:       switch (indx) {
503:       case 0:
504:         options.ColPerm = MMD_AT_PLUS_A;
505:         break;
506:       case 1:
507:         options.ColPerm = NATURAL;
508:         break;
509:       case 2:
510:         options.ColPerm = MMD_ATA;
511:         break;
512:       }
513:     }

515:     PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
516:     if (!flg) {
517:       options.ReplaceTinyPivot = NO;
518:     }

520:     lu->FactPattern = SamePattern;
521:     PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);
522:     if (flg) {
523:       switch (indx) {
524:       case 0:
525:         lu->FactPattern = SamePattern;
526:         break;
527:       case 1:
528:         lu->FactPattern = SamePattern_SameRowPerm;
529:         break;
530:       }
531:     }
532: 
533:     options.IterRefine = NOREFINE;
534:     PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
535:     if (flg) {
536:       options.IterRefine = DOUBLE;
537:     }

539:     if (PetscLogPrintInfo) {
540:       options.PrintStat = YES;
541:     } else {
542:       options.PrintStat = NO;
543:     }
544:     PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
545:                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);
546:   PetscOptionsEnd();

548:   /* Initialize the SuperLU process grid. */
549:   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

551:   /* Initialize ScalePermstruct and LUstruct. */
552:   ScalePermstructInit(M, N, &lu->ScalePermstruct);
553:   LUstructInit(M, N, &lu->LUstruct);

555:   lu->options             = options;
556:   lu->options.Fact        = DOFACT;
557:   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
558:   *F = B;
559:   return(0);
560: }

564: PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
565:   PetscErrorCode   ierr;
566:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);

569:   (*lu->MatAssemblyEnd)(A,mode);
570:   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
571:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
572:   return(0);
573: }

577: PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
578: {
579:   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
580:   superlu_options_t options;
581:   PetscErrorCode    ierr;

584:   /* check if matrix is superlu_dist type */
585:   if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);

587:   options = lu->options;
588:   PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
589:   PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
590:   PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);
591:   PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);
592:   PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);
593:   PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);
594:   PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
595:   PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
596:   if (options.ColPerm == NATURAL) {
597:     PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");
598:   } else if (options.ColPerm == MMD_AT_PLUS_A) {
599:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");
600:   } else if (options.ColPerm == MMD_ATA) {
601:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");
602:   } else {
603:     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
604:   }
605: 
606:   if (lu->FactPattern == SamePattern){
607:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");
608:   } else {
609:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");
610:   }
611:   return(0);
612: }

616: PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
617: {
618:   PetscErrorCode    ierr;
619:   PetscTruth        iascii;
620:   PetscViewerFormat format;
621:   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);

624:   (*lu->MatView)(A,viewer);

626:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
627:   if (iascii) {
628:     PetscViewerGetFormat(viewer,&format);
629:     if (format == PETSC_VIEWER_ASCII_INFO) {
630:       MatFactorInfo_SuperLU_DIST(A,viewer);
631:     }
632:   }
633:   return(0);
634: }


640: PetscErrorCode  MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
641: {
642:   /* This routine is only called to convert to MATSUPERLU_DIST */
643:   /* from MATSEQAIJ if A has a single process communicator */
644:   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
645:   PetscErrorCode   ierr;
646:   PetscMPIInt      size;
647:   MPI_Comm         comm;
648:   Mat              B=*newmat;
649:   Mat_SuperLU_DIST *lu;

652:   PetscNewLog(B,Mat_SuperLU_DIST,&lu);
653:   if (reuse == MAT_INITIAL_MATRIX) {
654:     MatDuplicate(A,MAT_COPY_VALUES,&B);
655:     lu->MatDuplicate         = B->ops->duplicate;
656:     lu->MatView              = B->ops->view;
657:     lu->MatAssemblyEnd       = B->ops->assemblyend;
658:     lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
659:     lu->MatDestroy           = B->ops->destroy;
660:   } else {
661:     lu->MatDuplicate         = A->ops->duplicate;
662:     lu->MatView              = A->ops->view;
663:     lu->MatAssemblyEnd       = A->ops->assemblyend;
664:     lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
665:     lu->MatDestroy           = A->ops->destroy;
666:   }
667:   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;

669:   B->spptr                 = (void*)lu;
670:   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
671:   B->ops->view             = MatView_SuperLU_DIST;
672:   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
673:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
674:   B->ops->destroy          = MatDestroy_SuperLU_DIST;

676:   PetscObjectGetComm((PetscObject)A,&comm);
677:   MPI_Comm_size(comm,&size);
678:   if (size == 1) {
679:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
680:     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);
681:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
682:     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);
683:   } else {
684:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
685:                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);
686:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
687:                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);
688:   }
689:   PetscInfo(A,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");
690:   PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);
691:   *newmat = B;
692:   return(0);
693: }

698: PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
699:   PetscErrorCode   ierr;
700:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

703:   (*lu->MatDuplicate)(A,op,M);
704:   PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));
705:   return(0);
706: }

708: /*MC
709:   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 
710:   via the external package SuperLU_DIST.

712:   If SuperLU_DIST is installed (see the manual for
713:   instructions on how to declare the existence of external packages),
714:   a matrix type can be constructed which invokes SuperLU_DIST solvers.
715:   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then 
716:   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
717:   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!

719:   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
720:   and from MATMPIAIJ otherwise.  As a result, for single process communicators, 
721:   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 
722:   for communicators controlling multiple processes.  It is recommended that you call both of
723:   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
724:   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
725:   without data copy; but this MUST be called AFTER the matrix values are set. 

727:   

729:   Options Database Keys:
730: + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
731: . -mat_superlu_dist_r <n> - number of rows in processor partition
732: . -mat_superlu_dist_c <n> - number of columns in processor partition
733: . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
734: . -mat_superlu_dist_equil - equilibrate the matrix
735: . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
736: . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
737: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
738: . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
739: . -mat_superlu_dist_iterrefine - use iterative refinement
740: - -mat_superlu_dist_statprint - print factorization information

742:    Level: beginner

744: .seealso: PCLU
745: M*/

750: PetscErrorCode  MatCreate_SuperLU_DIST(Mat A)
751: {
753:   PetscMPIInt    size;

756:   MPI_Comm_size(((PetscObject)A)->comm,&size);
757:   if (size == 1) {
758:     MatSetType(A,MATSEQAIJ);
759:   } else {
760:     MatSetType(A,MATMPIAIJ);
761:     /*  A_diag = 0x0 ???  -- do we need it?
762:     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
763:     MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);
764:     */
765:   }
766:   MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);
767:   return(0);
768: }