Actual source code: superlu_dist.c

  1: /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
  2: /* 
  3:         Provides an interface to the SuperLU_DIST sparse solver
  4: */

 6:  #include src/mat/impls/aij/seq/aij.h
 7:  #include src/mat/impls/aij/mpi/mpiaij.h
  8: #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
  9: #include "stdlib.h"
 10: #endif

 12: #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)

 14: EXTERN_C_BEGIN
 15: #if defined(PETSC_USE_COMPLEX)
 16: #include "superlu_zdefs.h"
 17: #else
 18: #include "superlu_ddefs.h"
 19: #endif
 20: EXTERN_C_END

 22: typedef struct {
 23:   int                     nprow,npcol;
 24:   gridinfo_t              grid;
 25:   superlu_options_t       options;
 26:   SuperMatrix             A_sup;
 27:   ScalePermstruct_t       ScalePermstruct;
 28:   LUstruct_t              LUstruct;
 29:   int                     StatPrint;
 30: } Mat_MPIAIJ_SuperLU_DIST;

 32: extern int MatDestroy_MPIAIJ(Mat);
 33: extern int MatDestroy_SeqAIJ(Mat);

 35: #if !defined(PETSC_HAVE_SUPERLU)
 36: /* SuperLU function: Convert a row compressed storage into a column compressed storage. */
 37: #if !defined(PETSC_USE_COMPLEX)
 38: #undef __FUNCT__  
 40: void dCompRow_to_CompCol(int m, int n, int nnz,
 41:                     double *a, int *colind, int *rowptr,
 42:                     double **at, int_t **rowind, int_t **colptr)
 43: {
 44:     int i, j, col, relpos, *marker;

 46:     /* Allocate storage for another copy of the matrix. */
 47:     *at = (double *) doubleMalloc_dist(nnz);
 48:     *rowind = (int *) intMalloc_dist(nnz);
 49:     *colptr = (int *) intMalloc_dist(n+1);
 50:     marker = (int *) intCalloc_dist(n);

 52:     /* Get counts of each column of A, and set up column pointers */
 53:     for (i = 0; i < m; ++i)
 54:         for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
 55:     (*colptr)[0] = 0;
 56:     for (j = 0; j < n; ++j) {
 57:         (*colptr)[j+1] = (*colptr)[j] + marker[j];
 58:         marker[j] = (*colptr)[j];
 59:     }

 61:     /* Transfer the matrix into the compressed column storage. */
 62:     for (i = 0; i < m; ++i) {
 63:         for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
 64:             col = colind[j];
 65:             relpos = marker[col];
 66:             (*rowind)[relpos] = i;
 67:             (*at)[relpos] = a[j];
 68:             ++marker[col];
 69:         }
 70:     }

 72:     SUPERLU_FREE(marker);
 73: }
 74: #else
 75: void zCompRow_to_CompCol(int m, int n, int nnz,
 76:                     doublecomplex *a, int *colind, int *rowptr,
 77:                     doublecomplex **at, int **rowind, int **colptr)
 78: {
 79:     register int i, j, col, relpos;
 80:     int *marker;
 81: 
 82:     /* Allocate storage for another copy of the matrix. */
 83:     *at = (doublecomplex *) doublecomplexMalloc_dist(nnz);
 84:     *rowind = (int *) intMalloc_dist(nnz);
 85:     *colptr = (int *) intMalloc_dist(n+1);
 86:     marker = (int *) intCalloc_(n);
 87: 
 88:     /* Get counts of each column of A, and set up column pointers */
 89:     for (i = 0; i < m; ++i)
 90:         for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
 91:     (*colptr)[0] = 0;
 92:     for (j = 0; j < n; ++j) {
 93:         (*colptr)[j+1] = (*colptr)[j] + marker[j];
 94:         marker[j] = (*colptr)[j];
 95:     }
 96: 
 97:     /* Transfer the matrix into the compressed column storage. */
 98:     for (i = 0; i < m; ++i) {
 99:         for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
100:             col = colind[j];
101:             relpos = marker[col];
102:             (*rowind)[relpos] = i;
103:             (*at)[relpos] = a[j];
104:             ++marker[col];
105:         }
106:     }

108:     SUPERLU_FREE(marker);
109: }
110: #endif
111: #else
112: EXTERN_C_BEGIN
113: #if defined(PETSC_USE_COMPLEX)
114: extern void zCompRow_to_CompCol(int,int,int,doublecomplex*,int*,int*,doublecomplex**,int**,int**);
115: #else
116: extern void dCompRow_to_CompCol(int,int,int,double*,       int*,int*,double**,int_t**,int_t**);
117: #endif
118: EXTERN_C_END
119: #endif /* PETSC_HAVE_SUPERLU*/

121: #undef __FUNCT__  
123: int MatDestroy_MPIAIJ_SuperLU_DIST(Mat A)
124: {
125:   Mat_MPIAIJ              *a  = (Mat_MPIAIJ*)A->data;
126:   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
127:   int                     ierr, size=a->size;
128: 
130:   /* Deallocate SuperLU_DIST storage */
131: #if defined(PETSC_USE_COMPLEX)
132:   Destroy_CompCol_Matrix(&lu->A_sup);
133: #else
134:   Destroy_CompCol_Matrix_dist(&lu->A_sup);
135: #endif
136:   Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
137:   ScalePermstructFree(&lu->ScalePermstruct);
138:   LUstructFree(&lu->LUstruct);

140:   /* Release the SuperLU_DIST process grid. */
141:   superlu_gridexit(&lu->grid);

143:   PetscFree(lu);
144: 
145:   if (size == 1){
146:     MatDestroy_SeqAIJ(A);
147:   } else {
148:     MatDestroy_MPIAIJ(A);
149:   }
150: 
151:   return(0);
152: }

154: #undef __FUNCT__  
156: int MatSolve_MPIAIJ_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
157: {
158:   Mat_MPIAIJ              *aa = (Mat_MPIAIJ*)A->data;
159:   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
160:   int                     ierr, size=aa->size;
161:   int                     m=A->M, N=A->N;
162:   superlu_options_t       options=lu->options;
163:   SuperLUStat_t           stat;
164:   double                  berr[1];

166:   PetscScalar             *bptr;

168:   int                     info, nrhs=1;
169:   Vec                     x_seq;
170:   IS                      iden;
171:   VecScatter              scat;
172:   PetscLogDouble          time0,time,time_min,time_max;
173: 
175:   if (size > 1) {  /* convert mpi vector b to seq vector x_seq */
176:     VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
177:     ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
178:     VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
179:     ISDestroy(iden);

181:     VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
182:     VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
183:     VecGetArray(x_seq,&bptr);
184:   } else {
185:     VecCopy(b_mpi,x);
186:     VecGetArray(x,&bptr);
187:   }
188: 
189:   options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/

191:   PStatInit(&stat);        /* Initialize the statistics variables. */
192:   if (lu->StatPrint) {
193:     MPI_Barrier(A->comm); /* to be removed */
194:     PetscGetTime(&time0);  /* to be removed */
195:   }
196: #if defined(PETSC_USE_COMPLEX)
197:   pzgssvx_ABglobal(&options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
198:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
199: #else
200:   pdgssvx_ABglobal(&options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
201:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
202: #endif 
203:   if (lu->StatPrint) {
204:     PetscGetTime(&time);  /* to be removed */
205:      PStatPrint(&stat, &lu->grid);     /* Print the statistics. */
206:   }
207:   PStatFree(&stat);
208: 
209:   if (size > 1) {    /* convert seq x to mpi x */
210:     VecRestoreArray(x_seq,&bptr);
211:     VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
212:     VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
213:     VecScatterDestroy(scat);
214:     VecDestroy(x_seq);
215:   } else {
216:     VecRestoreArray(x,&bptr);
217:   }
218:   if (lu->StatPrint) {
219:     time0 = time - time0;
220:     MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
221:     MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
222:     MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
223:     time = time/size; /* average time */
224:     PetscPrintf(A->comm, "  Time for superlu_dist solve (max/min/avg): %g / %g / %gnn",time_max,time_min,time);
225:   }

227:   return(0);
228: }

230: #undef __FUNCT__   
232: int MatLUFactorNumeric_MPIAIJ_SuperLU_DIST(Mat A,Mat *F)
233: {
234:   Mat_MPIAIJ              *fac = (Mat_MPIAIJ*)(*F)->data;
235:   Mat                     *tseq,A_seq = PETSC_NULL;
236:   Mat_SeqAIJ              *aa;
237:   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)(*F)->spptr;
238:   int                     M=A->M,N=A->N,info,ierr,size=fac->size,i;
239:   SuperLUStat_t           stat;
240:   double                  *berr=0;
241: #if defined(PETSC_USE_COMPLEX)
242:   doublecomplex           *a,*bptr=0;
243: #else
244:   double                  *a,*bptr=0;
245: #endif
246:   int_t                   *asub, *xa;
247:   SuperMatrix             A_sup;
248:   IS                      isrow;
249:   PetscLogDouble          time0[2],time[2],time_min[2],time_max[2];

252:   if (lu->StatPrint) {
253:     MPI_Barrier(A->comm);
254:     PetscGetTime(&time0[0]);
255:   }

257:   if (size > 1) { /* convert mpi A to seq mat A */
258:     ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
259:     MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
260:     ISDestroy(isrow);
261: 
262:     A_seq = *tseq;
263:     PetscFree(tseq);
264:     aa =  (Mat_SeqAIJ*)A_seq->data;
265:   } else {
266:     aa =  (Mat_SeqAIJ*)A->data;
267:   }

269:   /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
270: #if defined(PETSC_USE_COMPLEX)
271:   zallocateA_dist(N, aa->nz, &a, &asub, &xa);
272:   zCompRow_to_CompCol(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&a,&asub, &xa);
273: #else
274:   dallocateA_dist(N, aa->nz, &a, &asub, &xa);
275:   dCompRow_to_CompCol(M,N,aa->nz,aa->a,aa->j,aa->i,&a, &asub, &xa);
276: #endif
277:   if (lu->StatPrint) {
278:     PetscGetTime(&time[0]);
279:     time0[0] = time[0] - time0[0];
280:   }

282:   /* Create compressed column matrix A_sup. */
283: #if defined(PETSC_USE_COMPLEX)
284:   zCreate_CompCol_Matrix(&A_sup, M, N, aa->nz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE);
285: #else
286:   dCreate_CompCol_Matrix_dist(&A_sup, M, N, aa->nz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
287: #endif
288:   /* Factor the matrix. */
289:   PStatInit(&stat);                /* Initialize the statistics variables. */

291:   if (lu->StatPrint) {
292:     MPI_Barrier(A->comm);
293:     PetscGetTime(&time0[1]);
294:   }
295: #if defined(PETSC_USE_COMPLEX)
296:   pzgssvx_ABglobal(&lu->options, &A_sup, &lu->ScalePermstruct, bptr, M, 0,
297:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
298: #else
299:   pdgssvx_ABglobal(&lu->options, &A_sup, &lu->ScalePermstruct, bptr, M, 0,
300:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
301: #endif 
302:   if (lu->StatPrint) {
303:     PetscGetTime(&time[1]);  /* to be removed */
304:     time0[1] = time[1] - time0[1];
305:     if (lu->StatPrint) PStatPrint(&stat, &lu->grid);        /* Print the statistics. */
306:   }
307:   PStatFree(&stat);

309:   lu->A_sup        = A_sup;
310:   lu->options.Fact = SamePattern; /* Sparsity pattern of A and perm_c can be reused. */
311:   if (size > 1){
312:     MatDestroy(A_seq);
313:   }

315:   if (lu->StatPrint) {
316:     MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm);
317:     MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm);
318:     MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm);
319:     for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */
320:     PetscPrintf(A->comm, "  Time for mat conversion (max/min/avg):    %g / %g / %gn",time_max[0],time_min[0],time[0]);
321:     PetscPrintf(A->comm, "  Time for superlu_dist fact (max/min/avg): %g / %g / %gnn",time_max[1],time_min[1],time[1]);
322:   }
323:   (*F)->assembled             = PETSC_TRUE;
324:   return(0);
325: }

327: /* Note the Petsc r and c permutations are ignored */
328: #undef __FUNCT__  
330: int MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST(Mat A,IS r,IS c,MatLUInfo *info,Mat *F)
331: {
332:   Mat_MPIAIJ              *fac;
333:   Mat_MPIAIJ_SuperLU_DIST *lu;
334:   int                     ierr,M=A->M,N=A->N,size;
335:   gridinfo_t              grid;
336:   superlu_options_t       options;
337:   ScalePermstruct_t       ScalePermstruct;
338:   LUstruct_t              LUstruct;
339:   char                    buff[32];
340:   PetscTruth              flg;
341:   char                    *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
342:   char                    *prtype[] = {"LargeDiag","NATURAL"};
344: 
345:   PetscNew(Mat_MPIAIJ_SuperLU_DIST,&lu);

347:   /* Create the factorization matrix F */
348:   MatCreateMPIAIJ(A->comm,A->m,A->n,M,N,0,PETSC_NULL,0,PETSC_NULL,F);

350:   (*F)->ops->lufactornumeric  = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
351:   (*F)->ops->solve            = MatSolve_MPIAIJ_SuperLU_DIST;
352:   (*F)->ops->destroy          = MatDestroy_MPIAIJ_SuperLU_DIST;
353:   (*F)->factor                = FACTOR_LU;
354:   (*F)->spptr                  = (void*)lu;
355:   fac                         = (Mat_MPIAIJ*)(*F)->data;

357:   /* Set the input options */
358:   set_default_options(&options);

360:   MPI_Comm_size(A->comm,&size);
361:   lu->nprow = size/2;               /* Default process rows.      */
362:   if (lu->nprow == 0) lu->nprow = 1;
363:   lu->npcol = size/lu->nprow;           /* Default process columns.   */

365:   PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");
366: 
367:     PetscOptionsInt("-mat_aij_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
368:     PetscOptionsInt("-mat_aij_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
369:     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
370: 
371:     PetscOptionsLogical("-mat_aij_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
372:     if (!flg) {
373:       options.Equil = NO;
374:     }

376:     PetscOptionsEList("-mat_aij_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);
377:     while (flg) {
378:       PetscStrcmp(buff,"LargeDiag",&flg);
379:       if (flg) {
380:         options.RowPerm = LargeDiag;
381:         break;
382:       }
383:       PetscStrcmp(buff,"NATURAL",&flg);
384:       if (flg) {
385:         options.RowPerm = NOROWPERM;
386:         break;
387:       }
388:       SETERRQ1(1,"Unknown row permutation %s",buff);
389:     }

391:     PetscOptionsEList("-mat_aij_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);
392:     while (flg) {
393:       PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);
394:       if (flg) {
395:         options.ColPerm = MMD_AT_PLUS_A;
396:         break;
397:       }
398:       PetscStrcmp(buff,"NATURAL",&flg);
399:       if (flg) {
400:         options.ColPerm = NATURAL;
401:         break;
402:       }
403:       PetscStrcmp(buff,"MMD_ATA",&flg);
404:       if (flg) {
405:         options.ColPerm = MMD_ATA;
406:         break;
407:       }
408:       PetscStrcmp(buff,"COLAMD",&flg);
409:       if (flg) {
410:         options.ColPerm = COLAMD;
411:         break;
412:       }
413:       SETERRQ1(1,"Unknown column permutation %s",buff);
414:     }

416:     PetscOptionsLogical("-mat_aij_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
417:     if (!flg) {
418:       options.ReplaceTinyPivot = NO;
419:     }

421:     options.IterRefine = NOREFINE;
422:     PetscOptionsLogical("-mat_aij_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
423:     if (flg) {
424:       options.IterRefine = DOUBLE;
425:     }

427:     if (PetscLogPrintInfo) {
428:       lu->StatPrint = (int)PETSC_TRUE;
429:     } else {
430:       lu->StatPrint = (int)PETSC_FALSE;
431:     }
432:     PetscOptionsLogical("-mat_aij_superlu_dist_statprint","Print factorization information","None",
433:                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);
434:   PetscOptionsEnd();

436:   /* Initialize the SuperLU process grid. */
437:   superlu_gridinit(A->comm, lu->nprow, lu->npcol, &grid);

439:   /* Initialize ScalePermstruct and LUstruct. */
440:   ScalePermstructInit(M, N, &ScalePermstruct);
441:   LUstructInit(M, N, &LUstruct);

443:   lu->ScalePermstruct = ScalePermstruct;
444:   lu->LUstruct        = LUstruct;
445:   lu->options         = options;
446:   lu->grid            = grid;
447:   fac->size           = size;

449:   return(0);
450: }

452: #undef __FUNCT__  
454: int MatUseSuperLU_DIST_MPIAIJ(Mat A)
455: {
457:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST;
458:   A->ops->lufactornumeric  = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
459:   return(0);
460: }

462: int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer)
463: {
464:   Mat_MPIAIJ_SuperLU_DIST *lu= (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
465:   superlu_options_t       options;
466:   int                     ierr;
467:   char                    *colperm;

470:   /* check if matrix is superlu_dist type */
471:   if (A->ops->solve != MatSolve_MPIAIJ_SuperLU_DIST) return(0);

473:   options = lu->options;
474:   PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:n");
475:   PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s n",(options.Equil != NO) ? "true": "false");
476:   PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s n",(options.ReplaceTinyPivot != NO) ? "true": "false");
477:   PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s n",(options.IterRefine == DOUBLE) ? "true": "false");
478:   PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d n",lu->nprow,lu->npcol);
479:   PetscViewerASCIIPrintf(viewer,"  Row permutation %s n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
480:   if (options.ColPerm == NATURAL) {
481:     colperm = "NATURAL";
482:   } else if (options.ColPerm == MMD_AT_PLUS_A) {
483:     colperm = "MMD_AT_PLUS_A";
484:   } else if (options.ColPerm == MMD_ATA) {
485:     colperm = "MMD_ATA";
486:   } else if (options.ColPerm == COLAMD) {
487:     colperm = "COLAMD";
488:   } else {
489:     SETERRQ(1,"Unknown column permutation");
490:   }
491:   PetscViewerASCIIPrintf(viewer,"  Column permutation %s n",colperm);
492:   return(0);
493: }

495: #else

497: #undef __FUNCT__  
499: int MatUseSuperLU_DIST_MPIAIJ(Mat A)
500: {
502:   return(0);
503: }

505: #endif