Actual source code: mumps.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:     Provides an interface to the MUMPS sparse solver
  5: */
 6:  #include "src/mat/impls/aij/seq/aij.h"
 7:  #include "src/mat/impls/aij/mpi/mpiaij.h"
 8:  #include "src/mat/impls/sbaij/seq/sbaij.h"
 9:  #include "src/mat/impls/sbaij/mpi/mpisbaij.h"

 12: #if defined(PETSC_USE_COMPLEX)
 13: #include "zmumps_c.h"
 14: #else
 15: #include "dmumps_c.h" 
 16: #endif
 18: #define JOB_INIT -1
 19: #define JOB_END -2
 20: /* macros s.t. indices match MUMPS documentation */
 21: #define ICNTL(I) icntl[(I)-1] 
 22: #define CNTL(I) cntl[(I)-1] 
 23: #define INFOG(I) infog[(I)-1]
 24: #define INFO(I) info[(I)-1]
 25: #define RINFOG(I) rinfog[(I)-1]
 26: #define RINFO(I) rinfo[(I)-1]
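/* For example, lu->id.ICNTL(7) maps to lu->id.icntl[6], i.e. the entry documented as
   ICNTL(7) in the MUMPS User's Guide; the same 1-based convention applies to CNTL,
   INFO(G) and RINFO(G). */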

 28: typedef struct {
 29: #if defined(PETSC_USE_COMPLEX)
 30:   ZMUMPS_STRUC_C id;
 31: #else
 32:   DMUMPS_STRUC_C id;
 33: #endif
 34:   MatStructure   matstruc;
 35:   PetscMPIInt    myid,size;
 36:   PetscInt       *irn,*jcn,sym,nSolve;
 37:   PetscScalar    *val;
 38:   MPI_Comm       comm_mumps;
 39:   VecScatter     scat_rhs, scat_sol;
 40:   PetscTruth     isAIJ,CleanUpMUMPS;
 41:   Vec            b_seq,x_seq;
 42:   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
 43:   PetscErrorCode (*MatView)(Mat,PetscViewer);
 44:   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
 45:   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
 46:   PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
 47:   PetscErrorCode (*MatDestroy)(Mat);
 48:   PetscErrorCode (*specialdestroy)(Mat);
 49:   PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
 50: } Mat_MUMPS;

 52: EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
 54: PetscErrorCode  MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
 56: /* convert a PETSc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
 57: /*
 58:   input:
 59:     A       - matrix in mpiaij or mpisbaij (bs=1) format
 60:     shift   - 0: C-style output triples; 1: Fortran-style output triples
 61:     valOnly - FALSE: space is allocated and values are set for the triples
 62:               TRUE:  only the values in the v array are updated
 63:   output:
 64:     nnz     - dimension of r, c, and v (number of local nonzero entries of A)
 65:     r, c, v - row index, column index, and matrix values (the triples)
 66:  */
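/* Illustrative example (not part of the original source): suppose the local row block of A
   on this process owns global row 0 with entries a(0,0)=2.0 (diagonal block) and a(0,5)=3.0
   (off-diagonal B block, so column 5 is reached through garray). With shift=1 the routine
   would produce the Fortran-style triples (r,c,v) = (1,1,2.0), (1,6,3.0). */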
 67: PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
 68:   PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
 70:   PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
 71:   PetscInt       *row,*col;
 72:   PetscScalar    *av, *bv,*val;
 73:   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

 76:   if (mumps->isAIJ){
 77:     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
 78:     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
 79:     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
 80:     nz = aa->nz + bb->nz;
 81:     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
 82:     garray = mat->garray;
 83:     av=aa->a; bv=bb->a;
 84: 
 85:   } else {
 86:     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
 87:     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
 88:     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
 89:     if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
 90:     nz = aa->nz + bb->nz;
 91:     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
 92:     garray = mat->garray;
 93:     av=aa->a; bv=bb->a;
 94:   }

 96:   if (!valOnly){
 97:     PetscMalloc(nz*sizeof(PetscInt) ,&row);
 98:     PetscMalloc(nz*sizeof(PetscInt),&col);
 99:     PetscMalloc(nz*sizeof(PetscScalar),&val);
100:     *r = row; *c = col; *v = val;
101:   } else {
102:     row = *r; col = *c; val = *v;
103:   }
104:   *nnz = nz;

106:   jj = 0; irow = rstart;
107:   for ( i=0; i<m; i++ ) {
108:     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
109:     countA = ai[i+1] - ai[i];
110:     countB = bi[i+1] - bi[i];
111:     bjj = bj + bi[i];

113:     /* get jB, the starting local col index for the 2nd B-part */
114:     colA_start = rstart + ajj[0]; /* the smallest col index for A */
115:     j=-1;
116:     do {
117:       j++;
118:       if (j == countB) break;
119:       jcol = garray[bjj[j]];
120:     } while (jcol < colA_start);
121:     jB = j;
122: 
123:     /* B-part, smaller col index */
124:     colA_start = rstart + ajj[0]; /* the smallest col index for A */
125:     for (j=0; j<jB; j++){
126:       jcol = garray[bjj[j]];
127:       if (!valOnly){
128:         row[jj] = irow + shift; col[jj] = jcol + shift;

130:       }
131:       val[jj++] = *bv++;
132:     }
133:     /* A-part */
134:     for (j=0; j<countA; j++){
135:       if (!valOnly){
136:         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
137:       }
138:       val[jj++] = *av++;
139:     }
140:     /* B-part, larger col index */
141:     for (j=jB; j<countB; j++){
142:       if (!valOnly){
143:         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
144:       }
145:       val[jj++] = *bv++;
146:     }
147:     irow++;
148:   }
149: 
150:   return(0);
151: }

156: PetscErrorCode  MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
157: {
159:   Mat            B=*newmat;
160:   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
161:   void           (*f)(void);

164:   if (reuse == MAT_INITIAL_MATRIX) {
165:     MatDuplicate(A,MAT_COPY_VALUES,&B);
166:   }
167:   B->ops->duplicate              = mumps->MatDuplicate;
168:   B->ops->view                   = mumps->MatView;
169:   B->ops->assemblyend            = mumps->MatAssemblyEnd;
170:   B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
171:   B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
172:   B->ops->destroy                = mumps->MatDestroy;

174:   /* put back original composed preallocation function */
175:   PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);
176:   if (f) {
177:     PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);
178:   }
179:   PetscFree(mumps);
180:   A->spptr = PETSC_NULL;

182:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);
183:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);
184:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);
185:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);
186:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);
187:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);
188:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);
189:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);

191:   PetscObjectChangeTypeName((PetscObject)B,type);
192:   *newmat = B;
193:   return(0);
194: }

199: PetscErrorCode MatDestroy_MUMPS(Mat A)
200: {
201:   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
203:   PetscMPIInt    size=lu->size;
204:   PetscErrorCode (*specialdestroy)(Mat);
206:   if (lu->CleanUpMUMPS) {
 207:     /* Terminate instance, deallocate memory */
208:     if (size > 1){
209:       PetscFree(lu->id.sol_loc);
210:       VecScatterDestroy(lu->scat_rhs);
211:       VecDestroy(lu->b_seq);
212:       VecScatterDestroy(lu->scat_sol);
213:       VecDestroy(lu->x_seq);
214:       PetscFree(lu->val);
215:     }
216:     lu->id.job=JOB_END;
217: #if defined(PETSC_USE_COMPLEX)
218:     zmumps_c(&lu->id);
219: #else
220:     dmumps_c(&lu->id);
221: #endif
222:     PetscFree(lu->irn);
223:     PetscFree(lu->jcn);
224:     MPI_Comm_free(&(lu->comm_mumps));
225:   }
226:   specialdestroy = lu->specialdestroy;
227:   (*specialdestroy)(A);
228:   (*A->ops->destroy)(A);
229:   return(0);
230: }

234: PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
235: {
237:   PetscMPIInt    size;

240:   MPI_Comm_size(((PetscObject)A)->comm,&size);
241:   if (size==1) {
242:     MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);
243:   } else {
244:     MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);
245:   }
246:   return(0);
247: }

251: PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
252: {
254:   PetscMPIInt    size;

257:   MPI_Comm_size(((PetscObject)A)->comm,&size);
258:   if (size==1) {
259:     MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);
260:   } else {
261:     MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);
262:   }
263:   return(0);
264: }

268: PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) {
269:   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
270:   PetscScalar    *array;
271:   Vec            x_seq;
272:   IS             is_iden,is_petsc;
274:   PetscInt       i;

277:   lu->id.nrhs = 1;
278:   x_seq = lu->b_seq;
279:   if (lu->size > 1){
 280:     /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
281:     VecScatterBegin(lu->scat_rhs,b,x_seq,INSERT_VALUES,SCATTER_FORWARD);
282:     VecScatterEnd(lu->scat_rhs,b,x_seq,INSERT_VALUES,SCATTER_FORWARD);
283:     if (!lu->myid) {VecGetArray(x_seq,&array);}
284:   } else {  /* size == 1 */
285:     VecCopy(b,x);
286:     VecGetArray(x,&array);
287:   }
288:   if (!lu->myid) { /* define rhs on the host */
289: #if defined(PETSC_USE_COMPLEX)
290:     lu->id.rhs = (mumps_double_complex*)array;
291: #else
292:     lu->id.rhs = array;
293: #endif
294:   }
295:   if (lu->size == 1){
296:     VecRestoreArray(x,&array);
297:   } else if (!lu->myid){
298:     VecRestoreArray(x_seq,&array);
299:   }

301:   if (lu->size > 1){
302:     /* distributed solution */
303:     lu->id.ICNTL(21) = 1;
304:     if (!lu->nSolve){
305:       /* Create x_seq=sol_loc for repeated use */
306:       PetscInt    lsol_loc;
307:       PetscScalar *sol_loc;
308:       lsol_loc = lu->id.INFO(23); /* length of sol_loc */
309:       PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);
310:       lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
311:       lu->id.lsol_loc = lsol_loc;
312: #if defined(PETSC_USE_COMPLEX)
313:       lu->id.sol_loc  = (ZMUMPS_DOUBLE *)sol_loc;
314: #else
315:       lu->id.sol_loc  = (DMUMPS_DOUBLE *)sol_loc;
316: #endif
317:       VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);
318:     }
319:   }

321:   /* solve phase */
322:   /*-------------*/
323:   lu->id.job = 3;
324: #if defined(PETSC_USE_COMPLEX)
325:   zmumps_c(&lu->id);
326: #else
327:   dmumps_c(&lu->id);
328: #endif
329:   if (lu->id.INFOG(1) < 0) {
330:     SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
331:   }

333:   if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */
334:     if (!lu->nSolve){ /* create scatter scat_sol */
335:       ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden); /* from */
336:       for (i=0; i<lu->id.lsol_loc; i++){
337:         lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
338:       }
339:       ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);  /* to */
340:       VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);
341:       ISDestroy(is_iden);
342:       ISDestroy(is_petsc);
343:     }
344:     VecScatterBegin(lu->scat_sol,lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
345:     VecScatterEnd(lu->scat_sol,lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
346:   }
347:   lu->nSolve++;
348:   return(0);
349: }

351: /* 
352:   input:
353:    F:        numeric factor
354:   output:
355:    nneg:     total number of negative pivots
356:    nzero:    0
357:    npos:     (global dimension of F) - nneg
358: */
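/* Usage sketch (assumed typical call, not taken from this file): after the Cholesky phases
   of a MATSBAIJMUMPS matrix have produced the factor F, the inertia can be queried with

     PetscInt nneg,nzero,npos;
     MatGetInertia(F,&nneg,&nzero,&npos);

   For size>1 the option -mat_mumps_icntl_13 1 must be set, as checked below. */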

362: PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
363: {
364:   Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
366:   PetscMPIInt    size;

369:   MPI_Comm_size(((PetscObject)F)->comm,&size);
 370:   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which cannot compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
371:   if (size > 1 && lu->id.ICNTL(13) != 1){
 372:     SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia\n",lu->id.ICNTL(13));
373:   }
374:   if (nneg){
375:     if (!lu->myid){
376:       *nneg = lu->id.INFOG(12);
377:     }
378:     MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);
379:   }
380:   if (nzero) *nzero = 0;
381:   if (npos)  *npos  = F->rmap.N - (*nneg);
382:   return(0);
383: }

387: PetscErrorCode MatFactorNumeric_MUMPS(Mat A,MatFactorInfo *info,Mat *F)
388: {
389:   Mat_MUMPS      *lu =(Mat_MUMPS*)(*F)->spptr;
390:   Mat_MUMPS      *lua=(Mat_MUMPS*)(A)->spptr;
392:   PetscInt       rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl;
393:   PetscTruth     valOnly,flg;
394:   Mat            F_diag;

397:   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
398:     (*F)->ops->solve    = MatSolve_MUMPS;

400:     /* Initialize a MUMPS instance */
401:     MPI_Comm_rank(((PetscObject)A)->comm, &lu->myid);
402:     MPI_Comm_size(((PetscObject)A)->comm,&lu->size);
403:     lua->myid = lu->myid; lua->size = lu->size;
404:     lu->id.job = JOB_INIT;
405:     MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_mumps));
406:     MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));

408:     /* Set mumps options */
409:     PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"MUMPS Options","Mat");
 410:     lu->id.par=1;  /* host participates in factorization and solve */
411:     lu->id.sym=lu->sym;
412:     if (lu->sym == 2){
413:       PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);
414:       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
415:     }
416: #if defined(PETSC_USE_COMPLEX)
417:     zmumps_c(&lu->id);
418: #else
419:     dmumps_c(&lu->id);
420: #endif
421: 
422:     if (lu->size == 1){
423:       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
424:     } else {
425:       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
426:     }

428:     icntl=-1;
429:     lu->id.ICNTL(4) = 0;  /* level of printing; overwrite mumps default ICNTL(4)=2 */
430:     PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);
431:     if ((flg && icntl > 0) || PetscLogPrintInfo) {
432:       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
433:     } else { /* no output */
434:       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
435:       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
436:       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
437:     }
438:     PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);
439:     icntl=-1;
440:     PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);
441:     if (flg) {
442:       if (icntl== 1){
 443:         SETERRQ(PETSC_ERR_SUP,"pivot order set by the user in PERM_IN is not supported by the PETSc/MUMPS interface\n");
444:       } else {
445:         lu->id.ICNTL(7) = icntl;
446:       }
447:     }
448:     PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);
449:     PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);
450:     PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);
451:     PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);
452:     PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);
453:     PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);
454:     PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);

456:     PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);
457:     PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);
458:     PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);
459:     PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);
460:     PetscOptionsEnd();
461:   }

463:   /* define matrix A */
464:   switch (lu->id.ICNTL(18)){
465:   case 0:  /* centralized assembled matrix input (size=1) */
466:     if (!lu->myid) {
467:       if (lua->isAIJ){
468:         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
469:         nz               = aa->nz;
470:         ai = aa->i; aj = aa->j; lu->val = aa->a;
471:       } else {
472:         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
473:         nz                  =  aa->nz;
474:         ai = aa->i; aj = aa->j; lu->val = aa->a;
475:       }
476:       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
477:         PetscMalloc(nz*sizeof(PetscInt),&lu->irn);
478:         PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);
479:         nz = 0;
480:         for (i=0; i<M; i++){
481:           rnz = ai[i+1] - ai[i];
482:           while (rnz--) {  /* Fortran row/col index! */
483:             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
484:           }
485:         }
486:       }
487:     }
488:     break;
489:   case 3:  /* distributed assembled matrix input (size>1) */
490:     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
491:       valOnly = PETSC_FALSE;
492:     } else {
493:       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
494:     }
495:     MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);
496:     break;
497:   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
498:   }

500:   /* analysis phase */
501:   /*----------------*/
502:   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
503:     lu->id.job = 1;

505:     lu->id.n = M;
506:     switch (lu->id.ICNTL(18)){
507:     case 0:  /* centralized assembled matrix input */
508:       if (!lu->myid) {
509:         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
510:         if (lu->id.ICNTL(6)>1){
511: #if defined(PETSC_USE_COMPLEX)
512:           lu->id.a = (mumps_double_complex*)lu->val;
513: #else
514:           lu->id.a = lu->val;
515: #endif
516:         }
517:       }
518:       break;
519:     case 3:  /* distributed assembled matrix input (size>1) */
520:       lu->id.nz_loc = nnz;
521:       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
522:       if (lu->id.ICNTL(6)>1) {
523: #if defined(PETSC_USE_COMPLEX)
524:         lu->id.a_loc = (mumps_double_complex*)lu->val;
525: #else
526:         lu->id.a_loc = lu->val;
527: #endif
528:       }
529:       /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
530:       IS  is_iden;
531:       Vec b;
532:       if (!lu->myid){
533:         VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);
534:         ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);
535:       } else {
536:         VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);
537:         ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);
538:       }
539:       VecCreate(((PetscObject)A)->comm,&b);
540:       VecSetSizes(b,A->rmap.n,PETSC_DECIDE);
541:       VecSetFromOptions(b);

543:       VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);
544:       ISDestroy(is_iden);
545:       VecDestroy(b);
546:       break;
547:     }
548: #if defined(PETSC_USE_COMPLEX)
549:     zmumps_c(&lu->id);
550: #else
551:     dmumps_c(&lu->id);
552: #endif
553:     if (lu->id.INFOG(1) < 0) {
554:       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
555:     }
556:   }

558:   /* numerical factorization phase */
559:   /*-------------------------------*/
560:   lu->id.job = 2;
561:   if(!lu->id.ICNTL(18)) {
562:     if (!lu->myid) {
563: #if defined(PETSC_USE_COMPLEX)
564:       lu->id.a = (mumps_double_complex*)lu->val;
565: #else
566:       lu->id.a = lu->val;
567: #endif
568:     }
569:   } else {
570: #if defined(PETSC_USE_COMPLEX)
571:     lu->id.a_loc = (mumps_double_complex*)lu->val;
572: #else
573:     lu->id.a_loc = lu->val;
574: #endif
575:   }
576: #if defined(PETSC_USE_COMPLEX)
577:   zmumps_c(&lu->id);
578: #else
579:   dmumps_c(&lu->id);
580: #endif
581:   if (lu->id.INFOG(1) < 0) {
582:     if (lu->id.INFO(1) == -13) {
583:       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
584:     } else {
585:       SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
586:     }
587:   }

589:   if (!lu->myid && lu->id.ICNTL(16) > 0){
 590:     SETERRQ1(PETSC_ERR_LIB,"  lu->id.ICNTL(16):=%d\n",lu->id.ICNTL(16));
591:   }

593:   if (lu->size > 1){
594:     if ((*F)->factor == FACTOR_LU){
595:       F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
596:     } else {
597:       F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
598:     }
599:     F_diag->assembled = PETSC_TRUE;
600:     if (lu->nSolve){
601:       VecScatterDestroy(lu->scat_sol);
602:       PetscFree(lu->id.sol_loc);
603:       VecDestroy(lu->x_seq);
604:     }
605:   }
606:   (*F)->assembled   = PETSC_TRUE;
607:   lu->matstruc      = SAME_NONZERO_PATTERN;
608:   lu->CleanUpMUMPS  = PETSC_TRUE;
609:   lu->nSolve        = 0;
610:   return(0);
611: }

 613: /* Note: the PETSc r and c permutations are ignored */
616: PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
617:   Mat            B;
618:   Mat_MUMPS      *lu;

622:   /* Create the factorization matrix */
623:   MatCreate(((PetscObject)A)->comm,&B);
624:   MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);
625:   MatSetType(B,((PetscObject)A)->type_name);
626:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
627:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

629:   B->ops->lufactornumeric = MatFactorNumeric_MUMPS;
630:   B->factor               = FACTOR_LU;
631:   lu                      = (Mat_MUMPS*)B->spptr;
632:   lu->sym                 = 0;
633:   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;

635:   *F = B;
636:   return(0);
637: }

 639: /* Note: the PETSc r permutation is ignored */
642: PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
643:   Mat            B;
644:   Mat_MUMPS      *lu;

648:   /* Create the factorization matrix */
649:   MatCreate(((PetscObject)A)->comm,&B);
650:   MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);
651:   MatSetType(B,((PetscObject)A)->type_name);
652:   MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);
653:   MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);

655:   B->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
656:   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
657:   B->factor                     = FACTOR_CHOLESKY;
658:   lu                            = (Mat_MUMPS*)B->spptr;
659:   lu->sym                       = 2;
660:   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;

662:   *F = B;
663:   return(0);
664: }

668: PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
669:   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;

673:   /* check if matrix is mumps type */
674:   if (A->ops->solve != MatSolve_MUMPS) return(0);

676:   PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");
677:   PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);
678:   PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);
679:   PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):        %d \n",lu->id.ICNTL(1));
680:   PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));
681:   PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):  %d \n",lu->id.ICNTL(3));
682:   PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));
683:   PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));
684:   PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));
685:   PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));
 686:   PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d \n",lu->id.ICNTL(8));
687:   PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));
688:   PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));
689:   PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));
690:   if (!lu->myid && lu->id.ICNTL(11)>0) {
691:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));
692:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));
693:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));
694:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));
695:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));
696:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));
697: 
698:   }
699:   PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));
700:   PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));
701:   PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));
702:   /* ICNTL(15-17) not used */
703:   PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));
 704:   PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d \n",lu->id.ICNTL(19));
705:   PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern):                         %d \n",lu->id.ICNTL(20));
706:   PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",lu->id.ICNTL(21));

708:   PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));
709:   PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));
710:   PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));
711:   PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",lu->id.CNTL(4));

 713:   /* information local to each processor */
714:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");}
715:   PetscSynchronizedPrintf(((PetscObject)A)->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));
716:   PetscSynchronizedFlush(((PetscObject)A)->comm);
717:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");}
718:   PetscSynchronizedPrintf(((PetscObject)A)->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));
719:   PetscSynchronizedFlush(((PetscObject)A)->comm);
720:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");}
721:   PetscSynchronizedPrintf(((PetscObject)A)->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));
722:   PetscSynchronizedFlush(((PetscObject)A)->comm);
723:   /*
724:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      INFO(2) (info about error or warning ): \n");}
725:   PetscSynchronizedPrintf(((PetscObject)A)->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(2));
726:   PetscSynchronizedFlush(((PetscObject)A)->comm);
727:   */

 729:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");}
730:   PetscSynchronizedPrintf(((PetscObject)A)->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(15));
731:   PetscSynchronizedFlush(((PetscObject)A)->comm);

 733:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");}
734:   PetscSynchronizedPrintf(((PetscObject)A)->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(16));
735:   PetscSynchronizedFlush(((PetscObject)A)->comm);

737:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      INFO(23) (num of pivots eliminated on this processor after factorization): \n");}
738:   PetscSynchronizedPrintf(((PetscObject)A)->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(23));
739:   PetscSynchronizedFlush(((PetscObject)A)->comm);

741:   if (!lu->myid){ /* information from the host */
742:     PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));
743:     PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));
744:     PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));

746:     PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));
747:     PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));
748:     PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));
749:     PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));
 750:     PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d \n",lu->id.INFOG(7));
751:     PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));
752:     PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));
 753:     PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d \n",lu->id.INFOG(10));
754:     PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));
755:     PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));
756:     PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));
 757:     PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d \n",lu->id.INFOG(14));
758:     PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));
759:     PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));
760:     PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));
761:     PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));
762:     PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));
 763:     PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));
 764:     PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));
 765:     PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));
 766:     PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));
 767:     PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));
 768:     PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));
769:   }

771:   return(0);
772: }

776: PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
777:   PetscErrorCode    ierr;
778:   PetscTruth        iascii;
779:   PetscViewerFormat format;
780:   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);

783:   (*mumps->MatView)(A,viewer);

785:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
786:   if (iascii) {
787:     PetscViewerGetFormat(viewer,&format);
788:     if (format == PETSC_VIEWER_ASCII_INFO){
789:       MatFactorInfo_MUMPS(A,viewer);
790:     }
791:   }
792:   return(0);
793: }

797: PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
799:   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;

802:   (*mumps->MatAssemblyEnd)(A,mode);

804:   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
805:   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
806:   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
807:   return(0);
808: }

813: PetscErrorCode  MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
814: {
816:   PetscMPIInt    size;
817:   MPI_Comm       comm;
818:   Mat            B=*newmat;
819:   Mat_MUMPS      *mumps;

822:   PetscObjectGetComm((PetscObject)A,&comm);
823:   PetscNewLog(B,Mat_MUMPS,&mumps);

825:   if (reuse == MAT_INITIAL_MATRIX) {
826:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 827:     /* A may have a special container that is not duplicated,
 828:        e.g., A is obtained from MatMatMult(...,&A). Save B->ops instead */
829:     mumps->MatDuplicate              = B->ops->duplicate;
830:     mumps->MatView                   = B->ops->view;
831:     mumps->MatAssemblyEnd            = B->ops->assemblyend;
832:     mumps->MatLUFactorSymbolic       = B->ops->lufactorsymbolic;
833:     mumps->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic;
834:     mumps->MatDestroy                = B->ops->destroy;
835:   } else {
836:     mumps->MatDuplicate              = A->ops->duplicate;
837:     mumps->MatView                   = A->ops->view;
838:     mumps->MatAssemblyEnd            = A->ops->assemblyend;
839:     mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
840:     mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
841:     mumps->MatDestroy                = A->ops->destroy;
842:   }
843:   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
844:   mumps->CleanUpMUMPS              = PETSC_FALSE;
845:   mumps->isAIJ                     = PETSC_TRUE;

847:   B->spptr                         = (void*)mumps;
848:   B->ops->duplicate                = MatDuplicate_MUMPS;
849:   B->ops->view                     = MatView_MUMPS;
850:   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
851:   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
852:   B->ops->destroy                  = MatDestroy_MUMPS;

854:   MPI_Comm_size(comm,&size);
855:   if (size == 1) {
856:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
857:                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);
858:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
859:                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);
860:   } else {
861:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
862:                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);
863:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
864:                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);
865:   }

867:   PetscInfo(A,"Using MUMPS for LU factorization and solves.\n");
868:   PetscObjectChangeTypeName((PetscObject)B,newtype);
869:   *newmat = B;
870:   return(0);
871: }

874: /*MC
875:   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
876:   and sequential matrices via the external package MUMPS.

878:   If MUMPS is installed (see the manual for instructions
879:   on how to declare the existence of external packages),
880:   a matrix type can be constructed which invokes MUMPS solvers.
 881:   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS), then
 882:   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation(), etc. DO NOT
 883:   call MatCreateSeqAIJ/MPIAIJ() directly, or the preallocation information will be LOST!

885:   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
886:   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
887:   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 
888:   for communicators controlling multiple processes.  It is recommended that you call both of
889:   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
890:   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
891:   without data copy AFTER the matrix values are set.

893:   Options Database Keys:
894: + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
895: . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
896: . -mat_mumps_icntl_4 <0,1,2,3,4> - print level
897: . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
898: . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
899: . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
900: . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
901: . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
902: . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
903: . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
904: . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
905: . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
906: . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
907: . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
908: - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold

910:   Level: beginner

912: .seealso: MATSBAIJMUMPS
913: M*/
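/* Usage sketch (illustrative, following the description above; n and nz are placeholders):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
     MatSetType(A,MATAIJMUMPS);
     MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);
     MatMPIAIJSetPreallocation(A,nz,PETSC_NULL,nz,PETSC_NULL);
     ... MatSetValues(), MatAssemblyBegin/End(A,MAT_FINAL_ASSEMBLY) ...

   then run with, e.g., -ksp_type preonly -pc_type lu to solve with the MUMPS LU factors. */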

918: PetscErrorCode  MatCreate_AIJMUMPS(Mat A)
919: {
921:   PetscMPIInt    size;
922: 
924:   MPI_Comm_size(((PetscObject)A)->comm,&size);
925:   if (size == 1) {
926:     MatSetType(A,MATSEQAIJ);
927:   } else {
928:     MatSetType(A,MATMPIAIJ);
929:     /*
930:     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
931:     MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);
932:     */
933:   }
934:   MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);
935:   return(0);
936: }

941: PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
942: {
944:   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;

947:   (*mumps->MatAssemblyEnd)(A,mode);
948:   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
949:   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
950:   return(0);
951: }

956: PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
957: {
958:   Mat       A;
959:   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;

963:   /*
964:     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
 965:     into MUMPS type so that the block Jacobi preconditioner (for example) can use MUMPS.  I would
966:     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
967:     block size info so that PETSc can determine the local size properly.  The block size info is set
968:     in the preallocation routine.
969:   */
970:   (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
971:   A    = ((Mat_MPISBAIJ *)B->data)->A;
972:   MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);
973:   return(0);
974: }

980: PetscErrorCode  MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
981: {
983:   PetscMPIInt    size;
984:   MPI_Comm       comm;
985:   Mat            B=*newmat;
986:   Mat_MUMPS      *mumps;
987:   void           (*f)(void);

990:   if (reuse == MAT_INITIAL_MATRIX) {
991:     MatDuplicate(A,MAT_COPY_VALUES,&B);
992:   }

994:   PetscObjectGetComm((PetscObject)A,&comm);
995:   PetscNewLog(B,Mat_MUMPS,&mumps);

997:   mumps->MatDuplicate              = A->ops->duplicate;
998:   mumps->MatView                   = A->ops->view;
999:   mumps->MatAssemblyEnd            = A->ops->assemblyend;
1000:   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
1001:   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
1002:   mumps->MatDestroy                = A->ops->destroy;
1003:   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
1004:   mumps->CleanUpMUMPS              = PETSC_FALSE;
1005:   mumps->isAIJ                     = PETSC_FALSE;
1006: 
1007:   B->spptr                         = (void*)mumps;
1008:   B->ops->duplicate                = MatDuplicate_MUMPS;
1009:   B->ops->view                     = MatView_MUMPS;
1010:   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
1011:   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
1012:   B->ops->destroy                  = MatDestroy_MUMPS;

1014:   MPI_Comm_size(comm,&size);
1015:   if (size == 1) {
1016:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
1017:                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);
1018:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
1019:                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);
1020:   } else {
1021:   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
1022:     PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);
1023:     if (f) { /* This case should always be true when this routine is called */
1024:       mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
1025:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1026:                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
1027:                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);
1028:     }
1029:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
1030:                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);
1031:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
1032:                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);
1033:   }

1035:   PetscInfo(A,"Using MUMPS for Cholesky factorization and solves.\n");
1036:   PetscObjectChangeTypeName((PetscObject)B,newtype);
1037:   *newmat = B;
1038:   return(0);
1039: }

1044: PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
1046:   Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;

1049:   (*lu->MatDuplicate)(A,op,M);
1050:   PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));
1051:   return(0);
1052: }

1054: /*MC
1055:   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
1056:   distributed and sequential matrices via the external package MUMPS.

1058:   If MUMPS is installed (see the manual for instructions
1059:   on how to declare the existence of external packages),
1060:   a matrix type can be constructed which invokes MUMPS solvers.
1061:   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS), then
1062:   optionally call MatSeqSBAIJSetPreallocation() or MatMPISBAIJSetPreallocation(). DO NOT
1063:   call MatCreateSeqSBAIJ/MPISBAIJ() directly, or the preallocation information will be LOST!

1065:   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
1066:   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
1067:   MatSeqSBAIJSetPreallocation() is supported, and similarly MatMPISBAIJSetPreallocation() is supported 
1068:   for communicators controlling multiple processes.  It is recommended that you call both of
1069:   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
1070:   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
1071:   without data copy AFTER the matrix values have been set.

1073:   Options Database Keys:
1074: + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
1075: . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
1076: . -mat_mumps_icntl_4 <0,...,4> - print level
1077: . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
1078: . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
1079: . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
1080: . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
1081: . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
1082: . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
1083: . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
1084: . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
1085: . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
1086: . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
1087: . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
1088: - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold

1090:   Level: beginner

1092: .seealso: MATAIJMUMPS
1093: M*/
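/* Usage sketch (illustrative, analogous to MATAIJMUMPS above; n and nz are placeholders):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
     MatSetType(A,MATSBAIJMUMPS);
     MatSeqSBAIJSetPreallocation(A,1,nz,PETSC_NULL);
     MatMPISBAIJSetPreallocation(A,1,nz,PETSC_NULL,nz,PETSC_NULL);

   then run with -ksp_type preonly -pc_type cholesky to use the MUMPS Cholesky factorization. */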

1098: PetscErrorCode  MatCreate_SBAIJMUMPS(Mat A)
1099: {
1101:   PetscMPIInt    size;

1104:   MPI_Comm_size(((PetscObject)A)->comm,&size);
1105:   if (size == 1) {
1106:     MatSetType(A,MATSEQSBAIJ);
1107:   } else {
1108:     MatSetType(A,MATMPISBAIJ);
1109:   }
1110:   MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);
1111:   return(0);
1112: }