Actual source code: mumps.c
petsc-master 2020-08-25
2: /*
3: Provides an interface to the MUMPS sparse solver
4: */
5: #include <petscpkg_version.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8: #include <../src/mat/impls/sell/mpi/mpisell.h>
10: EXTERN_C_BEGIN
11: #if defined(PETSC_USE_COMPLEX)
12: #if defined(PETSC_USE_REAL_SINGLE)
13: #include <cmumps_c.h>
14: #else
15: #include <zmumps_c.h>
16: #endif
17: #else
18: #if defined(PETSC_USE_REAL_SINGLE)
19: #include <smumps_c.h>
20: #else
21: #include <dmumps_c.h>
22: #endif
23: #endif
24: EXTERN_C_END
25: #define JOB_INIT -1
26: #define JOB_FACTSYMBOLIC 1
27: #define JOB_FACTNUMERIC 2
28: #define JOB_SOLVE 3
29: #define JOB_END -2
31: /* calls to MUMPS */
32: #if defined(PETSC_USE_COMPLEX)
33: #if defined(PETSC_USE_REAL_SINGLE)
34: #define MUMPS_c cmumps_c
35: #else
36: #define MUMPS_c zmumps_c
37: #endif
38: #else
39: #if defined(PETSC_USE_REAL_SINGLE)
40: #define MUMPS_c smumps_c
41: #else
42: #define MUMPS_c dmumps_c
43: #endif
44: #endif
46: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47: number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48: naming convention in PetscMPIInt, PetscBLASInt etc.
49: */
50: typedef MUMPS_INT PetscMUMPSInt;
52: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53: #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, hence checking this macro is the more reliable test */
54: #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
55: #endif
56: #else
57: #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one uses to build MUMPS in full 64-bit mode */
58: #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
59: #endif
60: #endif
62: #define MPIU_MUMPSINT MPI_INT
63: #define PETSC_MUMPS_INT_MAX 2147483647
64: #define PETSC_MUMPS_INT_MIN -2147483648
66: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is a row/column index or some other small integer */
67: PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68: {
70: if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
71: *b = (PetscMUMPSInt)(a);
72: return(0);
73: }
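/* Example (illustrative sketch; nrows and n are placeholder names): a caller that must hand a
   PetscInt count to MUMPS casts it first, e.g.

     PetscInt      nrows = 1000;
     PetscMUMPSInt n;

     PetscMUMPSIntCast(nrows,&n);   (errors out with PETSC_ERR_ARG_OUTOFRANGE in 64-bit-index debug builds if nrows overflows MUMPS_INT)
*/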
75: /* Put these utility routines here since they are only used in this file */
76: PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
77: {
79: PetscInt myval;
80: PetscBool myset;
82: /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
83: PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);
84: if (myset) {PetscMUMPSIntCast(myval,value);}
85: if (set) *set = myset;
86: return(0);
87: }
88: #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
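/* Example (illustrative sketch; the option name below is only an illustration): inside a
   PetscOptionsBegin()/PetscOptionsEnd() block, where PetscOptionsObject is in scope, a MUMPS control
   parameter can be read directly into a PetscMUMPSInt, e.g.

     PetscMUMPSInt icntl;
     PetscBool     flg;

     PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering","None",mumps->id.ICNTL(7),&icntl,&flg);
     if (flg) mumps->id.ICNTL(7) = icntl;
*/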
90: /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
91: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92: #define PetscMUMPS_c(mumps) \
93: do { \
94: if (mumps->use_petsc_omp_support) { \
95: if (mumps->is_omp_master) { \
96: PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl); \
97: MUMPS_c(&mumps->id); \
98: PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl); \
99: } \
100: PetscOmpCtrlBarrier(mumps->omp_ctrl); \
101: /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
102: to each process, so we only Bcast info[1] (an error code) and leave the others (since they do not have \
103: an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82. \
104: omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
105: */ \
106: MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm); \
107: MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm); \
108: MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm); \
109: } else { \
110: MUMPS_c(&mumps->id); \
111: } \
112: } while (0)
113: #else
114: #define PetscMUMPS_c(mumps) \
115: do { MUMPS_c(&mumps->id); } while (0)
116: #endif
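/* Example: every MUMPS phase in this file follows the same pattern of setting id.job, calling the
   wrapper above and checking INFOG(1); e.g. the solve phase later on does

     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);
     if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
*/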
118: /* declare MumpsScalar */
119: #if defined(PETSC_USE_COMPLEX)
120: #if defined(PETSC_USE_REAL_SINGLE)
121: #define MumpsScalar mumps_complex
122: #else
123: #define MumpsScalar mumps_double_complex
124: #endif
125: #else
126: #define MumpsScalar PetscScalar
127: #endif
129: /* macros s.t. indices match MUMPS documentation */
130: #define ICNTL(I) icntl[(I)-1]
131: #define CNTL(I) cntl[(I)-1]
132: #define INFOG(I) infog[(I)-1]
133: #define INFO(I) info[(I)-1]
134: #define RINFOG(I) rinfog[(I)-1]
135: #define RINFO(I) rinfo[(I)-1]
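/* Example: with the macros above, the 1-based numbering of the MUMPS manual can be used directly;
   e.g. enabling null pivot row detection is written as

     mumps->id.ICNTL(24) = 1;

   which stores into the 0-based C array entry mumps->id.icntl[23]. */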
137: typedef struct Mat_MUMPS Mat_MUMPS;
138: struct Mat_MUMPS {
139: #if defined(PETSC_USE_COMPLEX)
140: #if defined(PETSC_USE_REAL_SINGLE)
141: CMUMPS_STRUC_C id;
142: #else
143: ZMUMPS_STRUC_C id;
144: #endif
145: #else
146: #if defined(PETSC_USE_REAL_SINGLE)
147: SMUMPS_STRUC_C id;
148: #else
149: DMUMPS_STRUC_C id;
150: #endif
151: #endif
153: MatStructure matstruc;
154: PetscMPIInt myid,petsc_size;
155: PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */
156: PetscScalar *val,*val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
157: PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */
158: PetscMUMPSInt sym;
159: MPI_Comm mumps_comm;
160: PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */
161: VecScatter scat_rhs, scat_sol; /* used by MatSolve() */
162: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
163: PetscMUMPSInt lrhs_loc,nloc_rhs,*irhs_loc;
164: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
165: PetscInt *rhs_nrow,max_nrhs;
166: PetscMPIInt *rhs_recvcounts,*rhs_disps;
167: PetscScalar *rhs_loc,*rhs_recvbuf;
168: #endif
169: #endif
170: Vec b_seq,x_seq;
171: PetscInt ninfo,*info; /* which INFO to display */
172: PetscInt sizeredrhs;
173: PetscScalar *schur_sol;
174: PetscInt schur_sizesol;
175: PetscMUMPSInt *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
176: PetscInt64 cur_ilen,cur_jlen; /* current len of ia_alloc[], ja_alloc[] */
177: PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
179: /* stuff used by petsc/mumps OpenMP support */
180: PetscBool use_petsc_omp_support;
181: PetscOmpCtrl omp_ctrl; /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
182: MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */
183: PetscInt64 *recvcount; /* a collection of nnz on omp_master */
184: PetscMPIInt tag,omp_comm_size;
185: PetscBool is_omp_master; /* is this rank the master of omp_comm */
186: MPI_Request *reqs;
187: };
189: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
190: Here, nrow is the number of rows, ia[] is the row-pointer array and ja[] holds the column indices.
191: */
192: static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
193: {
195: PetscInt nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
198: #if defined(PETSC_USE_64BIT_INDICES)
199: {
200: PetscInt i;
201: if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
202: PetscFree(mumps->ia_alloc);
203: PetscMalloc1(nrow+1,&mumps->ia_alloc);
204: mumps->cur_ilen = nrow+1;
205: }
206: if (nnz > mumps->cur_jlen) {
207: PetscFree(mumps->ja_alloc);
208: PetscMalloc1(nnz,&mumps->ja_alloc);
209: mumps->cur_jlen = nnz;
210: }
211: for (i=0; i<nrow+1; i++) {PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));}
212: for (i=0; i<nnz; i++) {PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));}
213: *ia_mumps = mumps->ia_alloc;
214: *ja_mumps = mumps->ja_alloc;
215: }
216: #else
217: *ia_mumps = ia;
218: *ja_mumps = ja;
219: #endif
220: PetscMUMPSIntCast(nnz,nnz_mumps);
221: return(0);
222: }
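/* Example (illustrative sketch mirroring the sparse-rhs path in MatMatSolve_MUMPS() below, where
   MatGetRowIJ() is asked for 1-based indices of a SeqAIJ matrix Bt):

     PetscInt  spnr,*ia,*ja;
     PetscBool flg;

     MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
     PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
*/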
224: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
225: {
229: PetscFree(mumps->id.listvar_schur);
230: PetscFree(mumps->id.redrhs);
231: PetscFree(mumps->schur_sol);
232: mumps->id.size_schur = 0;
233: mumps->id.schur_lld = 0;
234: mumps->id.ICNTL(19) = 0;
235: return(0);
236: }
238: /* solve with rhs in mumps->id.redrhs and return in the same location */
239: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
240: {
241: Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;
242: Mat S,B,X;
243: MatFactorSchurStatus schurstatus;
244: PetscInt sizesol;
245: PetscErrorCode ierr;
248: MatFactorFactorizeSchurComplement(F);
249: MatFactorGetSchurComplement(F,&S,&schurstatus);
250: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);
251: MatSetType(B,((PetscObject)S)->type_name);
252: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
253: MatBindToCPU(B,S->boundtocpu);
254: #endif
255: switch (schurstatus) {
256: case MAT_FACTOR_SCHUR_FACTORED:
257: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);
258: MatSetType(X,((PetscObject)S)->type_name);
259: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
260: MatBindToCPU(X,S->boundtocpu);
261: #endif
262: if (!mumps->id.ICNTL(9)) { /* transpose solve */
263: MatMatSolveTranspose(S,B,X);
264: } else {
265: MatMatSolve(S,B,X);
266: }
267: break;
268: case MAT_FACTOR_SCHUR_INVERTED:
269: sizesol = mumps->id.nrhs*mumps->id.size_schur;
270: if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
271: PetscFree(mumps->schur_sol);
272: PetscMalloc1(sizesol,&mumps->schur_sol);
273: mumps->schur_sizesol = sizesol;
274: }
275: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);
276: MatSetType(X,((PetscObject)S)->type_name);
277: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
278: MatBindToCPU(X,S->boundtocpu);
279: #endif
280: MatProductCreateWithMat(S,B,NULL,X);
281: if (!mumps->id.ICNTL(9)) { /* transpose solve */
282: MatProductSetType(X,MATPRODUCT_AtB);
283: } else {
284: MatProductSetType(X,MATPRODUCT_AB);
285: }
286: MatProductSetFromOptions(X);
287: MatProductSymbolic(X);
288: MatProductNumeric(X);
290: MatCopy(X,B,SAME_NONZERO_PATTERN);
291: break;
292: default:
293: SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
294: break;
295: }
296: MatFactorRestoreSchurComplement(F,&S,schurstatus);
297: MatDestroy(&B);
298: MatDestroy(&X);
299: return(0);
300: }
302: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
303: {
304: Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;
308: if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
309: return(0);
310: }
311: if (!expansion) { /* prepare for the condensation step */
312: PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
313: /* allocate MUMPS internal array to store reduced right-hand sides */
314: if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
315: PetscFree(mumps->id.redrhs);
316: mumps->id.lredrhs = mumps->id.size_schur;
317: PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);
318: mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
319: }
320: mumps->id.ICNTL(26) = 1; /* condensation phase */
321: } else { /* prepare for the expansion step */
322: /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
323: MatMumpsSolveSchur_Private(F);
324: mumps->id.ICNTL(26) = 2; /* expansion phase */
325: PetscMUMPS_c(mumps);
326: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
327: /* restore defaults */
328: mumps->id.ICNTL(26) = -1;
329: /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
330: if (mumps->id.nrhs > 1) {
331: PetscFree(mumps->id.redrhs);
332: mumps->id.lredrhs = 0;
333: mumps->sizeredrhs = 0;
334: }
335: }
336: return(0);
337: }
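/* Example (illustrative sketch; A, F, is_schur, b, x are placeholder names): the user-level workflow
   that exercises the Schur helpers above is

     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatFactorSetSchurIS(F,is_schur);        (is_schur lists the variables of the Schur complement)
     ... symbolic and numeric factorization of F ...
     MatSolve(F,b,x);                        (triggers the condensation/expansion steps handled above)

   The Schur complement itself can then be accessed with MatFactorGetSchurComplement() or
   MatFactorCreateSchurComplement(). */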
339: /*
340: MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
342: input:
343: A - matrix in aij,baij or sbaij format
344: shift - 0: C style output triple; 1: Fortran style output triple.
345: reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
346: MAT_REUSE_MATRIX: only the values in v array are updated
347: output:
348: nnz - dim of r, c, and v (number of local nonzero entries of A)
349: r, c, v - row and col index, matrix values (matrix triples)
351: The index arrays r and c are obtained in a single PetscMalloc2() and freed with PetscFree2(mumps->irn,mumps->jcn) in
352: MatDestroy_MUMPS(). The values v either point directly into the matrix's own value array or into a separate buffer
353: (mumps->val_alloc); because v is only sometimes allocated here, the allocations cannot easily be merged into a PetscMalloc3().
355: */
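/* Example: for the 2x2 sequential AIJ matrix [1 2; 0 3] with shift=1 (Fortran-style indices, which
   is what MUMPS expects), the routines below produce nnz=3 and the triples

     r = {1,1,2},  c = {1,2,2},  v = {1.0,2.0,3.0}
*/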
357: PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
358: {
359: const PetscScalar *av;
360: const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
361: PetscInt64 nz,rnz,i,j,k;
362: PetscErrorCode ierr;
363: PetscMUMPSInt *row,*col;
364: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
367: MatSeqAIJGetArrayRead(A,&av);
368: mumps->val = (PetscScalar*)av;
369: if (reuse == MAT_INITIAL_MATRIX) {
370: nz = aa->nz;
371: ai = aa->i;
372: aj = aa->j;
373: PetscMalloc2(nz,&row,nz,&col);
374: for (i=k=0; i<M; i++) {
375: rnz = ai[i+1] - ai[i];
376: ajj = aj + ai[i];
377: for (j=0; j<rnz; j++) {
378: PetscMUMPSIntCast(i+shift,&row[k]);
379: PetscMUMPSIntCast(ajj[j] + shift,&col[k]);
380: k++;
381: }
382: }
383: mumps->irn = row;
384: mumps->jcn = col;
385: mumps->nnz = nz;
386: }
387: MatSeqAIJRestoreArrayRead(A,&av);
388: return(0);
389: }
391: PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
392: {
394: PetscInt64 nz,i,j,k,r;
395: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
396: PetscMUMPSInt *row,*col;
399: mumps->val = a->val;
400: if (reuse == MAT_INITIAL_MATRIX) {
401: nz = a->sliidx[a->totalslices];
402: PetscMalloc2(nz,&row,nz,&col);
403: for (i=k=0; i<a->totalslices; i++) {
404: for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
405: PetscMUMPSIntCast(8*i+r+shift,&row[k++]);
406: }
407: }
408: for (i=0;i<nz;i++) {PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);}
409: mumps->irn = row;
410: mumps->jcn = col;
411: mumps->nnz = nz;
412: }
413: return(0);
414: }
416: PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
417: {
418: Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data;
419: const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
420: PetscInt64 M,nz,idx=0,rnz,i,j,k,m;
421: PetscInt bs;
423: PetscMUMPSInt *row,*col;
426: MatGetBlockSize(A,&bs);
427: M = A->rmap->N/bs;
428: mumps->val = aa->a;
429: if (reuse == MAT_INITIAL_MATRIX) {
430: ai = aa->i; aj = aa->j;
431: nz = bs2*aa->nz;
432: PetscMalloc2(nz,&row,nz,&col);
433: for (i=0; i<M; i++) {
434: ajj = aj + ai[i];
435: rnz = ai[i+1] - ai[i];
436: for (k=0; k<rnz; k++) {
437: for (j=0; j<bs; j++) {
438: for (m=0; m<bs; m++) {
439: PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);
440: PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);
441: idx++;
442: }
443: }
444: }
445: }
446: mumps->irn = row;
447: mumps->jcn = col;
448: mumps->nnz = nz;
449: }
450: return(0);
451: }
453: PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
454: {
455: const PetscInt *ai, *aj,*ajj;
456: PetscInt bs;
457: PetscInt64 nz,rnz,i,j,k,m;
458: PetscErrorCode ierr;
459: PetscMUMPSInt *row,*col;
460: PetscScalar *val;
461: Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data;
462: const PetscInt bs2=aa->bs2,mbs=aa->mbs;
463: #if defined(PETSC_USE_COMPLEX)
464: PetscBool hermitian;
465: #endif
468: #if defined(PETSC_USE_COMPLEX)
469: MatGetOption(A,MAT_HERMITIAN,&hermitian);
470: if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
471: #endif
472: ai = aa->i;
473: aj = aa->j;
474: MatGetBlockSize(A,&bs);
475: if (reuse == MAT_INITIAL_MATRIX) {
476: nz = aa->nz;
477: PetscMalloc2(bs2*nz,&row,bs2*nz,&col);
478: if (bs>1) {
479: PetscMalloc1(bs2*nz,&mumps->val_alloc);
480: mumps->val = mumps->val_alloc;
481: } else {
482: mumps->val = aa->a;
483: }
484: mumps->irn = row;
485: mumps->jcn = col;
486: } else {
487: if (bs == 1) mumps->val = aa->a;
488: row = mumps->irn;
489: col = mumps->jcn;
490: }
491: val = mumps->val;
493: nz = 0;
494: if (bs>1) {
495: for (i=0; i<mbs; i++) {
496: rnz = ai[i+1] - ai[i];
497: ajj = aj + ai[i];
498: for (j=0; j<rnz; j++) {
499: for (k=0; k<bs; k++) {
500: for (m=0; m<bs; m++) {
501: if (ajj[j]>i || k>=m) {
502: if (reuse == MAT_INITIAL_MATRIX) {
503: PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);
504: PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);
505: }
506: val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
507: }
508: }
509: }
510: }
511: }
512: } else if (reuse == MAT_INITIAL_MATRIX) {
513: for (i=0; i<mbs; i++) {
514: rnz = ai[i+1] - ai[i];
515: ajj = aj + ai[i];
516: for (j=0; j<rnz; j++) {
517: PetscMUMPSIntCast(i+shift,&row[nz]);
518: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
519: nz++;
520: }
521: }
522: if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
523: }
524: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
525: return(0);
526: }
528: PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
529: {
530: const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n;
531: PetscInt64 nz,rnz,i,j;
532: const PetscScalar *av,*v1;
533: PetscScalar *val;
534: PetscErrorCode ierr;
535: PetscMUMPSInt *row,*col;
536: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
537: PetscBool missing;
538: #if defined(PETSC_USE_COMPLEX)
539: PetscBool hermitian;
540: #endif
543: #if defined(PETSC_USE_COMPLEX)
544: MatGetOption(A,MAT_HERMITIAN,&hermitian);
545: if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
546: #endif
547: MatSeqAIJGetArrayRead(A,&av);
548: ai = aa->i; aj = aa->j;
549: adiag = aa->diag;
550: MatMissingDiagonal_SeqAIJ(A,&missing,NULL);
551: if (reuse == MAT_INITIAL_MATRIX) {
552: /* count nz in the upper triangular part of A */
553: nz = 0;
554: if (missing) {
555: for (i=0; i<M; i++) {
556: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
557: for (j=ai[i];j<ai[i+1];j++) {
558: if (aj[j] < i) continue;
559: nz++;
560: }
561: } else {
562: nz += ai[i+1] - adiag[i];
563: }
564: }
565: } else {
566: for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
567: }
568: PetscMalloc2(nz,&row,nz,&col);
569: PetscMalloc1(nz,&val);
570: mumps->nnz = nz;
571: mumps->irn = row;
572: mumps->jcn = col;
573: mumps->val = mumps->val_alloc = val;
575: nz = 0;
576: if (missing) {
577: for (i=0; i<M; i++) {
578: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
579: for (j=ai[i];j<ai[i+1];j++) {
580: if (aj[j] < i) continue;
581: PetscMUMPSIntCast(i+shift,&row[nz]);
582: PetscMUMPSIntCast(aj[j]+shift,&col[nz]);
583: val[nz] = av[j];
584: nz++;
585: }
586: } else {
587: rnz = ai[i+1] - adiag[i];
588: ajj = aj + adiag[i];
589: v1 = av + adiag[i];
590: for (j=0; j<rnz; j++) {
591: PetscMUMPSIntCast(i+shift,&row[nz]);
592: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
593: val[nz++] = v1[j];
594: }
595: }
596: }
597: } else {
598: for (i=0; i<M; i++) {
599: rnz = ai[i+1] - adiag[i];
600: ajj = aj + adiag[i];
601: v1 = av + adiag[i];
602: for (j=0; j<rnz; j++) {
603: PetscMUMPSIntCast(i+shift,&row[nz]);
604: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
605: val[nz++] = v1[j];
606: }
607: }
608: }
609: } else {
610: nz = 0;
611: val = mumps->val;
612: if (missing) {
613: for (i=0; i <M; i++) {
614: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
615: for (j=ai[i];j<ai[i+1];j++) {
616: if (aj[j] < i) continue;
617: val[nz++] = av[j];
618: }
619: } else {
620: rnz = ai[i+1] - adiag[i];
621: v1 = av + adiag[i];
622: for (j=0; j<rnz; j++) {
623: val[nz++] = v1[j];
624: }
625: }
626: }
627: } else {
628: for (i=0; i <M; i++) {
629: rnz = ai[i+1] - adiag[i];
630: v1 = av + adiag[i];
631: for (j=0; j<rnz; j++) {
632: val[nz++] = v1[j];
633: }
634: }
635: }
636: }
637: MatSeqAIJRestoreArrayRead(A,&av);
638: return(0);
639: }
641: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
642: {
643: PetscErrorCode ierr;
644: const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
645: PetscInt bs;
646: PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB;
647: PetscMUMPSInt *row,*col;
648: const PetscScalar *av,*bv,*v1,*v2;
649: PetscScalar *val;
650: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
651: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data;
652: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
653: const PetscInt bs2=aa->bs2,mbs=aa->mbs;
654: #if defined(PETSC_USE_COMPLEX)
655: PetscBool hermitian;
656: #endif
659: #if defined(PETSC_USE_COMPLEX)
660: MatGetOption(A,MAT_HERMITIAN,&hermitian);
661: if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
662: #endif
663: MatGetBlockSize(A,&bs);
664: rstart = A->rmap->rstart;
665: ai = aa->i;
666: aj = aa->j;
667: bi = bb->i;
668: bj = bb->j;
669: av = aa->a;
670: bv = bb->a;
672: garray = mat->garray;
674: if (reuse == MAT_INITIAL_MATRIX) {
675: nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
676: PetscMalloc2(nz,&row,nz,&col);
677: PetscMalloc1(nz,&val);
678: /* cannot determine the exact mumps->nnz yet: for bs>1 only the upper triangle of each diagonal block is passed, so the count is accumulated below */
679: mumps->irn = row;
680: mumps->jcn = col;
681: mumps->val = mumps->val_alloc = val;
682: } else {
683: val = mumps->val;
684: }
686: jj = 0; irow = rstart;
687: for (i=0; i<mbs; i++) {
688: ajj = aj + ai[i]; /* ptr to the beginning of this row */
689: countA = ai[i+1] - ai[i];
690: countB = bi[i+1] - bi[i];
691: bjj = bj + bi[i];
692: v1 = av + ai[i]*bs2;
693: v2 = bv + bi[i]*bs2;
695: if (bs>1) {
696: /* A-part */
697: for (j=0; j<countA; j++) {
698: for (k=0; k<bs; k++) {
699: for (m=0; m<bs; m++) {
700: if (rstart + ajj[j]*bs>irow || k>=m) {
701: if (reuse == MAT_INITIAL_MATRIX) {
702: PetscMUMPSIntCast(irow + m + shift,&row[jj]);
703: PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);
704: }
705: val[jj++] = v1[j*bs2 + m + k*bs];
706: }
707: }
708: }
709: }
711: /* B-part */
712: for (j=0; j < countB; j++) {
713: for (k=0; k<bs; k++) {
714: for (m=0; m<bs; m++) {
715: if (reuse == MAT_INITIAL_MATRIX) {
716: PetscMUMPSIntCast(irow + m + shift,&row[jj]);
717: PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);
718: }
719: val[jj++] = v2[j*bs2 + m + k*bs];
720: }
721: }
722: }
723: } else {
724: /* A-part */
725: for (j=0; j<countA; j++) {
726: if (reuse == MAT_INITIAL_MATRIX) {
727: PetscMUMPSIntCast(irow + shift,&row[jj]);
728: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
729: }
730: val[jj++] = v1[j];
731: }
733: /* B-part */
734: for (j=0; j < countB; j++) {
735: if (reuse == MAT_INITIAL_MATRIX) {
736: PetscMUMPSIntCast(irow + shift,&row[jj]);
737: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
738: }
739: val[jj++] = v2[j];
740: }
741: }
742: irow+=bs;
743: }
744: mumps->nnz = jj;
745: return(0);
746: }
748: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
749: {
750: const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
751: PetscErrorCode ierr;
752: PetscInt64 rstart,nz,i,j,jj,irow,countA,countB;
753: PetscMUMPSInt *row,*col;
754: const PetscScalar *av, *bv,*v1,*v2;
755: PetscScalar *val;
756: Mat Ad,Ao;
757: Mat_SeqAIJ *aa;
758: Mat_SeqAIJ *bb;
761: MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
762: MatSeqAIJGetArrayRead(Ad,&av);
763: MatSeqAIJGetArrayRead(Ao,&bv);
765: aa = (Mat_SeqAIJ*)(Ad)->data;
766: bb = (Mat_SeqAIJ*)(Ao)->data;
767: ai = aa->i;
768: aj = aa->j;
769: bi = bb->i;
770: bj = bb->j;
772: rstart = A->rmap->rstart;
774: if (reuse == MAT_INITIAL_MATRIX) {
775: nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
776: PetscMalloc2(nz,&row,nz,&col);
777: PetscMalloc1(nz,&val);
778: mumps->nnz = nz;
779: mumps->irn = row;
780: mumps->jcn = col;
781: mumps->val = mumps->val_alloc = val;
782: } else {
783: val = mumps->val;
784: }
786: jj = 0; irow = rstart;
787: for (i=0; i<m; i++) {
788: ajj = aj + ai[i]; /* ptr to the beginning of this row */
789: countA = ai[i+1] - ai[i];
790: countB = bi[i+1] - bi[i];
791: bjj = bj + bi[i];
792: v1 = av + ai[i];
793: v2 = bv + bi[i];
795: /* A-part */
796: for (j=0; j<countA; j++) {
797: if (reuse == MAT_INITIAL_MATRIX) {
798: PetscMUMPSIntCast(irow + shift,&row[jj]);
799: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
800: }
801: val[jj++] = v1[j];
802: }
804: /* B-part */
805: for (j=0; j < countB; j++) {
806: if (reuse == MAT_INITIAL_MATRIX) {
807: PetscMUMPSIntCast(irow + shift,&row[jj]);
808: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
809: }
810: val[jj++] = v2[j];
811: }
812: irow++;
813: }
814: MatSeqAIJRestoreArrayRead(Ad,&av);
815: MatSeqAIJRestoreArrayRead(Ao,&bv);
816: return(0);
817: }
819: PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
820: {
821: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data;
822: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data;
823: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
824: const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
825: const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
826: const PetscInt bs2=mat->bs2;
827: PetscErrorCode ierr;
828: PetscInt bs;
829: PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx;
830: PetscMUMPSInt *row,*col;
831: const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
832: PetscScalar *val;
835: MatGetBlockSize(A,&bs);
836: if (reuse == MAT_INITIAL_MATRIX) {
837: nz = bs2*(aa->nz + bb->nz);
838: PetscMalloc2(nz,&row,nz,&col);
839: PetscMalloc1(nz,&val);
840: mumps->nnz = nz;
841: mumps->irn = row;
842: mumps->jcn = col;
843: mumps->val = mumps->val_alloc = val;
844: } else {
845: val = mumps->val;
846: }
848: jj = 0; irow = rstart;
849: for (i=0; i<mbs; i++) {
850: countA = ai[i+1] - ai[i];
851: countB = bi[i+1] - bi[i];
852: ajj = aj + ai[i];
853: bjj = bj + bi[i];
854: v1 = av + bs2*ai[i];
855: v2 = bv + bs2*bi[i];
857: idx = 0;
858: /* A-part */
859: for (k=0; k<countA; k++) {
860: for (j=0; j<bs; j++) {
861: for (n=0; n<bs; n++) {
862: if (reuse == MAT_INITIAL_MATRIX) {
863: PetscMUMPSIntCast(irow + n + shift,&row[jj]);
864: PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);
865: }
866: val[jj++] = v1[idx++];
867: }
868: }
869: }
871: idx = 0;
872: /* B-part */
873: for (k=0; k<countB; k++) {
874: for (j=0; j<bs; j++) {
875: for (n=0; n<bs; n++) {
876: if (reuse == MAT_INITIAL_MATRIX) {
877: PetscMUMPSIntCast(irow + n + shift,&row[jj]);
878: PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);
879: }
880: val[jj++] = v2[idx++];
881: }
882: }
883: }
884: irow += bs;
885: }
886: return(0);
887: }
889: PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
890: {
891: const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
892: PetscErrorCode ierr;
893: PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
894: PetscMUMPSInt *row,*col;
895: const PetscScalar *av, *bv,*v1,*v2;
896: PetscScalar *val;
897: Mat Ad,Ao;
898: Mat_SeqAIJ *aa;
899: Mat_SeqAIJ *bb;
900: #if defined(PETSC_USE_COMPLEX)
901: PetscBool hermitian;
902: #endif
905: #if defined(PETSC_USE_COMPLEX)
906: MatGetOption(A,MAT_HERMITIAN,&hermitian);
907: if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
908: #endif
909: MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
910: MatSeqAIJGetArrayRead(Ad,&av);
911: MatSeqAIJGetArrayRead(Ao,&bv);
913: aa = (Mat_SeqAIJ*)(Ad)->data;
914: bb = (Mat_SeqAIJ*)(Ao)->data;
915: ai = aa->i;
916: aj = aa->j;
917: adiag = aa->diag;
918: bi = bb->i;
919: bj = bb->j;
921: rstart = A->rmap->rstart;
923: if (reuse == MAT_INITIAL_MATRIX) {
924: nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
925: nzb = 0; /* num of upper triangular entries in mat->B */
926: for (i=0; i<m; i++) {
927: nza += (ai[i+1] - adiag[i]);
928: countB = bi[i+1] - bi[i];
929: bjj = bj + bi[i];
930: for (j=0; j<countB; j++) {
931: if (garray[bjj[j]] > rstart) nzb++;
932: }
933: }
935: nz = nza + nzb; /* total nz of upper triangular part of mat */
936: PetscMalloc2(nz,&row,nz,&col);
937: PetscMalloc1(nz,&val);
938: mumps->nnz = nz;
939: mumps->irn = row;
940: mumps->jcn = col;
941: mumps->val = mumps->val_alloc = val;
942: } else {
943: val = mumps->val;
944: }
946: jj = 0; irow = rstart;
947: for (i=0; i<m; i++) {
948: ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
949: v1 = av + adiag[i];
950: countA = ai[i+1] - adiag[i];
951: countB = bi[i+1] - bi[i];
952: bjj = bj + bi[i];
953: v2 = bv + bi[i];
955: /* A-part */
956: for (j=0; j<countA; j++) {
957: if (reuse == MAT_INITIAL_MATRIX) {
958: PetscMUMPSIntCast(irow + shift,&row[jj]);
959: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
960: }
961: val[jj++] = v1[j];
962: }
964: /* B-part */
965: for (j=0; j < countB; j++) {
966: if (garray[bjj[j]] > rstart) {
967: if (reuse == MAT_INITIAL_MATRIX) {
968: PetscMUMPSIntCast(irow + shift,&row[jj]);
969: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
970: }
971: val[jj++] = v2[j];
972: }
973: }
974: irow++;
975: }
976: MatSeqAIJRestoreArrayRead(Ad,&av);
977: MatSeqAIJRestoreArrayRead(Ao,&bv);
978: return(0);
979: }
981: PetscErrorCode MatDestroy_MUMPS(Mat A)
982: {
984: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
987: PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);
988: VecScatterDestroy(&mumps->scat_rhs);
989: VecScatterDestroy(&mumps->scat_sol);
990: VecDestroy(&mumps->b_seq);
991: VecDestroy(&mumps->x_seq);
992: PetscFree(mumps->id.perm_in);
993: PetscFree2(mumps->irn,mumps->jcn);
994: PetscFree(mumps->val_alloc);
995: PetscFree(mumps->info);
996: MatMumpsResetSchur_Private(mumps);
997: mumps->id.job = JOB_END;
998: PetscMUMPS_c(mumps);
999: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
1000: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1001: if (mumps->use_petsc_omp_support) {
1002: PetscOmpCtrlDestroy(&mumps->omp_ctrl);
1003: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
1004: PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);
1005: PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);
1006: #endif
1007: }
1008: #endif
1009: PetscFree(mumps->ia_alloc);
1010: PetscFree(mumps->ja_alloc);
1011: PetscFree(mumps->recvcount);
1012: PetscFree(mumps->reqs);
1013: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
1014: PetscFree(mumps->irhs_loc);
1015: #endif
1016: PetscFree(A->data);
1018: /* clear composed functions */
1019: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1020: PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);
1021: PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);
1022: PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);
1023: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);
1024: PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);
1025: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);
1026: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);
1027: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);
1028: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);
1029: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);
1030: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);
1031: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);
1032: return(0);
1033: }
1035: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
1036: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1037: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1038: {
1039: PetscErrorCode ierr;
1040: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1041: const PetscMPIInt ompsize=mumps->omp_comm_size;
1042: PetscInt i,m,M,rstart;
1045: MatGetSize(A,&M,NULL);
1046: MatGetLocalSize(A,&m,NULL);
1047: if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1048: if (ompsize == 1) {
1049: if (!mumps->irhs_loc) {
1050: mumps->nloc_rhs = m;
1051: PetscMalloc1(m,&mumps->irhs_loc);
1052: MatGetOwnershipRange(A,&rstart,NULL);
1053: for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1054: }
1055: mumps->id.rhs_loc = (MumpsScalar*)array;
1056: } else {
1057: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1058: const PetscInt *ranges;
1059: PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks;
1060: MPI_Group petsc_group,omp_group;
1061: PetscScalar *recvbuf=NULL;
1063: if (mumps->is_omp_master) {
1064: /* Lazily initialize the omp stuff for distributed rhs */
1065: if (!mumps->irhs_loc) {
1066: PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);
1067: PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);
1068: MPI_Comm_group(mumps->petsc_comm,&petsc_group);
1069: MPI_Comm_group(mumps->omp_comm,&omp_group);
1070: for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1071: MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);
1073: /* Populate mumps->irhs_loc[], rhs_nrow[] */
1074: mumps->nloc_rhs = 0;
1075: MatGetOwnershipRanges(A,&ranges);
1076: for (j=0; j<ompsize; j++) {
1077: mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1078: mumps->nloc_rhs += mumps->rhs_nrow[j];
1079: }
1080: PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);
1081: for (j=k=0; j<ompsize; j++) {
1082: for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1083: }
1085: PetscFree2(omp_ranks,petsc_ranks);
1086: MPI_Group_free(&petsc_group);
1087: MPI_Group_free(&omp_group);
1088: }
1090: /* Realloc buffers when current nrhs is bigger than what we have met */
1091: if (nrhs > mumps->max_nrhs) {
1092: PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);
1093: PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);
1094: mumps->max_nrhs = nrhs;
1095: }
1097: /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1098: for (j=0; j<ompsize; j++) {PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);}
1099: mumps->rhs_disps[0] = 0;
1100: for (j=1; j<ompsize; j++) {
1101: mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1102: if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1103: }
1104: recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1105: }
1107: PetscMPIIntCast(m*nrhs,&sendcount);
1108: MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);
1110: if (mumps->is_omp_master) {
1111: if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1112: PetscScalar *dst,*dstbase = mumps->rhs_loc;
1113: for (j=0; j<ompsize; j++) {
1114: const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1115: dst = dstbase;
1116: for (i=0; i<nrhs; i++) {
1117: PetscArraycpy(dst,src,mumps->rhs_nrow[j]);
1118: src += mumps->rhs_nrow[j];
1119: dst += mumps->nloc_rhs;
1120: }
1121: dstbase += mumps->rhs_nrow[j];
1122: }
1123: }
1124: mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1125: }
1126: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1127: }
1128: mumps->id.nrhs = nrhs;
1129: mumps->id.nloc_rhs = mumps->nloc_rhs;
1130: mumps->id.lrhs_loc = mumps->nloc_rhs;
1131: mumps->id.irhs_loc = mumps->irhs_loc;
1132: return(0);
1133: }
1134: #endif /* PETSC_PKG_MUMPS_VERSION_GE(5,3,0) */
1136: PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1137: {
1138: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1139: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
1140: const PetscScalar *rarray;
1141: #endif
1142: PetscScalar *array;
1143: IS is_iden,is_petsc;
1144: PetscErrorCode ierr;
1145: PetscInt i;
1146: PetscBool second_solve = PETSC_FALSE;
1147: static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1150: PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);
1151: PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);
1153: if (A->factorerrortype) {
1154: PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1155: VecSetInf(x);
1156: return(0);
1157: }
1159: mumps->id.nrhs = 1;
1160: if (mumps->petsc_size > 1) {
1161: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
1162: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1163: VecGetArrayRead(b,&rarray);
1164: MatMumpsSetUpDistRHSInfo(A,1,rarray);
1165: #else
1166: mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1167: VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);
1168: VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);
1169: if (!mumps->myid) {
1170: VecGetArray(mumps->b_seq,&array);
1171: mumps->id.rhs = (MumpsScalar*)array;
1172: }
1173: #endif
1174: } else { /* petsc_size == 1 */
1175: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1176: VecCopy(b,x);
1177: VecGetArray(x,&array);
1178: mumps->id.rhs = (MumpsScalar*)array;
1179: }
1181: /*
1182: handle condensation step of Schur complement (if any)
1183: We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1184: According to the MUMPS (5.0.0) manual, any other value would be harmful during the factorization phase.
1185: Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1186: This requires an extra call to PetscMUMPS_c and the computation of the factors of S.
1187: */
1188: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1189: if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
1190: second_solve = PETSC_TRUE;
1191: MatMumpsHandleSchur_Private(A,PETSC_FALSE);
1192: }
1193: /* solve phase */
1194: /*-------------*/
1195: mumps->id.job = JOB_SOLVE;
1196: PetscMUMPS_c(mumps);
1197: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1199: /* handle expansion step of Schur complement (if any) */
1200: if (second_solve) {
1201: MatMumpsHandleSchur_Private(A,PETSC_TRUE);
1202: }
1204: if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1205: if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1206: /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
1207: VecScatterDestroy(&mumps->scat_sol);
1208: }
1209: if (!mumps->scat_sol) { /* create scatter scat_sol */
1210: PetscInt *isol2_loc=NULL;
1211: ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden); /* from */
1212: PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);
1213: for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1214: ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc); /* to */
1215: VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);
1216: ISDestroy(&is_iden);
1217: ISDestroy(&is_petsc);
1218: mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1219: }
1221: VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
1222: VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
1223: }
1225: if (mumps->petsc_size > 1) {
1226: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
1227: VecRestoreArrayRead(b,&rarray);
1228: #else
1229: if (!mumps->myid) {VecRestoreArray(mumps->b_seq,&array);}
1230: #endif
1231: } else {VecRestoreArray(x,&array);}
1233: PetscLogFlops(2.0*mumps->id.RINFO(3));
1234: return(0);
1235: }
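/* Example (illustrative sketch; A, b, x are placeholder names): MatSolve_MUMPS() is normally reached
   through the factored-matrix interface, e.g.

     Mat           F;
     MatFactorInfo info;
     IS            isrow,iscol;

     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatGetOrdering(A,MATORDERINGNATURAL,&isrow,&iscol);
     MatFactorInfoInitialize(&info);
     MatLUFactorSymbolic(F,A,isrow,iscol,&info);
     MatLUFactorNumeric(F,A,&info);
     MatSolve(F,b,x);

   or, from the command line, through a KSP with -pc_type lu -pc_factor_mat_solver_type mumps. */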
1237: PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1238: {
1239: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1243: mumps->id.ICNTL(9) = 0;
1244: MatSolve_MUMPS(A,b,x);
1245: mumps->id.ICNTL(9) = 1;
1246: return(0);
1247: }
1249: PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1250: {
1251: PetscErrorCode ierr;
1252: Mat Bt = NULL;
1253: PetscBool denseX,denseB,flg,flgT;
1254: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1255: PetscInt i,nrhs,M;
1256: PetscScalar *array;
1257: const PetscScalar *rbray;
1258: PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0;
1259: PetscMUMPSInt *isol_loc,*isol_loc_save;
1260: PetscScalar *bray,*sol_loc,*sol_loc_save;
1261: IS is_to,is_from;
1262: PetscInt k,proc,j,m,myrstart;
1263: const PetscInt *rstart;
1264: Vec v_mpi,msol_loc;
1265: VecScatter scat_sol;
1266: #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1267: Vec b_seq;
1268: VecScatter scat_rhs;
1269: #endif
1270: PetscScalar *aa;
1271: PetscInt spnr,*ia,*ja;
1272: Mat_MPIAIJ *b = NULL;
1275: PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);
1276: if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1278: PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);
1279: if (denseB) {
1280: if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1281: mumps->id.ICNTL(20)= 0; /* dense RHS */
1282: } else { /* sparse B */
1283: if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1284: PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);
1285: if (flgT) { /* input B is the transpose of the actual RHS matrix,
1286: because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1287: MatTransposeGetMat(B,&Bt);
1288: } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1289: mumps->id.ICNTL(20)= 1; /* sparse RHS */
1290: }
1292: MatGetSize(B,&M,&nrhs);
1293: mumps->id.nrhs = nrhs;
1294: mumps->id.lrhs = M;
1295: mumps->id.rhs = NULL;
1297: if (mumps->petsc_size == 1) {
1298: PetscScalar *aa;
1299: PetscInt spnr,*ia,*ja;
1300: PetscBool second_solve = PETSC_FALSE;
1302: MatDenseGetArray(X,&array);
1303: mumps->id.rhs = (MumpsScalar*)array;
1305: if (denseB) {
1306: /* copy B to X */
1307: MatDenseGetArrayRead(B,&rbray);
1308: PetscArraycpy(array,rbray,M*nrhs);
1309: MatDenseRestoreArrayRead(B,&rbray);
1310: } else { /* sparse B */
1311: MatSeqAIJGetArray(Bt,&aa);
1312: MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1313: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1314: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
1315: mumps->id.rhs_sparse = (MumpsScalar*)aa;
1316: }
1317: /* handle condensation step of Schur complement (if any) */
1318: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1319: second_solve = PETSC_TRUE;
1320: MatMumpsHandleSchur_Private(A,PETSC_FALSE);
1321: }
1322: /* solve phase */
1323: /*-------------*/
1324: mumps->id.job = JOB_SOLVE;
1325: PetscMUMPS_c(mumps);
1326: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1328: /* handle expansion step of Schur complement (if any) */
1329: if (second_solve) {
1330: MatMumpsHandleSchur_Private(A,PETSC_TRUE);
1331: }
1332: if (!denseB) { /* sparse B */
1333: MatSeqAIJRestoreArray(Bt,&aa);
1334: MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1335: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1336: }
1337: MatDenseRestoreArray(X,&array);
1338: return(0);
1339: }
1341: /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1342: if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
1344: /* create msol_loc to hold mumps local solution */
1345: isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1346: sol_loc_save = (PetscScalar*)mumps->id.sol_loc;
1348: lsol_loc = mumps->id.lsol_loc;
1349: nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
1350: PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);
1351: mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1352: mumps->id.isol_loc = isol_loc;
1354: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);
1356: if (denseB) {
1357: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
1358: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1359: MatDenseGetArrayRead(B,&rbray);
1360: MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);
1361: MatDenseRestoreArrayRead(B,&rbray);
1362: MatGetLocalSize(B,&m,NULL);
1363: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);
1364: #else
1365: /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be realized as an MPI_Gather, resulting in
1366: very inefficient communication. An optimization is to use VecScatterCreateToZero() to gather B onto rank 0, and then on rank
1367: 0 re-arrange B into the desired order, which is a local operation.
1368: */
1370: /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1371: /* wrap dense rhs matrix B into a vector v_mpi */
1372: MatGetLocalSize(B,&m,NULL);
1373: MatDenseGetArray(B,&bray);
1374: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);
1375: MatDenseRestoreArray(B,&bray);
1377: /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1378: if (!mumps->myid) {
1379: PetscInt *idx;
1380: /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1381: PetscMalloc1(nrhs*M,&idx);
1382: MatGetOwnershipRanges(B,&rstart);
1383: k = 0;
1384: for (proc=0; proc<mumps->petsc_size; proc++){
1385: for (j=0; j<nrhs; j++){
1386: for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1387: }
1388: }
1390: VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);
1391: ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);
1392: ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);
1393: } else {
1394: VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);
1395: ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);
1396: ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);
1397: }
1398: VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);
1399: VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);
1400: ISDestroy(&is_to);
1401: ISDestroy(&is_from);
1402: VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);
1404: if (!mumps->myid) { /* define rhs on the host */
1405: VecGetArray(b_seq,&bray);
1406: mumps->id.rhs = (MumpsScalar*)bray;
1407: VecRestoreArray(b_seq,&bray);
1408: }
1409: #endif
1410: } else { /* sparse B */
1411: b = (Mat_MPIAIJ*)Bt->data;
1413: /* wrap dense X into a vector v_mpi */
1414: MatGetLocalSize(X,&m,NULL);
1415: MatDenseGetArray(X,&bray);
1416: VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);
1417: MatDenseRestoreArray(X,&bray);
1419: if (!mumps->myid) {
1420: MatSeqAIJGetArray(b->A,&aa);
1421: MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1422: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1423: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
1424: mumps->id.rhs_sparse = (MumpsScalar*)aa;
1425: } else {
1426: mumps->id.irhs_ptr = NULL;
1427: mumps->id.irhs_sparse = NULL;
1428: mumps->id.nz_rhs = 0;
1429: mumps->id.rhs_sparse = NULL;
1430: }
1431: }
1433: /* solve phase */
1434: /*-------------*/
1435: mumps->id.job = JOB_SOLVE;
1436: PetscMUMPS_c(mumps);
1437: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1439: /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1440: MatDenseGetArray(X,&array);
1441: VecPlaceArray(v_mpi,array);
1443: /* create scatter scat_sol */
1444: MatGetOwnershipRanges(X,&rstart);
1445: /* iidx: index used to scatter the mumps solution into petsc X */
1447: ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);
1448: PetscMalloc1(nlsol_loc,&idxx);
1449: for (i=0; i<lsol_loc; i++) {
1450: isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1452: for (proc=0; proc<mumps->petsc_size; proc++){
1453: if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1454: myrstart = rstart[proc];
1455: k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */
1456: iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */
1457: m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1458: break;
1459: }
1460: }
1462: for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1463: }
1464: ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);
1465: VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);
1466: VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);
1467: ISDestroy(&is_from);
1468: ISDestroy(&is_to);
1469: VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);
1470: MatDenseRestoreArray(X,&array);
1472: /* free spaces */
1473: mumps->id.sol_loc = (MumpsScalar*)sol_loc_save;
1474: mumps->id.isol_loc = isol_loc_save;
1476: PetscFree2(sol_loc,isol_loc);
1477: PetscFree(idxx);
1478: VecDestroy(&msol_loc);
1479: VecDestroy(&v_mpi);
1480: if (!denseB) {
1481: if (!mumps->myid) {
1482: b = (Mat_MPIAIJ*)Bt->data;
1483: MatSeqAIJRestoreArray(b->A,&aa);
1484: MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1485: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1486: }
1487: } else {
1488: #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1489: VecDestroy(&b_seq);
1490: VecScatterDestroy(&scat_rhs);
1491: #endif
1492: }
1493: VecScatterDestroy(&scat_sol);
1494: PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));
1495: return(0);
1496: }
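/* Example (illustrative sketch; F, B, X, comm, n, nrhs are placeholder names): multiple right-hand
   sides stored as the columns of a dense matrix are solved in one call,

     MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,n,nrhs,NULL,&B);
     ... fill B ...
     MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&X);
     MatMatSolve(F,B,X);
*/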
1498: PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1499: {
1501: PetscBool flg;
1502: Mat B;
1505: PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);
1506: if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1508: /* Create B=Bt^T that uses Bt's data structure */
1509: MatCreateTranspose(Bt,&B);
1511: MatMatSolve_MUMPS(A,B,X);
1512: MatDestroy(&B);
1513: return(0);
1514: }
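/* Example (illustrative sketch; F, Bt, X are placeholder names): for a sparse multiple right-hand
   side the caller passes the TRANSPOSE of the rhs matrix as an AIJ matrix (so that MUMPS effectively
   sees compressed-column storage) together with a dense X,

     MatMatTransposeSolve(F,Bt,X);    (Bt is nrhs-by-n MATAIJ, X is n-by-nrhs MATDENSE)
*/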
1516: #if !defined(PETSC_USE_COMPLEX)
1517: /*
1518: input:
1519: F: numeric factor
1520: output:
1521: nneg: total number of negative pivots
1522: nzero: total number of zero pivots
1523: npos: (global dimension of F) - nneg - nzero
1524: */
1525: PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1526: {
1527: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
1529: PetscMPIInt size;
1532: MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);
1533: /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1534: if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));
1536: if (nneg) *nneg = mumps->id.INFOG(12);
1537: if (nzero || npos) {
1538: if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1539: if (nzero) *nzero = mumps->id.INFOG(28);
1540: if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1541: }
1542: return(0);
1543: }
1544: #endif
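/* Example (illustrative sketch; A, F, nneg, nzero, npos are placeholder names): the inertia of a
   symmetric matrix is obtained from a MUMPS Cholesky factor, with the ICNTL settings required by
   the checks above,

     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);
     MatMumpsSetIcntl(F,13,1);        (skip ScaLAPACK on the root so the inertia is available in parallel)
     MatMumpsSetIcntl(F,24,1);        (enable null pivot row detection)
     ... symbolic and numeric Cholesky factorization of F ...
     MatGetInertia(F,&nneg,&nzero,&npos);
*/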
1546: PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1547: {
1549: PetscInt i,nreqs;
1550: PetscMUMPSInt *irn,*jcn;
1551: PetscMPIInt count;
1552: PetscInt64 totnnz,remain;
1553: const PetscInt osize=mumps->omp_comm_size;
1554: PetscScalar *val;
1557: if (osize > 1) {
1558: if (reuse == MAT_INITIAL_MATRIX) {
1559: /* master first gathers counts of nonzeros to receive */
1560: if (mumps->is_omp_master) {PetscMalloc1(osize,&mumps->recvcount);}
1561: MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);
1563: /* Then each computes number of send/recvs */
1564: if (mumps->is_omp_master) {
1565: /* Start from 1 since self communication does not go through MPI */
1566: nreqs = 0;
1567: for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1568: } else {
1569: nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1570: }
1571: PetscMalloc1(nreqs*3,&mumps->reqs); /* Triple the requests since we send irn, jcn and val separately */
1573: /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1574: MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1575: may be larger than 2^31-1, we have to slice the messages. Note omp_comm_size
1576: is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
1577: */
1578: nreqs = 0; /* counter for actual send/recvs */
1579: if (mumps->is_omp_master) {
1580: for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1581: PetscMalloc2(totnnz,&irn,totnnz,&jcn);
1582: PetscMalloc1(totnnz,&val);
1584: /* Self communication */
1585: PetscArraycpy(irn,mumps->irn,mumps->nnz);
1586: PetscArraycpy(jcn,mumps->jcn,mumps->nnz);
1587: PetscArraycpy(val,mumps->val,mumps->nnz);
1589: /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1590: PetscFree2(mumps->irn,mumps->jcn);
1591: PetscFree(mumps->val_alloc);
1592: mumps->nnz = totnnz;
1593: mumps->irn = irn;
1594: mumps->jcn = jcn;
1595: mumps->val = mumps->val_alloc = val;
1597: irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1598: jcn += mumps->recvcount[0];
1599: val += mumps->recvcount[0];
1601: /* Remote communication */
1602: for (i=1; i<osize; i++) {
1603: count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1604: remain = mumps->recvcount[i] - count;
1605: while (count>0) {
1606: MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1607: MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1608: MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1609: irn += count;
1610: jcn += count;
1611: val += count;
1612: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1613: remain -= count;
1614: }
1615: }
1616: } else {
1617: irn = mumps->irn;
1618: jcn = mumps->jcn;
1619: val = mumps->val;
1620: count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1621: remain = mumps->nnz - count;
1622: while (count>0) {
1623: MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1624: MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1625: MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1626: irn += count;
1627: jcn += count;
1628: val += count;
1629: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1630: remain -= count;
1631: }
1632: }
1633: } else {
1634: nreqs = 0;
1635: if (mumps->is_omp_master) {
1636: val = mumps->val + mumps->recvcount[0];
1637: for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1638: count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1639: remain = mumps->recvcount[i] - count;
1640: while (count>0) {
1641: MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1642: val += count;
1643: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1644: remain -= count;
1645: }
1646: }
1647: } else {
1648: val = mumps->val;
1649: count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1650: remain = mumps->nnz - count;
1651: while (count>0) {
1652: MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1653: val += count;
1654: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1655: remain -= count;
1656: }
1657: }
1658: }
1659: MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);
1660: mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
1661: }
1662: return(0);
1663: }
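/* For illustration, the chunking pattern used above in isolation: a buffer whose 64-bit length may exceed
   the 32-bit MPI count limit is sent in slices of at most PETSC_MPI_INT_MAX entries. This is only a sketch;
   the helper name and signature are illustrative and not part of this interface.

     static PetscErrorCode SendLargeScalarBuffer(const PetscScalar *buf,PetscInt64 len,PetscMPIInt dest,PetscMPIInt tag,MPI_Comm comm,MPI_Request *reqs,PetscInt *nreqs)
     {
       PetscMPIInt count  = (PetscMPIInt)PetscMin(len,PETSC_MPI_INT_MAX);
       PetscInt64  remain = len - count;

       while (count > 0) {  // one MPI_Isend per slice; all slices may share one tag, as noted above
         MPI_Isend(buf,count,MPIU_SCALAR,dest,tag,comm,&reqs[(*nreqs)++]);
         buf    += count;
         count   = (PetscMPIInt)PetscMin(remain,PETSC_MPI_INT_MAX);
         remain -= count;
       }
       return(0);
     }
*/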
1665: PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1666: {
1667: Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data;
1669: PetscBool isMPIAIJ;
1672: if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* except the empty-matrix case INFOG(1)=-16, INFO(2)=0 */
1673: if (mumps->id.INFOG(1) == -6) {
1674: PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1675: }
1676: PetscInfo2(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1677: return(0);
1678: }
1680: (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);
1681: MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);
1683: /* numerical factorization phase */
1684: /*-------------------------------*/
1685: mumps->id.job = JOB_FACTNUMERIC;
1686: if (!mumps->id.ICNTL(18)) { /* A is centralized */
1687: if (!mumps->myid) {
1688: mumps->id.a = (MumpsScalar*)mumps->val;
1689: }
1690: } else {
1691: mumps->id.a_loc = (MumpsScalar*)mumps->val;
1692: }
1693: PetscMUMPS_c(mumps);
1694: if (mumps->id.INFOG(1) < 0) {
1695: if (A->erroriffailure) {
1696: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1697: } else {
1698: if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1699: PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1700: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1701: } else if (mumps->id.INFOG(1) == -13) {
1702: PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1703: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1704: } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1705: PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1706: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1707: } else {
1708: PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1709: F->factorerrortype = MAT_FACTOR_OTHER;
1710: }
1711: }
1712: }
1713: if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"mumps->id.ICNTL(16) = %d\n",mumps->id.ICNTL(16));
1715: F->assembled = PETSC_TRUE;
1716: mumps->matstruc = SAME_NONZERO_PATTERN;
1717: if (F->schur) { /* reset Schur status to unfactored */
1718: #if defined(PETSC_HAVE_CUDA)
1719: F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1720: #endif
1721: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1722: mumps->id.ICNTL(19) = 2;
1723: MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);
1724: }
1725: MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);
1726: }
1728: /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1729: if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1731: if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1732: if (mumps->petsc_size > 1) {
1733: PetscInt lsol_loc;
1734: PetscScalar *sol_loc;
1736: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);
1738: /* distributed solution; Create x_seq=sol_loc for repeated use */
1739: if (mumps->x_seq) {
1740: VecScatterDestroy(&mumps->scat_sol);
1741: PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);
1742: VecDestroy(&mumps->x_seq);
1743: }
1744: lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1745: PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);
1746: mumps->id.lsol_loc = lsol_loc;
1747: mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1748: VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);
1749: }
1750: PetscLogFlops(mumps->id.RINFO(2));
1751: return(0);
1752: }
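/* For illustration, how the factorerrortype values set above are typically inspected when
   A->erroriffailure is false. This is a hedged sketch; F is the MUMPS factor of A and info a
   MatFactorInfo, as in the symbolic-factorization routines below.

     MatFactorError err;
     PetscInt       extra;

     MatLUFactorNumeric(F,A,&info);     // may record a failure in F instead of erroring out
     MatFactorGetError(F,&err);
     if (err == MAT_FACTOR_NUMERIC_ZEROPIVOT) {
       MatMumpsGetInfo(F,2,&extra);     // INFO(2) carries the additional MUMPS diagnostic for INFOG(1)=-10
     }
*/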
1754: /* Sets MUMPS options from the options database */
1755: PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1756: {
1757: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1759: PetscMUMPSInt icntl=0;
1760: PetscInt info[80],i,ninfo=80;
1761: PetscBool flg=PETSC_FALSE;
1764: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");
1765: PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);
1766: if (flg) mumps->id.ICNTL(1) = icntl;
1767: PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);
1768: if (flg) mumps->id.ICNTL(2) = icntl;
1769: PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);
1770: if (flg) mumps->id.ICNTL(3) = icntl;
1772: PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);
1773: if (flg) mumps->id.ICNTL(4) = icntl;
1774: if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1776: PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);
1777: if (flg) mumps->id.ICNTL(6) = icntl;
1779: PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=Petsc (sequential only), 3=Scotch, 4=PORD, 5=Metis, 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);
1780: if (flg) {
1781: if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(7)=1 (PETSc ordering) is not supported by the PETSc/MUMPS interface for parallel matrices\n");
1782: else mumps->id.ICNTL(7) = icntl;
1783: }
1785: PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);
1786: /* PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL); handled by MatSolveTranspose_MUMPS() */
1787: PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);
1788: PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);
1789: PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);
1790: PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);
1791: PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);
1792: PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);
1793: if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1794: MatDestroy(&F->schur);
1795: MatMumpsResetSchur_Private(mumps);
1796: }
1797: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL); -- sparse rhs is not supported in PETSc API */
1798: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL); we only use distributed solution vector */
1800: PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);
1801: PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);
1802: PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);
1803: if (mumps->id.ICNTL(24)) {
1804: mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
1805: }
1807: PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);
1808: PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);
1809: PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);
1810: PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);
1811: PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);
1812: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL); */ /* call MatMumpsGetInverse() directly */
1813: PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);
1814: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL); -- not supported by PETSc API */
1815: PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);
1816: PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);
1817: PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);
1818: PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);
1820: PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);
1821: PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);
1822: PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);
1823: PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);
1824: PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);
1825: PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);
1827: PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);
1829: PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);
1830: if (ninfo) {
1831: if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must be <= 80\n",ninfo);
1832: PetscMalloc1(ninfo,&mumps->info);
1833: mumps->ninfo = ninfo;
1834: for (i=0; i<ninfo; i++) {
1835: if (info[i] < 1 || info[i] > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 80\n",info[i]);
1836: else mumps->info[i] = info[i];
1837: }
1838: }
1840: PetscOptionsEnd();
1841: return(0);
1842: }
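/* For illustration, a typical command line exercising the options parsed above. The executable name is a
   placeholder and the chosen values are examples, not recommendations.

     ./app -pc_type lu -pc_factor_mat_solver_type mumps \
           -mat_mumps_icntl_14 30 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12 \
           -mat_mumps_view_info 15,16,23 -ksp_view
*/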
1844: PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
1845: {
1847: PetscInt nthreads=0;
1850: mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1851: MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);
1852: MPI_Comm_rank(mumps->petsc_comm,&mumps->myid); /* so that code like "if (!myid)" still works even if mumps_comm is different */
1854: PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);
1855: if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1856: PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);
1857: if (mumps->use_petsc_omp_support) {
1858: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1859: PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);
1860: PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);
1861: #else
1862: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n");
1863: #endif
1864: } else {
1865: mumps->omp_comm = PETSC_COMM_SELF;
1866: mumps->mumps_comm = mumps->petsc_comm;
1867: mumps->is_omp_master = PETSC_TRUE;
1868: }
1869: MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);
1870: mumps->reqs = NULL;
1871: mumps->tag = 0;
1873: mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1874: mumps->id.job = JOB_INIT;
1875: mumps->id.par = 1; /* host participates in factorization and solve */
1876: mumps->id.sym = mumps->sym;
1878: PetscMUMPS_c(mumps);
1879: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
1881: /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1882: For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1883: */
1884: MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm); /* see MUMPS-5.1.2 Manual Section 9 */
1885: MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);
1887: mumps->scat_rhs = NULL;
1888: mumps->scat_sol = NULL;
1890: /* set PETSc-MUMPS default options - override MUMPS default */
1891: mumps->id.ICNTL(3) = 0;
1892: mumps->id.ICNTL(4) = 0;
1893: if (mumps->petsc_size == 1) {
1894: mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1895: mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */
1896: } else {
1897: mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1898: mumps->id.ICNTL(20) = 0; /* rhs is in dense format */
1899: mumps->id.ICNTL(21) = 1; /* distributed solution */
1900: }
1902: /* schur */
1903: mumps->id.size_schur = 0;
1904: mumps->id.listvar_schur = NULL;
1905: mumps->id.schur = NULL;
1906: mumps->sizeredrhs = 0;
1907: mumps->schur_sol = NULL;
1908: mumps->schur_sizesol = 0;
1909: return(0);
1910: }
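/* For illustration, the MPI+OpenMP hybrid mode set up above is typically requested as follows. This is a
   sketch only: the launcher, executable and counts are placeholders, and PETSc must be configured with
   --with-openmp and hwloc as noted in the MATSOLVERMUMPS notes below.

     mpirun -n 16 ./app -pc_factor_mat_solver_type mumps -mat_mumps_use_omp_threads 4
*/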
1912: PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1913: {
1917: if (mumps->id.INFOG(1) < 0) {
1918: if (A->erroriffailure) {
1919: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1920: } else {
1921: if (mumps->id.INFOG(1) == -6) {
1922: PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1923: F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1924: } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1925: PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1926: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1927: } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* empty matrix: INFOG(1)=-16 with INFO(2)=N=0 */
1928: PetscInfo(F,"Empty matrix\n");
1929: } else {
1930: PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1931: F->factorerrortype = MAT_FACTOR_OTHER;
1932: }
1933: }
1934: }
1935: return(0);
1936: }
1938: /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
1939: PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1940: {
1941: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1943: Vec b;
1944: const PetscInt M = A->rmap->N;
1947: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
1949: /* Set MUMPS options from the options database */
1950: PetscSetMUMPSFromOptions(F,A);
1952: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
1953: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
1955: /* analysis phase */
1956: /*----------------*/
1957: mumps->id.job = JOB_FACTSYMBOLIC;
1958: mumps->id.n = M;
1959: switch (mumps->id.ICNTL(18)) {
1960: case 0: /* centralized assembled matrix input */
1961: if (!mumps->myid) {
1962: mumps->id.nnz = mumps->nnz;
1963: mumps->id.irn = mumps->irn;
1964: mumps->id.jcn = mumps->jcn;
1965: if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1966: if (mumps->id.ICNTL(7) == 1) { /* use the user-provided matrix ordering - assuming r = c ordering */
1967: if (!mumps->myid) {
1968: const PetscInt *idx;
1969: PetscInt i;
1971: PetscMalloc1(M,&mumps->id.perm_in);
1972: ISGetIndices(r,&idx);
1973: for (i=0; i<M; i++) {PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));} /* perm_in[]: start from 1, not 0! */
1974: ISRestoreIndices(r,&idx);
1975: }
1976: }
1977: }
1978: break;
1979: case 3: /* distributed assembled matrix input (size>1) */
1980: mumps->id.nnz_loc = mumps->nnz;
1981: mumps->id.irn_loc = mumps->irn;
1982: mumps->id.jcn_loc = mumps->jcn;
1983: if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1984: /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1985: MatCreateVecs(A,NULL,&b);
1986: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
1987: VecDestroy(&b);
1988: break;
1989: }
1990: PetscMUMPS_c(mumps);
1991: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
1993: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
1994: F->ops->solve = MatSolve_MUMPS;
1995: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
1996: F->ops->matsolve = MatMatSolve_MUMPS;
1997: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
1998: return(0);
1999: }
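/* For illustration, the factor/solve sequence that reaches the analysis phase above. This is a minimal
   sketch; error checking and object destruction are omitted.

     Mat           A,F;
     Vec           b,x;
     IS            rperm,cperm;
     MatFactorInfo info;

     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatGetOrdering(A,MATORDERINGNATURAL,&rperm,&cperm);   // used only when ICNTL(7)=1 and sequential
     MatFactorInfoInitialize(&info);
     MatLUFactorSymbolic(F,A,rperm,cperm,&info);           // analysis phase (this routine)
     MatLUFactorNumeric(F,A,&info);                        // numerical factorization
     MatSolve(F,b,x);
*/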
2001: /* Note the Petsc r and c permutations are ignored */
2002: PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2003: {
2004: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
2006: Vec b;
2007: const PetscInt M = A->rmap->N;
2010: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2012: /* Set MUMPS options from the options database */
2013: PetscSetMUMPSFromOptions(F,A);
2015: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2016: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
2018: /* analysis phase */
2019: /*----------------*/
2020: mumps->id.job = JOB_FACTSYMBOLIC;
2021: mumps->id.n = M;
2022: switch (mumps->id.ICNTL(18)) {
2023: case 0: /* centralized assembled matrix input */
2024: if (!mumps->myid) {
2025: mumps->id.nnz = mumps->nnz;
2026: mumps->id.irn = mumps->irn;
2027: mumps->id.jcn = mumps->jcn;
2028: if (mumps->id.ICNTL(6)>1) {
2029: mumps->id.a = (MumpsScalar*)mumps->val;
2030: }
2031: }
2032: break;
2033: case 3: /* distributed assembled matrix input (size>1) */
2034: mumps->id.nnz_loc = mumps->nnz;
2035: mumps->id.irn_loc = mumps->irn;
2036: mumps->id.jcn_loc = mumps->jcn;
2037: if (mumps->id.ICNTL(6)>1) {
2038: mumps->id.a_loc = (MumpsScalar*)mumps->val;
2039: }
2040: /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2041: MatCreateVecs(A,NULL,&b);
2042: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
2043: VecDestroy(&b);
2044: break;
2045: }
2046: PetscMUMPS_c(mumps);
2047: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
2049: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2050: F->ops->solve = MatSolve_MUMPS;
2051: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2052: return(0);
2053: }
2055: /* Note the Petsc r permutation and factor info are ignored */
2056: PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2057: {
2058: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
2060: Vec b;
2061: const PetscInt M = A->rmap->N;
2064: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2066: /* Set MUMPS options from the options database */
2067: PetscSetMUMPSFromOptions(F,A);
2069: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2070: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
2072: /* analysis phase */
2073: /*----------------*/
2074: mumps->id.job = JOB_FACTSYMBOLIC;
2075: mumps->id.n = M;
2076: switch (mumps->id.ICNTL(18)) {
2077: case 0: /* centralized assembled matrix input */
2078: if (!mumps->myid) {
2079: mumps->id.nnz = mumps->nnz;
2080: mumps->id.irn = mumps->irn;
2081: mumps->id.jcn = mumps->jcn;
2082: if (mumps->id.ICNTL(6)>1) {
2083: mumps->id.a = (MumpsScalar*)mumps->val;
2084: }
2085: }
2086: break;
2087: case 3: /* distributed assembled matrix input (size>1) */
2088: mumps->id.nnz_loc = mumps->nnz;
2089: mumps->id.irn_loc = mumps->irn;
2090: mumps->id.jcn_loc = mumps->jcn;
2091: if (mumps->id.ICNTL(6)>1) {
2092: mumps->id.a_loc = (MumpsScalar*)mumps->val;
2093: }
2094: /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2095: MatCreateVecs(A,NULL,&b);
2096: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
2097: VecDestroy(&b);
2098: break;
2099: }
2100: PetscMUMPS_c(mumps);
2101: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
2103: F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2104: F->ops->solve = MatSolve_MUMPS;
2105: F->ops->solvetranspose = MatSolve_MUMPS;
2106: F->ops->matsolve = MatMatSolve_MUMPS;
2107: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2108: #if defined(PETSC_USE_COMPLEX)
2109: F->ops->getinertia = NULL;
2110: #else
2111: F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2112: #endif
2113: return(0);
2114: }
2116: PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2117: {
2118: PetscErrorCode ierr;
2119: PetscBool iascii;
2120: PetscViewerFormat format;
2121: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
2124: /* check if matrix is mumps type */
2125: if (A->ops->solve != MatSolve_MUMPS) return(0);
2127: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
2128: if (iascii) {
2129: PetscViewerGetFormat(viewer,&format);
2130: if (format == PETSC_VIEWER_ASCII_INFO) {
2131: PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");
2132: PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);
2133: PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);
2134: PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));
2135: PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));
2136: PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));
2137: PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));
2138: PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));
2139: PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));
2140: PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));
2141: PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));
2142: PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));
2143: PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));
2144: if (mumps->id.ICNTL(11)>0) {
2145: PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));
2146: PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));
2147: PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));
2148: PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));
2149: PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));
2150: PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));
2151: }
2152: PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));
2153: PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));
2154: PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));
2155: /* ICNTL(15-17) not used */
2156: PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));
2157: PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));
2158: PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d \n",mumps->id.ICNTL(20));
2159: PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));
2160: PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));
2161: PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));
2163: PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));
2164: PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));
2165: PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d \n",mumps->id.ICNTL(26));
2166: PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d \n",mumps->id.ICNTL(27));
2167: PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));
2168: PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));
2170: PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));
2171: PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));
2172: PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));
2173: PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));
2174: PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));
2175: PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));
2177: PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));
2178: PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));
2179: PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));
2180: PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));
2181: PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));
2182: PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));
2184: /* information local to each processor */
2185: PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");
2186: PetscViewerASCIIPushSynchronized(viewer);
2187: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));
2188: PetscViewerFlush(viewer);
2189: PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");
2190: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));
2191: PetscViewerFlush(viewer);
2192: PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");
2193: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));
2194: PetscViewerFlush(viewer);
2196: PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");
2197: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));
2198: PetscViewerFlush(viewer);
2200: PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");
2201: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));
2202: PetscViewerFlush(viewer);
2204: PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");
2205: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));
2206: PetscViewerFlush(viewer);
2208: if (mumps->ninfo && mumps->ninfo <= 80){
2209: PetscInt i;
2210: for (i=0; i<mumps->ninfo; i++){
2211: PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);
2212: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));
2213: PetscViewerFlush(viewer);
2214: }
2215: }
2216: PetscViewerASCIIPopSynchronized(viewer);
2218: if (!mumps->myid) { /* information from the host */
2219: PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));
2220: PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));
2221: PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));
2222: PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));
2224: PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));
2225: PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));
2226: PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));
2227: PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));
2228: PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));
2229: PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));
2230: PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));
2231: PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));
2232: PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));
2233: PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));
2234: PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));
2235: PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));
2236: PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));
2237: PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));
2238: PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));
2239: PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));
2240: PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));
2241: PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));
2242: PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));
2243: PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));
2244: PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));
2245: PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));
2246: PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));
2247: PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));
2248: PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));
2249: PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));
2250: PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));
2251: PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));
2252: PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));
2253: PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));
2254: PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));
2255: PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));
2256: PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));
2257: PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));
2258: }
2259: }
2260: }
2261: return(0);
2262: }
2264: PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2265: {
2266: Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2269: info->block_size = 1.0;
2270: info->nz_allocated = mumps->id.INFOG(20);
2271: info->nz_used = mumps->id.INFOG(20);
2272: info->nz_unneeded = 0.0;
2273: info->assemblies = 0.0;
2274: info->mallocs = 0.0;
2275: info->memory = 0.0;
2276: info->fill_ratio_given = 0;
2277: info->fill_ratio_needed = 0;
2278: info->factor_mallocs = 0;
2279: return(0);
2280: }
2282: /* -------------------------------------------------------------------------------------------*/
2283: PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2284: {
2285: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2286: const PetscScalar *arr;
2287: const PetscInt *idxs;
2288: PetscInt size,i;
2289: PetscErrorCode ierr;
2292: ISGetLocalSize(is,&size);
2293: if (mumps->petsc_size > 1) {
2294: PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2296: ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2297: MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);
2298: if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from within PETSc\n");
2299: }
2301: /* Schur complement matrix */
2302: MatDestroy(&F->schur);
2303: MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);
2304: MatDenseGetArrayRead(F->schur,&arr);
2305: mumps->id.schur = (MumpsScalar*)arr;
2306: mumps->id.size_schur = size;
2307: mumps->id.schur_lld = size;
2308: MatDenseRestoreArrayRead(F->schur,&arr);
2309: if (mumps->sym == 1) {
2310: MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);
2311: }
2313: /* MUMPS expects Fortran style indices */
2314: PetscFree(mumps->id.listvar_schur);
2315: PetscMalloc1(size,&mumps->id.listvar_schur);
2316: ISGetIndices(is,&idxs);
2317: for (i=0; i<size; i++) {PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));}
2318: ISRestoreIndices(is,&idxs);
2319: if (mumps->petsc_size > 1) {
2320: mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2321: } else {
2322: if (F->factortype == MAT_FACTOR_LU) {
2323: mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2324: } else {
2325: mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2326: }
2327: }
2328: /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2329: mumps->id.ICNTL(26) = -1;
2330: return(0);
2331: }
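/* For illustration, how the Schur complement support configured above is typically driven. This is a
   hedged sketch; the index set "is" selecting the Schur variables is application-defined, and error
   checking and cleanup are omitted.

     Mat           A,F,S;
     IS            is,rperm,cperm;
     MatFactorInfo info;

     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatFactorSetSchurIS(F,is);                            // must be called before the symbolic factorization
     MatGetOrdering(A,MATORDERINGNATURAL,&rperm,&cperm);
     MatFactorInfoInitialize(&info);
     MatLUFactorSymbolic(F,A,rperm,cperm,&info);
     MatLUFactorNumeric(F,A,&info);
     MatFactorCreateSchurComplement(F,&S,NULL);            // dense size_schur x size_schur matrix, see below
*/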
2333: /* -------------------------------------------------------------------------------------------*/
2334: PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2335: {
2336: Mat St;
2337: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2338: PetscScalar *array;
2339: #if defined(PETSC_USE_COMPLEX)
2340: PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2341: #endif
2345: if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2346: MatCreate(PETSC_COMM_SELF,&St);
2347: MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);
2348: MatSetType(St,MATDENSE);
2349: MatSetUp(St);
2350: MatDenseGetArray(St,&array);
2351: if (!mumps->sym) { /* MUMPS always returns a full matrix */
2352: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2353: PetscInt i,j,N=mumps->id.size_schur;
2354: for (i=0;i<N;i++) {
2355: for (j=0;j<N;j++) {
2356: #if !defined(PETSC_USE_COMPLEX)
2357: PetscScalar val = mumps->id.schur[i*N+j];
2358: #else
2359: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2360: #endif
2361: array[j*N+i] = val;
2362: }
2363: }
2364: } else { /* stored by columns */
2365: PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);
2366: }
2367: } else { /* either full or lower-triangular (not packed) */
2368: if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2369: PetscInt i,j,N=mumps->id.size_schur;
2370: for (i=0;i<N;i++) {
2371: for (j=i;j<N;j++) {
2372: #if !defined(PETSC_USE_COMPLEX)
2373: PetscScalar val = mumps->id.schur[i*N+j];
2374: #else
2375: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2376: #endif
2377: array[i*N+j] = val;
2378: array[j*N+i] = val;
2379: }
2380: }
2381: } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2382: PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);
2383: } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2384: PetscInt i,j,N=mumps->id.size_schur;
2385: for (i=0;i<N;i++) {
2386: for (j=0;j<i+1;j++) {
2387: #if !defined(PETSC_USE_COMPLEX)
2388: PetscScalar val = mumps->id.schur[i*N+j];
2389: #else
2390: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2391: #endif
2392: array[i*N+j] = val;
2393: array[j*N+i] = val;
2394: }
2395: }
2396: }
2397: }
2398: MatDenseRestoreArray(St,&array);
2399: *S = St;
2400: return(0);
2401: }
2403: /* -------------------------------------------------------------------------------------------*/
2404: PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2405: {
2407: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2410: PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));
2411: return(0);
2412: }
2414: PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2415: {
2416: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2419: *ival = mumps->id.ICNTL(icntl);
2420: return(0);
2421: }
2423: /*@
2424: MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2426: Logically Collective on Mat
2428: Input Parameters:
2429: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2430: . icntl - index of MUMPS parameter array ICNTL()
2431: - ival - value of MUMPS ICNTL(icntl)
2433: Options Database:
2434: . -mat_mumps_icntl_<icntl> <ival>
2436: Level: beginner
2438: References:
2439: . MUMPS Users' Guide
2441: .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2442: @*/
2443: PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2444: {
2449: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2452: PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2453: return(0);
2454: }
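/* For illustration (assuming F was obtained from MatGetFactor() with MATSOLVERMUMPS), the calls below are
   equivalent to passing -mat_mumps_icntl_24 1 on the command line:

     PetscInt ival;
     MatMumpsSetIcntl(F,24,1);       // turn on null pivot row detection
     MatMumpsGetIcntl(F,24,&ival);   // read the value back
*/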
2456: /*@
2457: MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2459: Logically Collective on Mat
2461: Input Parameters:
2462: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2463: - icntl - index of MUMPS parameter array ICNTL()
2465: Output Parameter:
2466: . ival - value of MUMPS ICNTL(icntl)
2468: Level: beginner
2470: References:
2471: . MUMPS Users' Guide
2473: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2474: @*/
2475: PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2476: {
2481: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2484: PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2485: return(0);
2486: }
2488: /* -------------------------------------------------------------------------------------------*/
2489: PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2490: {
2491: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2494: mumps->id.CNTL(icntl) = val;
2495: return(0);
2496: }
2498: PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2499: {
2500: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2503: *val = mumps->id.CNTL(icntl);
2504: return(0);
2505: }
2507: /*@
2508: MatMumpsSetCntl - Set MUMPS parameter CNTL()
2510: Logically Collective on Mat
2512: Input Parameters:
2513: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2514: . icntl - index of MUMPS parameter array CNTL()
2515: - val - value of MUMPS CNTL(icntl)
2517: Options Database:
2518: . -mat_mumps_cntl_<icntl> <val>
2520: Level: beginner
2522: References:
2523: . MUMPS Users' Guide
2525: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2526: @*/
2527: PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2528: {
2533: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2536: PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2537: return(0);
2538: }
2540: /*@
2541: MatMumpsGetCntl - Get MUMPS parameter CNTL()
2543: Logically Collective on Mat
2545: Input Parameters:
2546: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2547: - icntl - index of MUMPS parameter array CNTL()
2549: Output Parameter:
2550: . val - value of MUMPS CNTL(icntl)
2552: Level: beginner
2554: References:
2555: . MUMPS Users' Guide
2557: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2558: @*/
2559: PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2560: {
2565: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2568: PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2569: return(0);
2570: }
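/* For illustration (assuming F was obtained from MatGetFactor() with MATSOLVERMUMPS), the calls below are
   equivalent to passing -mat_mumps_cntl_1 0.01 on the command line:

     PetscReal val;
     MatMumpsSetCntl(F,1,0.01);      // relative pivoting threshold
     MatMumpsGetCntl(F,1,&val);      // read the value back
*/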
2572: PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2573: {
2574: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2577: *info = mumps->id.INFO(icntl);
2578: return(0);
2579: }
2581: PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2582: {
2583: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2586: *infog = mumps->id.INFOG(icntl);
2587: return(0);
2588: }
2590: PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2591: {
2592: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2595: *rinfo = mumps->id.RINFO(icntl);
2596: return(0);
2597: }
2599: PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2600: {
2601: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2604: *rinfog = mumps->id.RINFOG(icntl);
2605: return(0);
2606: }
2608: PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2609: {
2611: Mat Bt = NULL,Btseq = NULL;
2612: PetscBool flg;
2613: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2614: PetscScalar *aa;
2615: PetscInt spnr,*ia,*ja,M,nrhs;
2619: PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);
2620: if (flg) {
2621: MatTransposeGetMat(spRHS,&Bt);
2622: } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2624: MatMumpsSetIcntl(F,30,1);
2626: if (mumps->petsc_size > 1) {
2627: Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2628: Btseq = b->A;
2629: } else {
2630: Btseq = Bt;
2631: }
2633: MatGetSize(spRHS,&M,&nrhs);
2634: mumps->id.nrhs = nrhs;
2635: mumps->id.lrhs = M;
2636: mumps->id.rhs = NULL;
2638: if (!mumps->myid) {
2639: MatSeqAIJGetArray(Btseq,&aa);
2640: MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
2641: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2642: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
2643: mumps->id.rhs_sparse = (MumpsScalar*)aa;
2644: } else {
2645: mumps->id.irhs_ptr = NULL;
2646: mumps->id.irhs_sparse = NULL;
2647: mumps->id.nz_rhs = 0;
2648: mumps->id.rhs_sparse = NULL;
2649: }
2650: mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2651: mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2653: /* solve phase */
2654: /*-------------*/
2655: mumps->id.job = JOB_SOLVE;
2656: PetscMUMPS_c(mumps);
2657: if (mumps->id.INFOG(1) < 0)
2658: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
2660: if (!mumps->myid) {
2661: MatSeqAIJRestoreArray(Btseq,&aa);
2662: MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
2663: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2664: }
2665: return(0);
2666: }
2668: /*@
2669: MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2671: Logically Collective on Mat
2673: Input Parameters:
2674: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2675: - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2677: Output Parameter:
2678: . spRHS - requested entries of inverse of A
2680: Level: beginner
2682: References:
2683: . MUMPS Users' Guide
2685: .seealso: MatGetFactor(), MatCreateTranspose()
2686: @*/
2687: PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2688: {
2693: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2694: PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2695: return(0);
2696: }
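/* For illustration, requesting selected entries of inv(A). This is a hedged sketch; building spRHST,
   whose nonzero pattern on rank 0 marks the wanted entries, is application-specific.

     Mat F,spRHST,spRHS;              // F: MUMPS factor of A; spRHST: AIJ pattern held on rank 0

     MatCreateTranspose(spRHST,&spRHS);
     MatMumpsGetInverse(F,spRHS);     // requested entries of inv(A) are returned in the values of spRHST
     MatDestroy(&spRHS);
*/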
2698: PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2699: {
2701: Mat spRHS;
2704: MatCreateTranspose(spRHST,&spRHS);
2705: MatMumpsGetInverse_MUMPS(F,spRHS);
2706: MatDestroy(&spRHS);
2707: return(0);
2708: }
2710: /*@
2711: MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2713: Logically Collective on Mat
2715: Input Parameters:
2716: + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2717: - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2719: Output Parameter:
2720: . spRHST - requested entries of inverse of A^T
2722: Level: beginner
2724: References:
2725: . MUMPS Users' Guide
2727: .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2728: @*/
2729: PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2730: {
2732: PetscBool flg;
2736: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2737: PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);
2738: if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2740: PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2741: return(0);
2742: }
2744: /*@
2745: MatMumpsGetInfo - Get MUMPS parameter INFO()
2747: Logically Collective on Mat
2749: Input Parameters:
2750: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2751: - icntl - index of MUMPS parameter array INFO()
2753: Output Parameter:
2754: . ival - value of MUMPS INFO(icntl)
2756: Level: beginner
2758: References:
2759: . MUMPS Users' Guide
2761: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2762: @*/
2763: PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2764: {
2769: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2771: PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2772: return(0);
2773: }
2775: /*@
2776: MatMumpsGetInfog - Get MUMPS parameter INFOG()
2778: Logically Collective on Mat
2780: Input Parameters:
2781: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2782: - icntl - index of MUMPS parameter array INFOG()
2784: Output Parameter:
2785: . ival - value of MUMPS INFOG(icntl)
2787: Level: beginner
2789: References:
2790: . MUMPS Users' Guide
2792: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2793: @*/
2794: PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2795: {
2800: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2802: PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2803: return(0);
2804: }
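/* For illustration, typical post-factorization queries through the routines above. This is a sketch; F is
   the MUMPS factor, e.g. obtained from PCFactorGetMatrix() as in the MATSOLVERMUMPS notes below.

     PetscInt  nnull;
     PetscReal flops;

     MatMumpsGetInfog(F,28,&nnull);   // INFOG(28): number of null pivots encountered
     MatMumpsGetRinfog(F,3,&flops);   // RINFOG(3): global flops for the elimination after factorization
*/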
2806: /*@
2807: MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2809: Logically Collective on Mat
2811: Input Parameters:
2812: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2813: - icntl - index of MUMPS parameter array RINFO()
2815: Output Parameter:
2816: . val - value of MUMPS RINFO(icntl)
2818: Level: beginner
2820: References:
2821: . MUMPS Users' Guide
2823: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2824: @*/
2825: PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2826: {
2831: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2833: PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2834: return(0);
2835: }
2837: /*@
2838: MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2840: Logically Collective on Mat
2842: Input Parameters:
2843: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2844: - icntl - index of MUMPS parameter array RINFOG()
2846: Output Parameter:
2847: . val - value of MUMPS RINFOG(icntl)
2849: Level: beginner
2851: References:
2852: . MUMPS Users' Guide
2854: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2855: @*/
2856: PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2857: {
2862: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2864: PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2865: return(0);
2866: }
2868: /*MC
2869: MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
2870: distributed and sequential matrices via the external package MUMPS.
2872: Works with MATAIJ and MATSBAIJ matrices
2874: Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2876:    Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode, while the non-MUMPS part of PETSc runs in flat-MPI mode. See details below.
2878: Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2880: Options Database Keys:
2881: + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2882: . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2883: . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
2884: . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
2885: .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales the matrix (0 to 7)
2886: .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=PETSc (sequential only), 3=Scotch, 4=PORD, 5=Metis
2887: . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
2888: .  -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
2889: . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2890: . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2891: . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2892: . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
2893: . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
2894: . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2895: .  -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (in MB) that MUMPS can allocate per processor
2896: . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
2897: . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2898: .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2899: .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2900: . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2901: . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2902: . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2903: . -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2904: . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2905: . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2906: . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2907: . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
2908: . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
2909: . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2910: . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2911: . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2912: . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2913: -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2914:    The default m is typically the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual.
2916:   If run sequentially, the PETSc-provided ordering can be used with the option -mat_mumps_icntl_7 1
2918: Level: beginner
2920: Notes:
2921:    MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix generates an error.
2923: When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2924: $ KSPGetPC(ksp,&pc);
2925: $ PCFactorGetMatrix(pc,&mat);
2926: $ MatMumpsGetInfo(mat,....);
2927: $ MatMumpsGetInfog(mat,....); etc.
2928:    Alternatively, run with -ksp_error_if_not_converged and the program will stop and print the MUMPS information in the error message.
2930: Two modes to run MUMPS/PETSc with OpenMP
2932: $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2933: $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2935: $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2936: $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2938: To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2939: (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2940: (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2941:   libraries, to get good performance you should have a multithreaded BLAS library such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2942:   (PETSc will automatically try to use a threaded BLAS if --with-openmp is provided).
2944:   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
2945:   processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
2946:   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
2947:   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which may have been
2948:   set by CPU binding policies in job scripts) and make these CPUs available to the master so that the OMP threads spawned by MUMPS can run on them.
2949:   On a multi-socket compute node, MPI rank mapping matters. Continuing the above example, suppose a compute node has two sockets:
2950:   if you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
2951:   MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half
2952:   of the cores on socket 1, which hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the
2953:   problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2954:   For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2955:   examine the mapping result.
2957: PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2958: for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2959: calls omp_set_num_threads(m) internally before calling MUMPS.
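   As an illustrative sketch only (the scheduler, binding flags, and thread count below are assumptions consistent with the example above), a Slurm job script fragment might read
$    export OMP_PROC_BIND=spread
$    export OMP_PLACES=threads
$    srun --cpu-bind=verbose -m block:block ./test -mat_mumps_use_omp_threads 16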
2961: References:
2962: + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2963: - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2965: .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()
2967: M*/
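/*
   A minimal sketch of selecting MATSOLVERMUMPS programmatically rather than through the
   options database (illustrative only; assumes the KSP, matrix, and vectors have been
   created and assembled elsewhere and that the user code includes the KSP/PC headers;
   the ICNTL index and value are arbitrary examples):

     KSP ksp;
     PC  pc;
     Mat F;
     Vec b,x;   // assumed assembled elsewhere

     KSPSetType(ksp,KSPPREONLY);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCLU);                          // or PCCHOLESKY
     PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
     PCFactorSetUpMatSolverType(pc);              // create the factor matrix so MUMPS options can be set
     PCFactorGetMatrix(pc,&F);
     MatMumpsSetIcntl(F,14,30);                   // e.g., increase the estimated working-space percentage
     KSPSolve(ksp,b,x);
*/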
2969: static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2970: {
2972: *type = MATSOLVERMUMPS;
2973: return(0);
2974: }
2976: /* MatGetFactor for Seq and MPI AIJ matrices */
2977: static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
2978: {
2979: Mat B;
2981: Mat_MUMPS *mumps;
2982: PetscBool isSeqAIJ;
2983: PetscMPIInt size;
2986: #if defined(PETSC_USE_COMPLEX)
2987: if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
2988: #endif
2989: /* Create the factorization matrix */
2990: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
2991: MatCreate(PetscObjectComm((PetscObject)A),&B);
2992: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
2993: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
2994: MatSetUp(B);
2996: PetscNewLog(B,&mumps);
2998: B->ops->view = MatView_MUMPS;
2999: B->ops->getinfo = MatGetInfo_MUMPS;
3001: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3002: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3003: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3004: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3005: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3006: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3007: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3008: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3009: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3010: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3011: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3012: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3013: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
3015: if (ftype == MAT_FACTOR_LU) {
3016: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3017: B->factortype = MAT_FACTOR_LU;
3018: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3019: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3020: mumps->sym = 0;
3021: } else {
3022: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3023: B->factortype = MAT_FACTOR_CHOLESKY;
3024: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3025: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3026: #if defined(PETSC_USE_COMPLEX)
3027: mumps->sym = 2;
3028: #else
3029: if (A->spd_set && A->spd) mumps->sym = 1;
3030: else mumps->sym = 2;
3031: #endif
3032: }
3034: /* set solvertype */
3035: PetscFree(B->solvertype);
3036: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3037: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3038: if (size == 1) {
3039:     /* MUMPS can use the PETSc ordering requested via "-mat_mumps_icntl_7 1" when running sequentially, so generate the ordering even though it may not be used */
3040: B->useordering = PETSC_TRUE;
3041: }
3043: B->ops->destroy = MatDestroy_MUMPS;
3044: B->data = (void*)mumps;
3046: PetscInitializeMUMPS(A,mumps);
3048: *F = B;
3049: return(0);
3050: }
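/*
   A minimal sketch of driving the factorization created above directly, without a KSP/PC
   (illustrative only; error checking is omitted, and the natural ordering and default
   MatFactorInfo are placeholder choices):

     Mat           A,F;   // A is an assembled (MPI)AIJ matrix
     Vec           b,x;   // assumed assembled elsewhere
     IS            isrow,iscol;
     MatFactorInfo info;

     MatFactorInfoInitialize(&info);
     MatGetOrdering(A,MATORDERINGNATURAL,&isrow,&iscol);
     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatLUFactorSymbolic(F,A,isrow,iscol,&info);
     MatLUFactorNumeric(F,A,&info);
     MatSolve(F,b,x);
     ISDestroy(&isrow);
     ISDestroy(&iscol);
     MatDestroy(&F);
*/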
3052: /* MatGetFactor for Seq and MPI SBAIJ matrices */
3053: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3054: {
3055: Mat B;
3057: Mat_MUMPS *mumps;
3058: PetscBool isSeqSBAIJ;
3059: PetscMPIInt size;
3062: #if defined(PETSC_USE_COMPLEX)
3063: if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3064: #endif
3065: MatCreate(PetscObjectComm((PetscObject)A),&B);
3066: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3067: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3068: MatSetUp(B);
3070: PetscNewLog(B,&mumps);
3071: PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);
3072: if (isSeqSBAIJ) {
3073: mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3074: } else {
3075: mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3076: }
3078: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3079: B->ops->view = MatView_MUMPS;
3080: B->ops->getinfo = MatGetInfo_MUMPS;
3082: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3083: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3084: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3085: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3086: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3087: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3088: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3089: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3090: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3091: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3092: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3093: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3094: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
3096: B->factortype = MAT_FACTOR_CHOLESKY;
3097: #if defined(PETSC_USE_COMPLEX)
3098: mumps->sym = 2;
3099: #else
3100: if (A->spd_set && A->spd) mumps->sym = 1;
3101: else mumps->sym = 2;
3102: #endif
3104: /* set solvertype */
3105: PetscFree(B->solvertype);
3106: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3107: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3108: if (size == 1) {
3109:     /* MUMPS can use the PETSc ordering requested via "-mat_mumps_icntl_7 1" when running sequentially, so generate the ordering even though it may not be used */
3110: B->useordering = PETSC_TRUE;
3111: }
3113: B->ops->destroy = MatDestroy_MUMPS;
3114: B->data = (void*)mumps;
3116: PetscInitializeMUMPS(A,mumps);
3118: *F = B;
3119: return(0);
3120: }
3122: static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3123: {
3124: Mat B;
3126: Mat_MUMPS *mumps;
3127: PetscBool isSeqBAIJ;
3128: PetscMPIInt size;
3131: /* Create the factorization matrix */
3132: PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);
3133: MatCreate(PetscObjectComm((PetscObject)A),&B);
3134: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3135: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3136: MatSetUp(B);
3138: PetscNewLog(B,&mumps);
3139: if (ftype == MAT_FACTOR_LU) {
3140: B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3141: B->factortype = MAT_FACTOR_LU;
3142: if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3143: else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3144: mumps->sym = 0;
3145: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");
3147: B->ops->view = MatView_MUMPS;
3148: B->ops->getinfo = MatGetInfo_MUMPS;
3150: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3151: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3152: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3153: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3154: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3155: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3156: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3157: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3158: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3159: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3160: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3161: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3162: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
3164: /* set solvertype */
3165: PetscFree(B->solvertype);
3166: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3167: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3168: if (size == 1) {
3169:     /* MUMPS can use the PETSc ordering requested via "-mat_mumps_icntl_7 1" when running sequentially, so generate the ordering even though it may not be used */
3170: B->useordering = PETSC_TRUE;
3171: }
3173: B->ops->destroy = MatDestroy_MUMPS;
3174: B->data = (void*)mumps;
3176: PetscInitializeMUMPS(A,mumps);
3178: *F = B;
3179: return(0);
3180: }
3182: /* MatGetFactor for Seq and MPI SELL matrices */
3183: static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3184: {
3185: Mat B;
3187: Mat_MUMPS *mumps;
3188: PetscBool isSeqSELL;
3189: PetscMPIInt size;
3192: /* Create the factorization matrix */
3193: PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);
3194: MatCreate(PetscObjectComm((PetscObject)A),&B);
3195: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3196: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3197: MatSetUp(B);
3199: PetscNewLog(B,&mumps);
3201: B->ops->view = MatView_MUMPS;
3202: B->ops->getinfo = MatGetInfo_MUMPS;
3204: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3205: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3206: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3207: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3208: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3209: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3210: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3211: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3212: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3213: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3214: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3216: if (ftype == MAT_FACTOR_LU) {
3217: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3218: B->factortype = MAT_FACTOR_LU;
3219: if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3220: else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3221: mumps->sym = 0;
3222: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3224: /* set solvertype */
3225: PetscFree(B->solvertype);
3226: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3227: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3228: if (size == 1) {
3229:     /* MUMPS can use the PETSc ordering requested via "-mat_mumps_icntl_7 1" when running sequentially, so generate the ordering even though it may not be used */
3230: B->useordering = PETSC_TRUE;
3231: }
3233: B->ops->destroy = MatDestroy_MUMPS;
3234: B->data = (void*)mumps;
3236: PetscInitializeMUMPS(A,mumps);
3238: *F = B;
3239: return(0);
3240: }
3242: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3243: {
3247: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);
3248: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);
3249: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);
3250: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);
3251: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);
3252: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);
3253: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);
3254: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);
3255: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);
3256: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);
3257: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);
3258: return(0);
3259: }