Actual source code: ispai.c
2: /*
3: 3/99 Modified by Stephen Barnard to support SPAI version 3.0
4: */
6: /*
7: Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8: Code written by Stephen Barnard.
10: Note: there is some BAD memory bleeding below!
12: This code needs work
14: 1) get rid of all memory bleeding
15: 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16: rather than having the sp flag for PC_SPAI
17: 3) fix to set the block size based on the matrix block size
19: */
20: #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
22: #include <petsc/private/pcimpl.h>
23: #include <../src/ksp/pc/impls/spai/petscspai.h>
25: /*
26: These are the SPAI include files
27: */
28: EXTERN_C_BEGIN
29: #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
30: #include <spai.h>
31: #include <matrix.h>
32: EXTERN_C_END
34: extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
35: extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
36: extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *);
37: extern PetscErrorCode MM_to_PETSC(char *, char *, char *);
39: typedef struct {
40: matrix *B; /* matrix in SPAI format */
41: matrix *BT; /* transpose of matrix in SPAI format */
42: matrix *M; /* the approximate inverse in SPAI format */
44: Mat PM; /* the approximate inverse PETSc format */
46: double epsilon; /* tolerance */
47: int nbsteps; /* max number of "improvement" steps per line */
48: int max; /* max dimensions of is_I, q, etc. */
49: int maxnew; /* max number of new entries per step */
50: int block_size; /* constant block size */
51: int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */
52: int verbose; /* SPAI prints timing and statistics */
54: int sp; /* symmetric nonzero pattern */
55: MPI_Comm comm_spai; /* communicator to be used with spai */
56: } PC_SPAI;
58: static PetscErrorCode PCSetUp_SPAI(PC pc)
59: {
60: PC_SPAI *ispai = (PC_SPAI *)pc->data;
61: Mat AT;
63: PetscFunctionBegin;
64: init_SPAI();
66: if (ispai->sp) {
67: PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
68: } else {
69: /* Use the transpose to get the column nonzero structure. */
70: PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
71: PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
72: PetscCall(MatDestroy(&AT));
73: }
75: /* Destroy the transpose */
76: /* Don't know how to do it. PETSc developers? */
78: /* construct SPAI preconditioner */
79: /* FILE *messages */ /* file for warning messages */
80: /* double epsilon */ /* tolerance */
81: /* int nbsteps */ /* max number of "improvement" steps per line */
82: /* int max */ /* max dimensions of is_I, q, etc. */
83: /* int maxnew */ /* max number of new entries per step */
84: /* int block_size */ /* block_size == 1 specifies scalar elements
85: block_size == n specifies nxn constant-block elements
86: block_size == 0 specifies variable-block elements */
87: /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
88: /* int verbose */ /* verbose == 0 specifies that SPAI is silent
89: verbose == 1 prints timing and matrix statistics */
91: PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);
93: PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));
95: /* free the SPAI matrices */
96: sp_free_matrix(ispai->B);
97: sp_free_matrix(ispai->M);
98: PetscFunctionReturn(PETSC_SUCCESS);
99: }
101: static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
102: {
103: PC_SPAI *ispai = (PC_SPAI *)pc->data;
105: PetscFunctionBegin;
106: /* Now using PETSc's multiply */
107: PetscCall(MatMult(ispai->PM, xx, y));
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
111: static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
112: {
113: PC_SPAI *ispai = (PC_SPAI *)pc->data;
115: PetscFunctionBegin;
116: /* Now using PETSc's multiply */
117: PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
118: PetscFunctionReturn(PETSC_SUCCESS);
119: }
121: static PetscErrorCode PCDestroy_SPAI(PC pc)
122: {
123: PC_SPAI *ispai = (PC_SPAI *)pc->data;
125: PetscFunctionBegin;
126: PetscCall(MatDestroy(&ispai->PM));
127: PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
128: PetscCall(PetscFree(pc->data));
129: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
130: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
131: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
132: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
133: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
134: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
135: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
136: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
137: PetscFunctionReturn(PETSC_SUCCESS);
138: }
140: static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
141: {
142: PC_SPAI *ispai = (PC_SPAI *)pc->data;
143: PetscBool iascii;
145: PetscFunctionBegin;
146: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
147: if (iascii) {
148: PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon));
149: PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps));
150: PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max));
151: PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew));
152: PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size));
153: PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size));
154: PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose));
155: PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp));
156: }
157: PetscFunctionReturn(PETSC_SUCCESS);
158: }
160: static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
161: {
162: PC_SPAI *ispai = (PC_SPAI *)pc->data;
164: PetscFunctionBegin;
165: ispai->epsilon = (double)epsilon1;
166: PetscFunctionReturn(PETSC_SUCCESS);
167: }
169: static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
170: {
171: PC_SPAI *ispai = (PC_SPAI *)pc->data;
173: PetscFunctionBegin;
174: ispai->nbsteps = (int)nbsteps1;
175: PetscFunctionReturn(PETSC_SUCCESS);
176: }
178: /* added 1/7/99 g.h. */
179: static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
180: {
181: PC_SPAI *ispai = (PC_SPAI *)pc->data;
183: PetscFunctionBegin;
184: ispai->max = (int)max1;
185: PetscFunctionReturn(PETSC_SUCCESS);
186: }
188: static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
189: {
190: PC_SPAI *ispai = (PC_SPAI *)pc->data;
192: PetscFunctionBegin;
193: ispai->maxnew = (int)maxnew1;
194: PetscFunctionReturn(PETSC_SUCCESS);
195: }
197: static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
198: {
199: PC_SPAI *ispai = (PC_SPAI *)pc->data;
201: PetscFunctionBegin;
202: ispai->block_size = (int)block_size1;
203: PetscFunctionReturn(PETSC_SUCCESS);
204: }
206: static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
207: {
208: PC_SPAI *ispai = (PC_SPAI *)pc->data;
210: PetscFunctionBegin;
211: ispai->cache_size = (int)cache_size;
212: PetscFunctionReturn(PETSC_SUCCESS);
213: }
215: static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
216: {
217: PC_SPAI *ispai = (PC_SPAI *)pc->data;
219: PetscFunctionBegin;
220: ispai->verbose = (int)verbose;
221: PetscFunctionReturn(PETSC_SUCCESS);
222: }
224: static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
225: {
226: PC_SPAI *ispai = (PC_SPAI *)pc->data;
228: PetscFunctionBegin;
229: ispai->sp = (int)sp;
230: PetscFunctionReturn(PETSC_SUCCESS);
231: }
233: /*@
234: PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner
236: Input Parameters:
237: + pc - the preconditioner
238: - eps - epsilon (default .4)
240: Note:
241: Espilon must be between 0 and 1. It controls the
242: quality of the approximation of M to the inverse of
243: A. Higher values of epsilon lead to more work, more
244: fill, and usually better preconditioners. In many
245: cases the best choice of epsilon is the one that
246: divides the total solution time equally between the
247: preconditioner and the solver.
249: Level: intermediate
251: .seealso: `PCSPAI`, `PCSetType()`
252: @*/
253: PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
254: {
255: PetscFunctionBegin;
256: PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
257: PetscFunctionReturn(PETSC_SUCCESS);
258: }
260: /*@
261: PCSPAISetNBSteps - set maximum number of improvement steps per row in
262: the `PCSPAI` preconditioner
264: Input Parameters:
265: + pc - the preconditioner
266: - n - number of steps (default 5)
268: Note:
269: `PCSPAI` constructs to approximation to every column of
270: the exact inverse of A in a series of improvement
271: steps. The quality of the approximation is determined
272: by epsilon. If an approximation achieving an accuracy
273: of epsilon is not obtained after ns steps, SPAI simply
274: uses the best approximation constructed so far.
276: Level: intermediate
278: .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
279: @*/
280: PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
281: {
282: PetscFunctionBegin;
283: PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
284: PetscFunctionReturn(PETSC_SUCCESS);
285: }
287: /* added 1/7/99 g.h. */
288: /*@
289: PCSPAISetMax - set the size of various working buffers in
290: the `PCSPAI` preconditioner
292: Input Parameters:
293: + pc - the preconditioner
294: - n - size (default is 5000)
296: Level: intermediate
298: .seealso: `PCSPAI`, `PCSetType()`
299: @*/
300: PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
301: {
302: PetscFunctionBegin;
303: PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
304: PetscFunctionReturn(PETSC_SUCCESS);
305: }
307: /*@
308: PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
309: in `PCSPAI` preconditioner
311: Input Parameters:
312: + pc - the preconditioner
313: - n - maximum number (default 5)
315: Level: intermediate
317: .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
318: @*/
319: PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
320: {
321: PetscFunctionBegin;
322: PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
323: PetscFunctionReturn(PETSC_SUCCESS);
324: }
326: /*@
327: PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner
329: Input Parameters:
330: + pc - the preconditioner
331: - n - block size (default 1)
333: Notes:
334: A block
335: size of 1 treats A as a matrix of scalar elements. A
336: block size of s > 1 treats A as a matrix of sxs
337: blocks. A block size of 0 treats A as a matrix with
338: variable sized blocks, which are determined by
339: searching for dense square diagonal blocks in A.
340: This can be very effective for finite-element
341: matrices.
343: SPAI will convert A to block form, use a block
344: version of the preconditioner algorithm, and then
345: convert the result back to scalar form.
347: In many cases the a block-size parameter other than 1
348: can lead to very significant improvement in
349: performance.
351: Level: intermediate
353: .seealso: `PCSPAI`, `PCSetType()`
354: @*/
355: PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
356: {
357: PetscFunctionBegin;
358: PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
359: PetscFunctionReturn(PETSC_SUCCESS);
360: }
362: /*@
363: PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner
365: Input Parameters:
366: + pc - the preconditioner
367: - n - cache size {0,1,2,3,4,5} (default 5)
369: Note:
370: `PCSPAI` uses a hash table to cache messages and avoid
371: redundant communication. If suggest always using
372: 5. This parameter is irrelevant in the serial
373: version.
375: Level: intermediate
377: .seealso: `PCSPAI`, `PCSetType()`
378: @*/
379: PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
380: {
381: PetscFunctionBegin;
382: PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
383: PetscFunctionReturn(PETSC_SUCCESS);
384: }
386: /*@
387: PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner
389: Input Parameters:
390: + pc - the preconditioner
391: - n - level (default 1)
393: Note:
394: print parameters, timings and matrix statistics
396: Level: intermediate
398: .seealso: `PCSPAI`, `PCSetType()`
399: @*/
400: PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
401: {
402: PetscFunctionBegin;
403: PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
404: PetscFunctionReturn(PETSC_SUCCESS);
405: }
407: /*@
408: PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner
410: Input Parameters:
411: + pc - the preconditioner
412: - n - 0 or 1
414: Note:
415: If A has a symmetric nonzero pattern use -sp 1 to
416: improve performance by eliminating some communication
417: in the parallel version. Even if A does not have a
418: symmetric nonzero pattern -sp 1 may well lead to good
419: results, but the code will not follow the published
420: SPAI algorithm exactly.
422: Level: intermediate
424: .seealso: `PCSPAI`, `PCSetType()`
425: @*/
426: PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
427: {
428: PetscFunctionBegin;
429: PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
430: PetscFunctionReturn(PETSC_SUCCESS);
431: }
433: static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject)
434: {
435: PC_SPAI *ispai = (PC_SPAI *)pc->data;
436: int nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
437: double epsilon1;
438: PetscBool flg;
440: PetscFunctionBegin;
441: PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
442: PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
443: if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
444: PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
445: if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
446: /* added 1/7/99 g.h. */
447: PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
448: if (flg) PetscCall(PCSPAISetMax(pc, max1));
449: PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
450: if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
451: PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
452: if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
453: PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
454: if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
455: PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
456: if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
457: PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
458: if (flg) PetscCall(PCSPAISetSp(pc, sp));
459: PetscOptionsHeadEnd();
460: PetscFunctionReturn(PETSC_SUCCESS);
461: }
463: /*MC
464: PCSPAI - Use the Sparse Approximate Inverse method
466: Options Database Keys:
467: + -pc_spai_epsilon <eps> - set tolerance
468: . -pc_spai_nbstep <n> - set nbsteps
469: . -pc_spai_max <m> - set max
470: . -pc_spai_max_new <m> - set maxnew
471: . -pc_spai_block_size <n> - set block size
472: . -pc_spai_cache_size <n> - set cache size
473: . -pc_spai_sp <m> - set sp
474: - -pc_spai_set_verbose <true,false> - verbose output
476: Level: beginner
478: Note:
479: This only works with `MATAIJ` matrices.
481: References:
482: . * - Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3)
484: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
485: `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
486: `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
487: M*/
489: PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
490: {
491: PC_SPAI *ispai;
493: PetscFunctionBegin;
494: PetscCall(PetscNew(&ispai));
495: pc->data = ispai;
497: pc->ops->destroy = PCDestroy_SPAI;
498: pc->ops->apply = PCApply_SPAI;
499: pc->ops->matapply = PCMatApply_SPAI;
500: pc->ops->applyrichardson = 0;
501: pc->ops->setup = PCSetUp_SPAI;
502: pc->ops->view = PCView_SPAI;
503: pc->ops->setfromoptions = PCSetFromOptions_SPAI;
505: ispai->epsilon = .4;
506: ispai->nbsteps = 5;
507: ispai->max = 5000;
508: ispai->maxnew = 5;
509: ispai->block_size = 1;
510: ispai->cache_size = 5;
511: ispai->verbose = 0;
513: ispai->sp = 1;
514: PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));
516: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
517: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
518: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
519: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
520: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
521: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
522: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
523: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
524: PetscFunctionReturn(PETSC_SUCCESS);
525: }
527: /*
528: Converts from a PETSc matrix to an SPAI matrix
529: */
530: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
531: {
532: matrix *M;
533: int i, j, col;
534: int row_indx;
535: int len, pe, local_indx, start_indx;
536: int *mapping;
537: const int *cols;
538: const double *vals;
539: int n, mnl, nnl, nz, rstart, rend;
540: PetscMPIInt size, rank;
541: struct compressed_lines *rows;
543: PetscFunctionBegin;
544: PetscCallMPI(MPI_Comm_size(comm, &size));
545: PetscCallMPI(MPI_Comm_rank(comm, &rank));
546: PetscCall(MatGetSize(A, &n, &n));
547: PetscCall(MatGetLocalSize(A, &mnl, &nnl));
549: /*
550: not sure why a barrier is required. commenting out
551: PetscCallMPI(MPI_Barrier(comm));
552: */
554: M = new_matrix((SPAI_Comm)comm);
556: M->n = n;
557: M->bs = 1;
558: M->max_block_size = 1;
560: M->mnls = (int *)malloc(sizeof(int) * size);
561: M->start_indices = (int *)malloc(sizeof(int) * size);
562: M->pe = (int *)malloc(sizeof(int) * n);
563: M->block_sizes = (int *)malloc(sizeof(int) * n);
564: for (i = 0; i < n; i++) M->block_sizes[i] = 1;
566: PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));
568: M->start_indices[0] = 0;
569: for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];
571: M->mnl = M->mnls[M->myid];
572: M->my_start_index = M->start_indices[M->myid];
574: for (i = 0; i < size; i++) {
575: start_indx = M->start_indices[i];
576: for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
577: }
579: if (AT) {
580: M->lines = new_compressed_lines(M->mnls[rank], 1);
581: } else {
582: M->lines = new_compressed_lines(M->mnls[rank], 0);
583: }
585: rows = M->lines;
587: /* Determine the mapping from global indices to pointers */
588: PetscCall(PetscMalloc1(M->n, &mapping));
589: pe = 0;
590: local_indx = 0;
591: for (i = 0; i < M->n; i++) {
592: if (local_indx >= M->mnls[pe]) {
593: pe++;
594: local_indx = 0;
595: }
596: mapping[i] = local_indx + M->start_indices[pe];
597: local_indx++;
598: }
600: /************** Set up the row structure *****************/
602: PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
603: for (i = rstart; i < rend; i++) {
604: row_indx = i - rstart;
605: PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
606: /* allocate buffers */
607: rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
608: rows->A[row_indx] = (double *)malloc(nz * sizeof(double));
609: /* copy the matrix */
610: for (j = 0; j < nz; j++) {
611: col = cols[j];
612: len = rows->len[row_indx]++;
614: rows->ptrs[row_indx][len] = mapping[col];
615: rows->A[row_indx][len] = vals[j];
616: }
617: rows->slen[row_indx] = rows->len[row_indx];
619: PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
620: }
622: /************** Set up the column structure *****************/
624: if (AT) {
625: for (i = rstart; i < rend; i++) {
626: row_indx = i - rstart;
627: PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
628: /* allocate buffers */
629: rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
630: /* copy the matrix (i.e., the structure) */
631: for (j = 0; j < nz; j++) {
632: col = cols[j];
633: len = rows->rlen[row_indx]++;
635: rows->rptrs[row_indx][len] = mapping[col];
636: }
637: PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
638: }
639: }
641: PetscCall(PetscFree(mapping));
643: order_pointers(M);
644: M->maxnz = calc_maxnz(M);
645: *B = M;
646: PetscFunctionReturn(PETSC_SUCCESS);
647: }
649: /*
650: Converts from an SPAI matrix B to a PETSc matrix PB.
651: This assumes that the SPAI matrix B is stored in
652: COMPRESSED-ROW format.
653: */
654: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
655: {
656: PetscMPIInt size, rank;
657: int m, n, M, N;
658: int d_nz, o_nz;
659: int *d_nnz, *o_nnz;
660: int i, k, global_row, global_col, first_diag_col, last_diag_col;
661: PetscScalar val;
663: PetscFunctionBegin;
664: PetscCallMPI(MPI_Comm_size(comm, &size));
665: PetscCallMPI(MPI_Comm_rank(comm, &rank));
667: m = n = B->mnls[rank];
668: d_nz = o_nz = 0;
670: /* Determine preallocation for MatCreateAIJ */
671: PetscCall(PetscMalloc1(m, &d_nnz));
672: PetscCall(PetscMalloc1(m, &o_nnz));
673: for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
674: first_diag_col = B->start_indices[rank];
675: last_diag_col = first_diag_col + B->mnls[rank];
676: for (i = 0; i < B->mnls[rank]; i++) {
677: for (k = 0; k < B->lines->len[i]; k++) {
678: global_col = B->lines->ptrs[i][k];
679: if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
680: else o_nnz[i]++;
681: }
682: }
684: M = N = B->n;
685: /* Here we only know how to create AIJ format */
686: PetscCall(MatCreate(comm, PB));
687: PetscCall(MatSetSizes(*PB, m, n, M, N));
688: PetscCall(MatSetType(*PB, MATAIJ));
689: PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
690: PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));
692: for (i = 0; i < B->mnls[rank]; i++) {
693: global_row = B->start_indices[rank] + i;
694: for (k = 0; k < B->lines->len[i]; k++) {
695: global_col = B->lines->ptrs[i][k];
697: val = B->lines->A[i][k];
698: PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
699: }
700: }
702: PetscCall(PetscFree(d_nnz));
703: PetscCall(PetscFree(o_nnz));
705: PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
706: PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
707: PetscFunctionReturn(PETSC_SUCCESS);
708: }
710: /*
711: Converts from an SPAI vector v to a PETSc vec Pv.
712: */
713: PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv)
714: {
715: PetscMPIInt size, rank;
716: int m, M, i, *mnls, *start_indices, *global_indices;
718: PetscFunctionBegin;
719: PetscCallMPI(MPI_Comm_size(comm, &size));
720: PetscCallMPI(MPI_Comm_rank(comm, &rank));
722: m = v->mnl;
723: M = v->n;
725: PetscCall(VecCreateMPI(comm, m, M, Pv));
727: PetscCall(PetscMalloc1(size, &mnls));
728: PetscCallMPI(MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm));
730: PetscCall(PetscMalloc1(size, &start_indices));
732: start_indices[0] = 0;
733: for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1];
735: PetscCall(PetscMalloc1(v->mnl, &global_indices));
736: for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i;
738: PetscCall(PetscFree(mnls));
739: PetscCall(PetscFree(start_indices));
741: PetscCall(VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES));
742: PetscCall(VecAssemblyBegin(*Pv));
743: PetscCall(VecAssemblyEnd(*Pv));
745: PetscCall(PetscFree(global_indices));
746: PetscFunctionReturn(PETSC_SUCCESS);
747: }