Actual source code: aijmkl.c
2: /*
3: Defines basic operations for the MATSEQAIJMKL matrix class.
4: This class is derived from the MATSEQAIJ class and retains the
5: compressed row storage (aka Yale sparse matrix format) but uses
6: sparse BLAS operations from the Intel Math Kernel Library (MKL)
7: wherever possible.
8: */
10: #include <../src/mat/impls/aij/seq/aij.h>
11: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12: #include <mkl_spblas.h>
/* Extra per-matrix data that a MATSEQAIJMKL matrix keeps (reached through the matrix's spptr pointer)
 * in addition to the ordinary MATSEQAIJ storage. */
typedef struct {
  PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  /* Object state of the matrix recorded when the MKL handle was last built or refreshed; the multiply
   * routines compare it with the current state to decide whether the handle must be rebuilt. */
  PetscObjectState state;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  /* MKL matrix descriptor handed to the executor routines together with csrA. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
/* Assembly routine of the base MATSEQAIJ class: MatAssemblyEnd_SeqAIJMKL() delegates to it and
 * MatConvert_SeqAIJMKL_SeqAIJ() reinstalls it as the assemblyend method. */
extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
27: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
28: {
29: /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30: /* so we will ignore 'MatType type'. */
31: Mat B = *newmat;
32: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
33: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
34: #endif
36: if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);
38: /* Reset the original function pointers. */
39: B->ops->duplicate = MatDuplicate_SeqAIJ;
40: B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
41: B->ops->destroy = MatDestroy_SeqAIJ;
42: B->ops->mult = MatMult_SeqAIJ;
43: B->ops->multtranspose = MatMultTranspose_SeqAIJ;
44: B->ops->multadd = MatMultAdd_SeqAIJ;
45: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
46: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ;
47: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
48: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
49: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
50: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
51: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;
53: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL);
55: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
56: /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
57: * simply involves destroying the MKL sparse matrix handle and then freeing
58: * the spptr pointer. */
59: if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
61: if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
62: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
63: PetscFree(B->spptr);
65: /* Change the type of B to MATSEQAIJ. */
66: PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);
68: *newmat = B;
69: return 0;
70: }
72: PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
73: {
74: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
77: /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
78: if (aijmkl) {
79: /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
80: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
81: if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
82: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
83: PetscFree(A->spptr);
84: }
86: /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
87: * to destroy everything that remains. */
88: PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
89: /* Note that I don't call MatSetType(). I believe this is because that
90: * is only to be called when *building* a matrix. I could be wrong, but
91: * that is how things work for the SuperLU matrix class. */
92: PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL);
93: MatDestroy_SeqAIJ(A);
94: return 0;
95: }
97: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
98: * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
99: * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
100: * handle, creates a new one, and then calls mkl_sparse_optimize().
101: * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
102: * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
103: * an unoptimized matrix handle here. */
104: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
105: {
106: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
107: /* If the MKL library does not have mkl_sparse_optimize(), then this routine
108: * does nothing. We make it callable anyway in this case because it cuts
109: * down on littering the code with #ifdefs. */
110: return 0;
111: #else
112: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
113: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
114: PetscInt m, n;
115: MatScalar *aa;
116: PetscInt *aj, *ai;
118: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
119: /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
120: * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
121: * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
122: * mkl_sparse_optimize() later. */
123: if (aijmkl->no_SpMV2) return 0;
124: #endif
126: if (aijmkl->sparse_optimized) {
127: /* Matrix has been previously assembled and optimized. Must destroy old
128: * matrix handle before running the optimization step again. */
129: PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
130: }
131: aijmkl->sparse_optimized = PETSC_FALSE;
133: /* Now perform the SpMV2 setup and matrix optimization. */
134: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
135: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
136: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
137: m = A->rmap->n;
138: n = A->cmap->n;
139: aj = a->j; /* aj[k] gives column index for element aa[k]. */
140: aa = a->a; /* Nonzero elements stored row-by-row. */
141: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
142: if (a->nz && aa && !A->structure_only) {
143: /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
144: * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
145: PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa);
146: PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
147: PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
148: if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
149: aijmkl->sparse_optimized = PETSC_TRUE;
150: PetscObjectStateGet((PetscObject)A, &(aijmkl->state));
151: } else {
152: aijmkl->csrA = PETSC_NULL;
153: }
155: return 0;
156: #endif
157: }
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle.
 *
 * Input:
 *   comm  - communicator (not used by the body)
 *   csrA  - MKL handle holding the structure, or NULL for an empty result
 *   nrows - number of rows to give A
 *   ncols - number of columns to give A
 *   A     - the matrix to set up; on return it is an assembled MATSEQAIJMKL that stores csrA as its handle
 *
 * Returns 0 on success, otherwise the error code of the first failing PETSc or MKL call. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
{
  sparse_index_base_t indexing;
  PetscInt            m, n;
  PetscInt           *aj, *ai, *dummy;
  MatScalar          *aa;
  Mat_SeqAIJMKL      *aijmkl;

  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
    PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa);
  } else {
    aj = ai = NULL;
    aa      = NULL;
  }

  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    PetscCall(MatSetUp(A));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));

  aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
  }
  PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric().
 *
 * The routine is a no-op when A has no MKL handle. Otherwise the values are inserted row by row, A is
 * reassembled, the new object state is recorded and the handle is flagged as not optimized.
 *
 * Returns 0 on success, otherwise the error code of the first failing PETSc or MKL call. */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) return 0;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i = 0; i < nrows; i++) {
    nz = ai[i + 1] - ai[i];
    PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* MatSeqAIJMKL_view_mkl_handle - prints the CSR contents exported from the MKL handle of A to an ASCII viewer,
 * one line per row, as (column, real part of value) pairs.
 *
 * Input:
 *   A      - the MATSEQAIJMKL matrix
 *   viewer - the viewer to print to
 *
 * Returns 0 on success, otherwise the error code of the first failing PETSc or MKL call. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
{
  PetscInt            i, j, k;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
    return 0;
  }

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);

  /* k walks the exported column/value arrays in storage order across all rows. */
  k = 0;
  for (i = 0; i < nrows; i++) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
    nz = ai[i + 1] - ai[i];
    for (j = 0; j < nz; j++) {
      if (aa) {
        /* %g expects a double, so convert the PetscReal explicitly. */
        PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], (double)PetscRealPart(aa[k])));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
      }
      k++;
    }
    PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
  }
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
294: PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
295: {
296: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
297: Mat_SeqAIJMKL *aijmkl_dest;
299: MatDuplicate_SeqAIJ(A, op, M);
300: aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
301: PetscArraycpy(aijmkl_dest, aijmkl, 1);
302: aijmkl_dest->sparse_optimized = PETSC_FALSE;
303: if (aijmkl->eager_inspection) MatSeqAIJMKL_create_mkl_handle(A);
304: return 0;
305: }
307: PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
308: {
309: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
310: Mat_SeqAIJMKL *aijmkl;
312: if (mode == MAT_FLUSH_ASSEMBLY) return 0;
314: /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
315: * extra information and some different methods, call the AssemblyEnd
316: * routine for a MATSEQAIJ.
317: * I'm not sure if this is the best way to do this, but it avoids
318: * a lot of code duplication. */
319: a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
320: MatAssemblyEnd_SeqAIJ(A, mode);
322: /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
323: * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
324: aijmkl = (Mat_SeqAIJMKL *)A->spptr;
325: if (aijmkl->eager_inspection) MatSeqAIJMKL_create_mkl_handle(A);
327: return 0;
328: }
330: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
331: PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
332: {
333: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
334: const PetscScalar *x;
335: PetscScalar *y;
336: const MatScalar *aa;
337: PetscInt m = A->rmap->n;
338: PetscInt n = A->cmap->n;
339: PetscScalar alpha = 1.0;
340: PetscScalar beta = 0.0;
341: const PetscInt *aj, *ai;
342: char matdescra[6];
344: /* Variables not in MatMult_SeqAIJ. */
345: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
347: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
348: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
349: VecGetArrayRead(xx, &x);
350: VecGetArray(yy, &y);
351: aj = a->j; /* aj[k] gives column index for element aa[k]. */
352: aa = a->a; /* Nonzero elements stored row-by-row. */
353: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
355: /* Call MKL sparse BLAS routine to do the MatMult. */
356: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
358: PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
359: VecRestoreArrayRead(xx, &x);
360: VecRestoreArray(yy, &y);
361: return 0;
362: }
363: #endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* MatMult_SeqAIJMKL_SpMV2 - computes yy = A * xx with the MKL inspector-executor routine mkl_sparse_x_mv().
 * The MKL handle of A is (re)built first when it is missing or older than the current object state of A.
 * A matrix without nonzeros just zeroes yy.
 *
 * Returns 0 on success, otherwise the error code of the first failing PETSc or MKL call. */
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArray(yy, &y));
    PetscCall(PetscArrayzero(y, A->rmap->n));
    PetscCall(VecRestoreArray(yy, &y));
    return 0;
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
402: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
403: PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
404: {
405: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
406: const PetscScalar *x;
407: PetscScalar *y;
408: const MatScalar *aa;
409: PetscInt m = A->rmap->n;
410: PetscInt n = A->cmap->n;
411: PetscScalar alpha = 1.0;
412: PetscScalar beta = 0.0;
413: const PetscInt *aj, *ai;
414: char matdescra[6];
416: /* Variables not in MatMultTranspose_SeqAIJ. */
417: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
419: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
420: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
421: VecGetArrayRead(xx, &x);
422: VecGetArray(yy, &y);
423: aj = a->j; /* aj[k] gives column index for element aa[k]. */
424: aa = a->a; /* Nonzero elements stored row-by-row. */
425: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
427: /* Call MKL sparse BLAS routine to do the MatMult. */
428: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
430: PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
431: VecRestoreArrayRead(xx, &x);
432: VecRestoreArray(yy, &y);
433: return 0;
434: }
435: #endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* MatMultTranspose_SeqAIJMKL_SpMV2 - computes yy = A^T * xx with the MKL inspector-executor routine
 * mkl_sparse_x_mv(). The MKL handle of A is (re)built first when it is missing or older than the current
 * object state of A. A matrix without nonzeros just zeroes yy.
 *
 * Returns 0 on success, otherwise the error code of the first failing PETSc or MKL call. */
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArray(yy, &y));
    PetscCall(PetscArrayzero(y, A->cmap->n));
    PetscCall(VecRestoreArray(yy, &y));
    return 0;
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
474: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
475: PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
476: {
477: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
478: const PetscScalar *x;
479: PetscScalar *y, *z;
480: const MatScalar *aa;
481: PetscInt m = A->rmap->n;
482: PetscInt n = A->cmap->n;
483: const PetscInt *aj, *ai;
484: PetscInt i;
486: /* Variables not in MatMultAdd_SeqAIJ. */
487: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
488: PetscScalar alpha = 1.0;
489: PetscScalar beta;
490: char matdescra[6];
492: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
493: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
495: VecGetArrayRead(xx, &x);
496: VecGetArrayPair(yy, zz, &y, &z);
497: aj = a->j; /* aj[k] gives column index for element aa[k]. */
498: aa = a->a; /* Nonzero elements stored row-by-row. */
499: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
501: /* Call MKL sparse BLAS routine to do the MatMult. */
502: if (zz == yy) {
503: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
504: beta = 1.0;
505: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
506: } else {
507: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
508: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
509: beta = 0.0;
510: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
511: for (i = 0; i < m; i++) z[i] += y[i];
512: }
514: PetscLogFlops(2.0 * a->nz);
515: VecRestoreArrayRead(xx, &x);
516: VecRestoreArrayPair(yy, zz, &y, &z);
517: return 0;
518: }
519: #endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* MatMultAdd_SeqAIJMKL_SpMV2 - computes zz = A * xx + yy with the MKL inspector-executor routine
 * mkl_sparse_x_mv(). zz and yy may be the same vector. The MKL handle of A is (re)built first when it is
 * missing or older than the current object state of A. A matrix without nonzeros just copies yy into zz.
 *
 * Returns 0 on success, otherwise the error code of the first failing PETSc or MKL call. */
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           m = A->rmap->n;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, m));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    return 0;
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
571: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
572: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
573: {
574: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
575: const PetscScalar *x;
576: PetscScalar *y, *z;
577: const MatScalar *aa;
578: PetscInt m = A->rmap->n;
579: PetscInt n = A->cmap->n;
580: const PetscInt *aj, *ai;
581: PetscInt i;
583: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
584: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
585: PetscScalar alpha = 1.0;
586: PetscScalar beta;
587: char matdescra[6];
589: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
590: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
592: VecGetArrayRead(xx, &x);
593: VecGetArrayPair(yy, zz, &y, &z);
594: aj = a->j; /* aj[k] gives column index for element aa[k]. */
595: aa = a->a; /* Nonzero elements stored row-by-row. */
596: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
598: /* Call MKL sparse BLAS routine to do the MatMult. */
599: if (zz == yy) {
600: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
601: beta = 1.0;
602: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
603: } else {
604: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
605: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
606: beta = 0.0;
607: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
608: for (i = 0; i < n; i++) z[i] += y[i];
609: }
611: PetscLogFlops(2.0 * a->nz);
612: VecRestoreArrayRead(xx, &x);
613: VecRestoreArrayPair(yy, zz, &y, &z);
614: return 0;
615: }
616: #endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* MatMultTransposeAdd_SeqAIJMKL_SpMV2 - computes zz = A^T * xx + yy with the MKL inspector-executor routine
 * mkl_sparse_x_mv(). zz and yy may be the same vector. The MKL handle of A is (re)built first when it is
 * missing or older than the current object state of A. A matrix without nonzeros just copies yy into zz.
 *
 * Returns 0 on success, otherwise the error code of the first failing PETSc or MKL call. */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           n = A->cmap->n;
  PetscInt           i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, n));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    return 0;
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < n; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
668: /* -------------------------- MatProduct code -------------------------- */
669: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
670: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
671: {
672: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
673: sparse_matrix_t csrA, csrB, csrC;
674: PetscInt nrows, ncols;
675: struct matrix_descr descr_type_gen;
676: PetscObjectState state;
678: /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
679: * not handle sparse matrices with zero rows or columns. */
680: if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
681: else nrows = A->cmap->N;
682: if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
683: else ncols = B->rmap->N;
685: PetscObjectStateGet((PetscObject)A, &state);
686: if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
687: PetscObjectStateGet((PetscObject)B, &state);
688: if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
689: csrA = a->csrA;
690: csrB = b->csrA;
691: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
693: if (csrA && csrB) {
694: PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
695: } else {
696: csrC = PETSC_NULL;
697: }
699: MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C);
701: return 0;
702: }
704: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
705: {
706: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
707: sparse_matrix_t csrA, csrB, csrC;
708: struct matrix_descr descr_type_gen;
709: PetscObjectState state;
711: PetscObjectStateGet((PetscObject)A, &state);
712: if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
713: PetscObjectStateGet((PetscObject)B, &state);
714: if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
715: csrA = a->csrA;
716: csrB = b->csrA;
717: csrC = c->csrA;
718: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
720: if (csrA && csrB) {
721: PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
722: } else {
723: csrC = PETSC_NULL;
724: }
726: /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
727: MatSeqAIJMKL_update_from_mkl_handle(C);
729: return 0;
730: }
732: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
733: {
734: MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
735: return 0;
736: }
738: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
739: {
740: MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
741: return 0;
742: }
744: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
745: {
746: MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
747: return 0;
748: }
750: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
751: {
752: MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
753: return 0;
754: }
756: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
757: {
758: MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C);
759: return 0;
760: }
762: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
763: {
764: MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C);
765: return 0;
766: }
768: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
769: {
770: Mat_Product *product = C->product;
771: Mat A = product->A, B = product->B;
773: MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C);
774: return 0;
775: }
777: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
778: {
779: Mat_Product *product = C->product;
780: Mat A = product->A, B = product->B;
781: PetscReal fill = product->fill;
783: MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C);
784: C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
785: return 0;
786: }
788: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
789: {
790: Mat Ct;
791: Vec zeros;
792: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
793: sparse_matrix_t csrA, csrP, csrC;
794: PetscBool set, flag;
795: struct matrix_descr descr_type_sym;
796: PetscObjectState state;
798: MatIsSymmetricKnown(A, &set, &flag);
801: PetscObjectStateGet((PetscObject)A, &state);
802: if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
803: PetscObjectStateGet((PetscObject)P, &state);
804: if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
805: csrA = a->csrA;
806: csrP = p->csrA;
807: csrC = c->csrA;
808: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
809: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
810: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
812: /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
813: PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
815: /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
816: * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
817: * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
818: * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
819: * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
820: * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
821: * the full matrix. */
822: MatSeqAIJMKL_update_from_mkl_handle(C);
823: MatTranspose(C, MAT_INITIAL_MATRIX, &Ct);
824: MatCreateVecs(C, &zeros, NULL);
825: VecSetFromOptions(zeros);
826: VecZeroEntries(zeros);
827: MatDiagonalSet(Ct, zeros, INSERT_VALUES);
828: MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN);
829: /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
830: MatProductCreateWithMat(A, P, PETSC_NULL, C);
831: MatProductSetType(C, MATPRODUCT_PtAP);
832: MatSeqAIJMKL_create_mkl_handle(C);
833: VecDestroy(&zeros);
834: MatDestroy(&Ct);
835: return 0;
836: }
838: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
839: {
840: Mat_Product *product = C->product;
841: Mat A = product->A, P = product->B;
842: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
843: sparse_matrix_t csrA, csrP, csrC;
844: struct matrix_descr descr_type_sym;
845: PetscObjectState state;
847: PetscObjectStateGet((PetscObject)A, &state);
848: if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
849: PetscObjectStateGet((PetscObject)P, &state);
850: if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
851: csrA = a->csrA;
852: csrP = p->csrA;
853: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
854: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
855: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
857: /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
858: if (csrP && csrA) {
859: PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
860: } else {
861: csrC = PETSC_NULL;
862: }
864: /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
865: * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
866: * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
867: * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
868: * is guaranteed. */
869: MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C);
871: C->ops->productnumeric = MatProductNumeric_PtAP;
872: return 0;
873: }
875: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
876: {
877: C->ops->productsymbolic = MatProductSymbolic_AB;
878: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
879: return 0;
880: }
882: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
883: {
884: C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
885: return 0;
886: }
888: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
889: {
890: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
891: C->ops->productsymbolic = MatProductSymbolic_ABt;
892: return 0;
893: }
895: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
896: {
897: Mat_Product *product = C->product;
898: Mat A = product->A;
899: PetscBool set, flag;
901: if (PetscDefined(USE_COMPLEX)) {
902: /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
903: * We do this in several other locations in this file. This works for the time being, but these
904: * routines are considered unsafe and may be removed from the MatProduct code in the future.
905: * TODO: Add proper MATSEQAIJMKL implementations */
906: C->ops->productsymbolic = NULL;
907: } else {
908: /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
909: MatIsSymmetricKnown(A, &set, &flag);
910: if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
911: else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
912: /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
913: * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
914: }
915: return 0;
916: }
918: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
919: {
920: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
921: return 0;
922: }
924: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
925: {
926: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
927: return 0;
928: }
930: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
931: {
932: Mat_Product *product = C->product;
934: switch (product->type) {
935: case MATPRODUCT_AB:
936: MatProductSetFromOptions_SeqAIJMKL_AB(C);
937: break;
938: case MATPRODUCT_AtB:
939: MatProductSetFromOptions_SeqAIJMKL_AtB(C);
940: break;
941: case MATPRODUCT_ABt:
942: MatProductSetFromOptions_SeqAIJMKL_ABt(C);
943: break;
944: case MATPRODUCT_PtAP:
945: MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
946: break;
947: case MATPRODUCT_RARt:
948: MatProductSetFromOptions_SeqAIJMKL_RARt(C);
949: break;
950: case MATPRODUCT_ABC:
951: MatProductSetFromOptions_SeqAIJMKL_ABC(C);
952: break;
953: default:
954: break;
955: }
956: return 0;
957: }
958: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
959: /* ------------------------ End MatProduct code ------------------------ */
961: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
962: * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
963: * routine, but can also be used to convert an assembled SeqAIJ matrix
964: * into a SeqAIJMKL one. */
965: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
966: {
967: Mat B = *newmat;
968: Mat_SeqAIJMKL *aijmkl;
969: PetscBool set;
970: PetscBool sametype;
972: if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);
974: PetscObjectTypeCompare((PetscObject)A, type, &sametype);
975: if (sametype) return 0;
977: PetscNew(&aijmkl);
978: B->spptr = (void *)aijmkl;
980: /* Set function pointers for methods that we inherit from AIJ but override.
981: * We also parse some command line options below, since those determine some of the methods we point to. */
982: B->ops->duplicate = MatDuplicate_SeqAIJMKL;
983: B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
984: B->ops->destroy = MatDestroy_SeqAIJMKL;
986: aijmkl->sparse_optimized = PETSC_FALSE;
987: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
988: aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
989: #else
990: aijmkl->no_SpMV2 = PETSC_TRUE;
991: #endif
992: aijmkl->eager_inspection = PETSC_FALSE;
994: /* Parse command line options. */
995: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
996: PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set);
997: PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set);
998: PetscOptionsEnd();
999: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1000: if (!aijmkl->no_SpMV2) {
1001: PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
1002: aijmkl->no_SpMV2 = PETSC_TRUE;
1003: }
1004: #endif
1006: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1007: B->ops->mult = MatMult_SeqAIJMKL_SpMV2;
1008: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2;
1009: B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2;
1010: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1011: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1012: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL;
1013: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1014: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1015: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1016: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1017: #if !defined(PETSC_USE_COMPLEX)
1018: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1019: #else
1020: B->ops->ptapnumeric = NULL;
1021: #endif
1022: #endif
1023: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1025: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1026: /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1027: * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1028: * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1029: * versions in which the older interface has not been deprecated, we use the old interface. */
1030: if (aijmkl->no_SpMV2) {
1031: B->ops->mult = MatMult_SeqAIJMKL;
1032: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL;
1033: B->ops->multadd = MatMultAdd_SeqAIJMKL;
1034: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1035: }
1036: #endif
1038: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ);
1040: PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL);
1041: *newmat = B;
1042: return 0;
1043: }
1045: /*@C
1046: MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1047: This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1048: routines from Intel MKL whenever possible.
1049: If the installed version of MKL supports the "SpMV2" sparse
1050: inspector-executor routines, then those are used by default.
1051: `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1052: (for symmetric A) operations are currently supported.
1053: Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1055: Collective
1057: Input Parameters:
1058: + comm - MPI communicator, set to `PETSC_COMM_SELF`
1059: . m - number of rows
1060: . n - number of columns
1061: . nz - number of nonzeros per row (same for all rows)
1062: - nnz - array containing the number of nonzeros in the various rows
1063: (possibly different for each row) or NULL
1065: Output Parameter:
1066: . A - the matrix
1068: Options Database Keys:
1069: + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1070: - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1072: Note:
1073: If nnz is given then nz is ignored
1075: Level: intermediate
1077: .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1078: @*/
1079: PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1080: {
1081: MatCreate(comm, A);
1082: MatSetSizes(*A, m, n, m, n);
1083: MatSetType(*A, MATSEQAIJMKL);
1084: MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz);
1085: return 0;
1086: }
1088: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1089: {
1090: MatSetType(A, MATSEQAIJ);
1091: MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A);
1092: return 0;
1093: }