Actual source code: aijmkl.c


  2: /*
  3:   Defines basic operations for the MATSEQAIJMKL matrix class.
  4:   This class is derived from the MATSEQAIJ class and retains the
  5:   compressed row storage (aka Yale sparse matrix format) but uses
  6:   sparse BLAS operations from the Intel Math Kernel Library (MKL)
  7:   wherever possible.
  8: */

 10: #include <../src/mat/impls/aij/seq/aij.h>
 11: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
 12: #include <mkl_spblas.h>

/* Extra per-matrix data that a MATSEQAIJMKL matrix carries in addition to its MATSEQAIJ data.
 * The routines in this file reach it through the matrix's spptr pointer. */
typedef struct {
  PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. The destroy/convert/create
                                      * routines also use this flag to decide whether csrA must be passed to mkl_sparse_destroy(). */
  PetscObjectState state;            /* Object state recorded when the MKL handle was last brought in sync with the matrix;
                                      * compared against the current state to detect a stale handle. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr; /* Matrix descriptor passed alongside csrA to the MKL hint and mat-vec calls. */
#endif
} Mat_SeqAIJMKL;

 25: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);

 27: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
 28: {
 29:   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
 30:   /* so we will ignore 'MatType type'. */
 31:   Mat B = *newmat;
 32: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 33:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
 34: #endif

 36:   if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);

 38:   /* Reset the original function pointers. */
 39:   B->ops->duplicate               = MatDuplicate_SeqAIJ;
 40:   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
 41:   B->ops->destroy                 = MatDestroy_SeqAIJ;
 42:   B->ops->mult                    = MatMult_SeqAIJ;
 43:   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
 44:   B->ops->multadd                 = MatMultAdd_SeqAIJ;
 45:   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
 46:   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
 47:   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
 48:   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
 49:   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
 50:   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
 51:   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

 53:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL);

 55: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 56:   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
 57:    * simply involves destroying the MKL sparse matrix handle and then freeing
 58:    * the spptr pointer. */
 59:   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;

 61:   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
 62: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
 63:   PetscFree(B->spptr);

 65:   /* Change the type of B to MATSEQAIJ. */
 66:   PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

 68:   *newmat = B;
 69:   return 0;
 70: }

 72: PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
 73: {
 74:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;


 77:   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
 78:   if (aijmkl) {
 79:     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
 80: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 81:     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
 82: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
 83:     PetscFree(A->spptr);
 84:   }

 86:   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
 87:    * to destroy everything that remains. */
 88:   PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
 89:   /* Note that I don't call MatSetType().  I believe this is because that
 90:    * is only to be called when *building* a matrix.  I could be wrong, but
 91:    * that is how things work for the SuperLU matrix class. */
 92:   PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL);
 93:   MatDestroy_SeqAIJ(A);
 94:   return 0;
 95: }

 97: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 98:  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 99:  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
100:  * handle, creates a new one, and then calls mkl_sparse_optimize().
101:  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
102:  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
103:  * an unoptimized matrix handle here. */
104: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
105: {
106: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
107:   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
108:    * does nothing. We make it callable anyway in this case because it cuts
109:    * down on littering the code with #ifdefs. */
110:   return 0;
111: #else
112:   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
113:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
114:   PetscInt       m, n;
115:   MatScalar     *aa;
116:   PetscInt      *aj, *ai;

118:   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
119:   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
120:    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
121:    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
122:    * mkl_sparse_optimize() later. */
123:   if (aijmkl->no_SpMV2) return 0;
124:   #endif

126:   if (aijmkl->sparse_optimized) {
127:     /* Matrix has been previously assembled and optimized. Must destroy old
128:      * matrix handle before running the optimization step again. */
129:     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
130:   }
131:   aijmkl->sparse_optimized = PETSC_FALSE;

133:   /* Now perform the SpMV2 setup and matrix optimization. */
134:   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
135:   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
136:   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
137:   m                  = A->rmap->n;
138:   n                  = A->cmap->n;
139:   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
140:   aa                 = a->a; /* Nonzero elements stored row-by-row. */
141:   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
142:   if (a->nz && aa && !A->structure_only) {
143:     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
144:      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
145:     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa);
146:     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
147:     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
148:     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
149:     aijmkl->sparse_optimized = PETSC_TRUE;
150:     PetscObjectStateGet((PetscObject)A, &(aijmkl->state));
151:   } else {
152:     aijmkl->csrA = PETSC_NULL;
153:   }

155:   return 0;
156: #endif
157: }

159: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
160: /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
161: static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
162: {
163:   sparse_index_base_t indexing;
164:   PetscInt            m, n;
165:   PetscInt           *aj, *ai, *dummy;
166:   MatScalar          *aa;
167:   Mat_SeqAIJMKL      *aijmkl;

169:   if (csrA) {
170:     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
171:     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa);
173:   } else {
174:     aj = ai = PETSC_NULL;
175:     aa      = PETSC_NULL;
176:   }

178:   MatSetType(A, MATSEQAIJ);
179:   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols);
180:   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
181:    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
182:    * they will be destroyed when the MKL handle is destroyed.
183:    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
184:   if (csrA) {
185:     MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL);
186:   } else {
187:     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
188:     MatSetUp(A);
189:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
190:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
191:   }

193:   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
194:    * Now turn it into a MATSEQAIJMKL. */
195:   MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A);

197:   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
198:   aijmkl->csrA = csrA;

200:   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
201:    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
202:    * and just need to be able to run the MKL optimization step. */
203:   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
204:   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
205:   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
206:   if (csrA) {
207:     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
208:     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
209:   }
210:   PetscObjectStateGet((PetscObject)A, &(aijmkl->state));
211:   return 0;
212: }
213: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric().
 *
 * Does nothing when the stored handle is NULL. Otherwise every row exported by MKL is inserted into A, A is
 * assembled, the object state is recorded, and the matrix is marked as not yet optimized.
 *
 * Returns 0 on success, otherwise the error code of the first PETSc or MKL call that failed. */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) return 0;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i = 0; i < nrows; i++) {
    nz = ai[i + 1] - ai[i];
    PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

254: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
255: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
256: {
257:   PetscInt            i, j, k;
258:   PetscInt            nrows, ncols;
259:   PetscInt            nz;
260:   PetscInt           *ai, *aj, *dummy;
261:   PetscScalar        *aa;
262:   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
263:   sparse_index_base_t indexing;

265:   PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");

267:   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
268:   if (!aijmkl->csrA) {
269:     PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n");
270:     return 0;
271:   }

273:   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
274:   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);

276:   k = 0;
277:   for (i = 0; i < nrows; i++) {
278:     PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i);
279:     nz = ai[i + 1] - ai[i];
280:     for (j = 0; j < nz; j++) {
281:       if (aa) {
282:         PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k]));
283:       } else {
284:         PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]);
285:       }
286:       k++;
287:     }
288:     PetscViewerASCIIPrintf(viewer, "\n");
289:   }
290:   return 0;
291: }
292: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

294: PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
295: {
296:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
297:   Mat_SeqAIJMKL *aijmkl_dest;

299:   MatDuplicate_SeqAIJ(A, op, M);
300:   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
301:   PetscArraycpy(aijmkl_dest, aijmkl, 1);
302:   aijmkl_dest->sparse_optimized = PETSC_FALSE;
303:   if (aijmkl->eager_inspection) MatSeqAIJMKL_create_mkl_handle(A);
304:   return 0;
305: }

307: PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
308: {
309:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
310:   Mat_SeqAIJMKL *aijmkl;

312:   if (mode == MAT_FLUSH_ASSEMBLY) return 0;

314:   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
315:    * extra information and some different methods, call the AssemblyEnd
316:    * routine for a MATSEQAIJ.
317:    * I'm not sure if this is the best way to do this, but it avoids
318:    * a lot of code duplication. */
319:   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
320:   MatAssemblyEnd_SeqAIJ(A, mode);

322:   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
323:    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
324:   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
325:   if (aijmkl->eager_inspection) MatSeqAIJMKL_create_mkl_handle(A);

327:   return 0;
328: }

330: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
331: PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
332: {
333:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
334:   const PetscScalar *x;
335:   PetscScalar       *y;
336:   const MatScalar   *aa;
337:   PetscInt           m     = A->rmap->n;
338:   PetscInt           n     = A->cmap->n;
339:   PetscScalar        alpha = 1.0;
340:   PetscScalar        beta  = 0.0;
341:   const PetscInt    *aj, *ai;
342:   char               matdescra[6];

344:   /* Variables not in MatMult_SeqAIJ. */
345:   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

347:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
348:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
349:   VecGetArrayRead(xx, &x);
350:   VecGetArray(yy, &y);
351:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
352:   aa = a->a; /* Nonzero elements stored row-by-row. */
353:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

355:   /* Call MKL sparse BLAS routine to do the MatMult. */
356:   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

358:   PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
359:   VecRestoreArrayRead(xx, &x);
360:   VecRestoreArray(yy, &y);
361:   return 0;
362: }
363: #endif

365: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
366: PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
367: {
368:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
369:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
370:   const PetscScalar *x;
371:   PetscScalar       *y;
372:   PetscObjectState   state;


375:   /* If there are no nonzero entries, zero yy and return immediately. */
376:   if (!a->nz) {
377:     VecGetArray(yy, &y);
378:     PetscArrayzero(y, A->rmap->n);
379:     VecRestoreArray(yy, &y);
380:     return 0;
381:   }

383:   VecGetArrayRead(xx, &x);
384:   VecGetArray(yy, &y);

386:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
387:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
388:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
389:   PetscObjectStateGet((PetscObject)A, &state);
390:   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

392:   /* Call MKL SpMV2 executor routine to do the MatMult. */
393:   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

395:   PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
396:   VecRestoreArrayRead(xx, &x);
397:   VecRestoreArray(yy, &y);
398:   return 0;
399: }
400: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

402: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
403: PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
404: {
405:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
406:   const PetscScalar *x;
407:   PetscScalar       *y;
408:   const MatScalar   *aa;
409:   PetscInt           m     = A->rmap->n;
410:   PetscInt           n     = A->cmap->n;
411:   PetscScalar        alpha = 1.0;
412:   PetscScalar        beta  = 0.0;
413:   const PetscInt    *aj, *ai;
414:   char               matdescra[6];

416:   /* Variables not in MatMultTranspose_SeqAIJ. */
417:   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

419:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
420:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
421:   VecGetArrayRead(xx, &x);
422:   VecGetArray(yy, &y);
423:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
424:   aa = a->a; /* Nonzero elements stored row-by-row. */
425:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

427:   /* Call MKL sparse BLAS routine to do the MatMult. */
428:   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

430:   PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
431:   VecRestoreArrayRead(xx, &x);
432:   VecRestoreArray(yy, &y);
433:   return 0;
434: }
435: #endif

437: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
438: PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
439: {
440:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
441:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
442:   const PetscScalar *x;
443:   PetscScalar       *y;
444:   PetscObjectState   state;


447:   /* If there are no nonzero entries, zero yy and return immediately. */
448:   if (!a->nz) {
449:     VecGetArray(yy, &y);
450:     PetscArrayzero(y, A->cmap->n);
451:     VecRestoreArray(yy, &y);
452:     return 0;
453:   }

455:   VecGetArrayRead(xx, &x);
456:   VecGetArray(yy, &y);

458:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
459:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
460:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
461:   PetscObjectStateGet((PetscObject)A, &state);
462:   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

464:   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
465:   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

467:   PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt);
468:   VecRestoreArrayRead(xx, &x);
469:   VecRestoreArray(yy, &y);
470:   return 0;
471: }
472: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

474: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
475: PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
476: {
477:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
478:   const PetscScalar *x;
479:   PetscScalar       *y, *z;
480:   const MatScalar   *aa;
481:   PetscInt           m = A->rmap->n;
482:   PetscInt           n = A->cmap->n;
483:   const PetscInt    *aj, *ai;
484:   PetscInt           i;

486:   /* Variables not in MatMultAdd_SeqAIJ. */
487:   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
488:   PetscScalar alpha  = 1.0;
489:   PetscScalar beta;
490:   char        matdescra[6];

492:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
493:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

495:   VecGetArrayRead(xx, &x);
496:   VecGetArrayPair(yy, zz, &y, &z);
497:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
498:   aa = a->a; /* Nonzero elements stored row-by-row. */
499:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

501:   /* Call MKL sparse BLAS routine to do the MatMult. */
502:   if (zz == yy) {
503:     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
504:     beta = 1.0;
505:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
506:   } else {
507:     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
508:      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
509:     beta = 0.0;
510:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
511:     for (i = 0; i < m; i++) z[i] += y[i];
512:   }

514:   PetscLogFlops(2.0 * a->nz);
515:   VecRestoreArrayRead(xx, &x);
516:   VecRestoreArrayPair(yy, zz, &y, &z);
517:   return 0;
518: }
519: #endif

521: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
522: PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
523: {
524:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
525:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
526:   const PetscScalar *x;
527:   PetscScalar       *y, *z;
528:   PetscInt           m = A->rmap->n;
529:   PetscInt           i;

531:   /* Variables not in MatMultAdd_SeqAIJ. */
532:   PetscObjectState state;


535:   /* If there are no nonzero entries, set zz = yy and return immediately. */
536:   if (!a->nz) {
537:     VecGetArrayPair(yy, zz, &y, &z);
538:     PetscArraycpy(z, y, m);
539:     VecRestoreArrayPair(yy, zz, &y, &z);
540:     return 0;
541:   }

543:   VecGetArrayRead(xx, &x);
544:   VecGetArrayPair(yy, zz, &y, &z);

546:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
547:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
548:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
549:   PetscObjectStateGet((PetscObject)A, &state);
550:   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

552:   /* Call MKL sparse BLAS routine to do the MatMult. */
553:   if (zz == yy) {
554:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
555:      * with alpha and beta both set to 1.0. */
556:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
557:   } else {
558:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
559:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
560:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
561:     for (i = 0; i < m; i++) z[i] += y[i];
562:   }

564:   PetscLogFlops(2.0 * a->nz);
565:   VecRestoreArrayRead(xx, &x);
566:   VecRestoreArrayPair(yy, zz, &y, &z);
567:   return 0;
568: }
569: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

571: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
572: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
573: {
574:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
575:   const PetscScalar *x;
576:   PetscScalar       *y, *z;
577:   const MatScalar   *aa;
578:   PetscInt           m = A->rmap->n;
579:   PetscInt           n = A->cmap->n;
580:   const PetscInt    *aj, *ai;
581:   PetscInt           i;

583:   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
584:   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
585:   PetscScalar alpha  = 1.0;
586:   PetscScalar beta;
587:   char        matdescra[6];

589:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
590:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

592:   VecGetArrayRead(xx, &x);
593:   VecGetArrayPair(yy, zz, &y, &z);
594:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
595:   aa = a->a; /* Nonzero elements stored row-by-row. */
596:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

598:   /* Call MKL sparse BLAS routine to do the MatMult. */
599:   if (zz == yy) {
600:     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
601:     beta = 1.0;
602:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
603:   } else {
604:     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
605:      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
606:     beta = 0.0;
607:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
608:     for (i = 0; i < n; i++) z[i] += y[i];
609:   }

611:   PetscLogFlops(2.0 * a->nz);
612:   VecRestoreArrayRead(xx, &x);
613:   VecRestoreArrayPair(yy, zz, &y, &z);
614:   return 0;
615: }
616: #endif

618: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
619: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
620: {
621:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
622:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
623:   const PetscScalar *x;
624:   PetscScalar       *y, *z;
625:   PetscInt           n = A->cmap->n;
626:   PetscInt           i;
627:   PetscObjectState   state;

629:   /* Variables not in MatMultTransposeAdd_SeqAIJ. */


632:   /* If there are no nonzero entries, set zz = yy and return immediately. */
633:   if (!a->nz) {
634:     VecGetArrayPair(yy, zz, &y, &z);
635:     PetscArraycpy(z, y, n);
636:     VecRestoreArrayPair(yy, zz, &y, &z);
637:     return 0;
638:   }

640:   VecGetArrayRead(xx, &x);
641:   VecGetArrayPair(yy, zz, &y, &z);

643:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
644:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
645:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
646:   PetscObjectStateGet((PetscObject)A, &state);
647:   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

649:   /* Call MKL sparse BLAS routine to do the MatMult. */
650:   if (zz == yy) {
651:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
652:      * with alpha and beta both set to 1.0. */
653:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
654:   } else {
655:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
656:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
657:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
658:     for (i = 0; i < n; i++) z[i] += y[i];
659:   }

661:   PetscLogFlops(2.0 * a->nz);
662:   VecRestoreArrayRead(xx, &x);
663:   VecRestoreArrayPair(yy, zz, &y, &z);
664:   return 0;
665: }
666: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

668: /* -------------------------- MatProduct code -------------------------- */
669: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
670: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
671: {
672:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
673:   sparse_matrix_t     csrA, csrB, csrC;
674:   PetscInt            nrows, ncols;
675:   struct matrix_descr descr_type_gen;
676:   PetscObjectState    state;

678:   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
679:    * not handle sparse matrices with zero rows or columns. */
680:   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
681:   else nrows = A->cmap->N;
682:   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
683:   else ncols = B->rmap->N;

685:   PetscObjectStateGet((PetscObject)A, &state);
686:   if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
687:   PetscObjectStateGet((PetscObject)B, &state);
688:   if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
689:   csrA                = a->csrA;
690:   csrB                = b->csrA;
691:   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

693:   if (csrA && csrB) {
694:     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
695:   } else {
696:     csrC = PETSC_NULL;
697:   }

699:   MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C);

701:   return 0;
702: }

704: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
705: {
706:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
707:   sparse_matrix_t     csrA, csrB, csrC;
708:   struct matrix_descr descr_type_gen;
709:   PetscObjectState    state;

711:   PetscObjectStateGet((PetscObject)A, &state);
712:   if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
713:   PetscObjectStateGet((PetscObject)B, &state);
714:   if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
715:   csrA                = a->csrA;
716:   csrB                = b->csrA;
717:   csrC                = c->csrA;
718:   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

720:   if (csrA && csrB) {
721:     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
722:   } else {
723:     csrC = PETSC_NULL;
724:   }

726:   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
727:   MatSeqAIJMKL_update_from_mkl_handle(C);

729:   return 0;
730: }

732: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
733: {
734:   MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
735:   return 0;
736: }

738: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
739: {
740:   MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
741:   return 0;
742: }

744: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
745: {
746:   MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
747:   return 0;
748: }

750: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
751: {
752:   MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C);
753:   return 0;
754: }

756: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
757: {
758:   MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C);
759:   return 0;
760: }

762: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
763: {
764:   MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C);
765:   return 0;
766: }

768: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
769: {
770:   Mat_Product *product = C->product;
771:   Mat          A = product->A, B = product->B;

773:   MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C);
774:   return 0;
775: }

777: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
778: {
779:   Mat_Product *product = C->product;
780:   Mat          A = product->A, B = product->B;
781:   PetscReal    fill = product->fill;

783:   MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C);
784:   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
785:   return 0;
786: }

788: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
789: {
790:   Mat                 Ct;
791:   Vec                 zeros;
792:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
793:   sparse_matrix_t     csrA, csrP, csrC;
794:   PetscBool           set, flag;
795:   struct matrix_descr descr_type_sym;
796:   PetscObjectState    state;

798:   MatIsSymmetricKnown(A, &set, &flag);

801:   PetscObjectStateGet((PetscObject)A, &state);
802:   if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
803:   PetscObjectStateGet((PetscObject)P, &state);
804:   if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
805:   csrA                = a->csrA;
806:   csrP                = p->csrA;
807:   csrC                = c->csrA;
808:   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
809:   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
810:   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

812:   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
813:   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);

815:   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
816:    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
817:    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
818:    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
819:    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
820:    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
821:    * the full matrix. */
822:   MatSeqAIJMKL_update_from_mkl_handle(C);
823:   MatTranspose(C, MAT_INITIAL_MATRIX, &Ct);
824:   MatCreateVecs(C, &zeros, NULL);
825:   VecSetFromOptions(zeros);
826:   VecZeroEntries(zeros);
827:   MatDiagonalSet(Ct, zeros, INSERT_VALUES);
828:   MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN);
829:   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
830:   MatProductCreateWithMat(A, P, PETSC_NULL, C);
831:   MatProductSetType(C, MATPRODUCT_PtAP);
832:   MatSeqAIJMKL_create_mkl_handle(C);
833:   VecDestroy(&zeros);
834:   MatDestroy(&Ct);
835:   return 0;
836: }

838: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
839: {
840:   Mat_Product        *product = C->product;
841:   Mat                 A = product->A, P = product->B;
842:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
843:   sparse_matrix_t     csrA, csrP, csrC;
844:   struct matrix_descr descr_type_sym;
845:   PetscObjectState    state;

847:   PetscObjectStateGet((PetscObject)A, &state);
848:   if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
849:   PetscObjectStateGet((PetscObject)P, &state);
850:   if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
851:   csrA                = a->csrA;
852:   csrP                = p->csrA;
853:   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
854:   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
855:   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

857:   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
858:   if (csrP && csrA) {
859:     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
860:   } else {
861:     csrC = PETSC_NULL;
862:   }

864:   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
865:    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
866:    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
867:    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
868:    * is guaranteed. */
869:   MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C);

871:   C->ops->productnumeric = MatProductNumeric_PtAP;
872:   return 0;
873: }

875: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
876: {
877:   C->ops->productsymbolic = MatProductSymbolic_AB;
878:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
879:   return 0;
880: }

882: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
883: {
884:   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
885:   return 0;
886: }

888: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
889: {
890:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
891:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
892:   return 0;
893: }

895: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
896: {
897:   Mat_Product *product = C->product;
898:   Mat          A       = product->A;
899:   PetscBool    set, flag;

901:   if (PetscDefined(USE_COMPLEX)) {
902:     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
903:      * We do this in several other locations in this file. This works for the time being, but these
904:      * routines are considered unsafe and may be removed from the MatProduct code in the future.
905:      * TODO: Add proper MATSEQAIJMKL implementations */
906:     C->ops->productsymbolic = NULL;
907:   } else {
908:     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
909:     MatIsSymmetricKnown(A, &set, &flag);
910:     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
911:     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
912:     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
913:      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
914:   }
915:   return 0;
916: }

918: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
919: {
920:   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
921:   return 0;
922: }

924: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
925: {
926:   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
927:   return 0;
928: }

930: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
931: {
932:   Mat_Product *product = C->product;

934:   switch (product->type) {
935:   case MATPRODUCT_AB:
936:     MatProductSetFromOptions_SeqAIJMKL_AB(C);
937:     break;
938:   case MATPRODUCT_AtB:
939:     MatProductSetFromOptions_SeqAIJMKL_AtB(C);
940:     break;
941:   case MATPRODUCT_ABt:
942:     MatProductSetFromOptions_SeqAIJMKL_ABt(C);
943:     break;
944:   case MATPRODUCT_PtAP:
945:     MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
946:     break;
947:   case MATPRODUCT_RARt:
948:     MatProductSetFromOptions_SeqAIJMKL_RARt(C);
949:     break;
950:   case MATPRODUCT_ABC:
951:     MatProductSetFromOptions_SeqAIJMKL_ABC(C);
952:     break;
953:   default:
954:     break;
955:   }
956:   return 0;
957: }
958: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
959: /* ------------------------ End MatProduct code ------------------------ */

961: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
962:  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
963:  * routine, but can also be used to convert an assembled SeqAIJ matrix
964:  * into a SeqAIJMKL one. */
965: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
966: {
967:   Mat            B = *newmat;
968:   Mat_SeqAIJMKL *aijmkl;
969:   PetscBool      set;
970:   PetscBool      sametype;

972:   if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);

974:   PetscObjectTypeCompare((PetscObject)A, type, &sametype);
975:   if (sametype) return 0;

977:   PetscNew(&aijmkl);
978:   B->spptr = (void *)aijmkl;

980:   /* Set function pointers for methods that we inherit from AIJ but override.
981:    * We also parse some command line options below, since those determine some of the methods we point to. */
982:   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
983:   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
984:   B->ops->destroy     = MatDestroy_SeqAIJMKL;

986:   aijmkl->sparse_optimized = PETSC_FALSE;
987: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
988:   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
989: #else
990:   aijmkl->no_SpMV2 = PETSC_TRUE;
991: #endif
992:   aijmkl->eager_inspection = PETSC_FALSE;

994:   /* Parse command line options. */
995:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
996:   PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set);
997:   PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set);
998:   PetscOptionsEnd();
999: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1000:   if (!aijmkl->no_SpMV2) {
1001:     PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1002:     aijmkl->no_SpMV2 = PETSC_TRUE;
1003:   }
1004: #endif

1006: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1007:   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1008:   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1009:   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1010:   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1011:   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1012:   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1013:   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1014:   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1015:   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1016:   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1017:     #if !defined(PETSC_USE_COMPLEX)
1018:   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1019:     #else
1020:   B->ops->ptapnumeric = NULL;
1021:     #endif
1022:   #endif
1023: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

1025: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1026:   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1027:    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1028:    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1029:    * versions in which the older interface has not been deprecated, we use the old interface. */
1030:   if (aijmkl->no_SpMV2) {
1031:     B->ops->mult             = MatMult_SeqAIJMKL;
1032:     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1033:     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1034:     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1035:   }
1036: #endif

1038:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ);

1040:   PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL);
1041:   *newmat = B;
1042:   return 0;
1043: }

1045: /*@C
1046:    MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1047:    This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1048:    routines from Intel MKL whenever possible.
1049:    If the installed version of MKL supports the "SpMV2" sparse
1050:    inspector-executor routines, then those are used by default.
1051:    `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1052:    (for symmetric A) operations are currently supported.
1053:    Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.

1055:    Collective

1057:    Input Parameters:
1058: +  comm - MPI communicator, set to `PETSC_COMM_SELF`
1059: .  m - number of rows
1060: .  n - number of columns
1061: .  nz - number of nonzeros per row (same for all rows)
1062: -  nnz - array containing the number of nonzeros in the various rows
1063:          (possibly different for each row) or NULL

1065:    Output Parameter:
1066: .  A - the matrix

1068:    Options Database Keys:
1069: +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1070: -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

1072:    Note:
1073:    If nnz is given then nz is ignored

1075:    Level: intermediate

1077: .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1078: @*/
1079: PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1080: {
1081:   MatCreate(comm, A);
1082:   MatSetSizes(*A, m, n, m, n);
1083:   MatSetType(*A, MATSEQAIJMKL);
1084:   MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz);
1085:   return 0;
1086: }

1088: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1089: {
1090:   MatSetType(A, MATSEQAIJ);
1091:   MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A);
1092:   return 0;
1093: }