Actual source code: baijmkl.c

  1: /*
  2:   Defines basic operations for the MATSEQBAIJMKL matrix class.
  3:   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
  4:   wherever possible. If the MKL version in use is older than 11.3, the default PETSc
  5:   code for sparse matrix operations is used.
  6: */

  8: #include <../src/mat/impls/baij/seq/baij.h>
  9: #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
 10: #include <mkl_spblas.h>

 12: static PetscBool PetscSeqBAIJSupportsZeroBased(void)
 13: {
 14:   static PetscBool set = PETSC_FALSE, value;
 15:   int              n   = 1, ia[1], ja[1];
 16:   float            a[1];
 17:   sparse_status_t  status;
 18:   sparse_matrix_t  A;

 20:   if (!set) {
 21:     status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, n, n, n, ia, ia, ja, a);
 22:     value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
 23:     (void)mkl_sparse_destroy(A);
 24:     set = PETSC_TRUE;
 25:   }
 26:   return value;
 27: }

 29: typedef struct {
 30:   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
 31:   sparse_matrix_t     bsrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
 32:   struct matrix_descr descr;
 33:   PetscInt           *ai1;
 34:   PetscInt           *aj1;
 35: } Mat_SeqBAIJMKL;

 37: static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
 38: extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType);

 40: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
 41: {
 42:   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
 43:   /* so we will ignore 'MatType type'. */
 44:   Mat             B       = *newmat;
 45:   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;

 47:   if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);

 49:   /* Reset the original function pointers. */
 50:   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
 51:   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
 52:   B->ops->destroy          = MatDestroy_SeqBAIJ;
 53:   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
 54:   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
 55:   B->ops->scale            = MatScale_SeqBAIJ;
 56:   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
 57:   B->ops->axpy             = MatAXPY_SeqBAIJ;

 59:   switch (A->rmap->bs) {
 60:   case 1:
 61:     B->ops->mult    = MatMult_SeqBAIJ_1;
 62:     B->ops->multadd = MatMultAdd_SeqBAIJ_1;
 63:     break;
 64:   case 2:
 65:     B->ops->mult    = MatMult_SeqBAIJ_2;
 66:     B->ops->multadd = MatMultAdd_SeqBAIJ_2;
 67:     break;
 68:   case 3:
 69:     B->ops->mult    = MatMult_SeqBAIJ_3;
 70:     B->ops->multadd = MatMultAdd_SeqBAIJ_3;
 71:     break;
 72:   case 4:
 73:     B->ops->mult    = MatMult_SeqBAIJ_4;
 74:     B->ops->multadd = MatMultAdd_SeqBAIJ_4;
 75:     break;
 76:   case 5:
 77:     B->ops->mult    = MatMult_SeqBAIJ_5;
 78:     B->ops->multadd = MatMultAdd_SeqBAIJ_5;
 79:     break;
 80:   case 6:
 81:     B->ops->mult    = MatMult_SeqBAIJ_6;
 82:     B->ops->multadd = MatMultAdd_SeqBAIJ_6;
 83:     break;
 84:   case 7:
 85:     B->ops->mult    = MatMult_SeqBAIJ_7;
 86:     B->ops->multadd = MatMultAdd_SeqBAIJ_7;
 87:     break;
 88:   case 11:
 89:     B->ops->mult    = MatMult_SeqBAIJ_11;
 90:     B->ops->multadd = MatMultAdd_SeqBAIJ_11;
 91:     break;
 92:   case 15:
 93:     B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
 94:     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
 95:     break;
 96:   default:
 97:     B->ops->mult    = MatMult_SeqBAIJ_N;
 98:     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
 99:     break;
100:   }
101:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL);

103:   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
104:    * simply involves destroying the MKL sparse matrix handle and then freeing
105:    * the spptr pointer. */
106:   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr;

108:   if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
109:   PetscFree2(baijmkl->ai1, baijmkl->aj1);
110:   PetscFree(B->spptr);

112:   /* Change the type of B to MATSEQBAIJ. */
113:   PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);

115:   *newmat = B;
116:   return 0;
117: }

119: static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
120: {
121:   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;

123:   if (baijmkl) {
124:     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
125:     if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
126:     PetscFree2(baijmkl->ai1, baijmkl->aj1);
127:     PetscFree(A->spptr);
128:   }

130:   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
131:    * to destroy everything that remains. */
132:   PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);
133:   MatDestroy_SeqBAIJ(A);
134:   return 0;
135: }

137: static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
138: {
139:   Mat_SeqBAIJ    *a       = (Mat_SeqBAIJ *)A->data;
140:   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
141:   PetscInt        mbs, nbs, nz, bs;
142:   MatScalar      *aa;
143:   PetscInt       *aj, *ai;
144:   PetscInt        i;

146:   if (baijmkl->sparse_optimized) {
147:     /* Matrix has been previously assembled and optimized. Must destroy old
148:      * matrix handle before running the optimization step again. */
149:     PetscFree2(baijmkl->ai1, baijmkl->aj1);
150:     mkl_sparse_destroy(baijmkl->bsrA);
151:   }
152:   baijmkl->sparse_optimized = PETSC_FALSE;

154:   /* Now perform the SpMV2 setup and matrix optimization. */
155:   baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
156:   baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
157:   baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
158:   mbs                 = a->mbs;
159:   nbs                 = a->nbs;
160:   nz                  = a->nz;
161:   bs                  = A->rmap->bs;
162:   aa                  = a->a;

164:   if ((nz != 0) & !(A->structure_only)) {
165:     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
166:      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
167:     if (PetscSeqBAIJSupportsZeroBased()) {
168:       aj = a->j;
169:       ai = a->i;
170:       mkl_sparse_x_create_bsr(&(baijmkl->bsrA), SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa);
171:     } else {
172:       PetscMalloc2(mbs + 1, &ai, nz, &aj);
173:       for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1;
174:       for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1;
175:       aa = a->a;
176:       mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa);
177:       baijmkl->ai1 = ai;
178:       baijmkl->aj1 = aj;
179:     }
180:     mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000);
181:     mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE);
182:     mkl_sparse_optimize(baijmkl->bsrA);
183:     baijmkl->sparse_optimized = PETSC_TRUE;
184:   }
185:   return 0;
186: }

188: static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
189: {
190:   Mat_SeqBAIJMKL *baijmkl;
191:   Mat_SeqBAIJMKL *baijmkl_dest;

193:   MatDuplicate_SeqBAIJ(A, op, M);
194:   baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
195:   PetscNew(&baijmkl_dest);
196:   (*M)->spptr = (void *)baijmkl_dest;
197:   PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL));
198:   baijmkl_dest->sparse_optimized = PETSC_FALSE;
199:   MatSeqBAIJMKL_create_mkl_handle(A);
200:   return 0;
201: }

203: static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
204: {
205:   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
206:   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
207:   const PetscScalar *x;
208:   PetscScalar       *y;

210:   /* If there are no nonzero entries, zero yy and return immediately. */
211:   if (!a->nz) {
212:     VecSet(yy, 0.0);
213:     return 0;
214:   }

216:   VecGetArrayRead(xx, &x);
217:   VecGetArray(yy, &y);

219:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
220:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
221:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
222:   if (!baijmkl->sparse_optimized) MatSeqBAIJMKL_create_mkl_handle(A);

224:   /* Call MKL SpMV2 executor routine to do the MatMult. */
225:   mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y);

227:   PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs);
228:   VecRestoreArrayRead(xx, &x);
229:   VecRestoreArray(yy, &y);
230:   return 0;
231: }

233: static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
234: {
235:   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
236:   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
237:   const PetscScalar *x;
238:   PetscScalar       *y;

240:   /* If there are no nonzero entries, zero yy and return immediately. */
241:   if (!a->nz) {
242:     VecSet(yy, 0.0);
243:     return 0;
244:   }

246:   VecGetArrayRead(xx, &x);
247:   VecGetArray(yy, &y);

249:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
250:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
251:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
252:   if (!baijmkl->sparse_optimized) MatSeqBAIJMKL_create_mkl_handle(A);

254:   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
255:   mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y);

257:   PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs);
258:   VecRestoreArrayRead(xx, &x);
259:   VecRestoreArray(yy, &y);
260:   return 0;
261: }

263: static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
264: {
265:   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
266:   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
267:   const PetscScalar *x;
268:   PetscScalar       *y, *z;
269:   PetscInt           m = a->mbs * A->rmap->bs;
270:   PetscInt           i;

272:   /* If there are no nonzero entries, set zz = yy and return immediately. */
273:   if (!a->nz) {
274:     VecCopy(yy, zz);
275:     return 0;
276:   }

278:   VecGetArrayRead(xx, &x);
279:   VecGetArrayPair(yy, zz, &y, &z);

281:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
282:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
283:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
284:   if (!baijmkl->sparse_optimized) MatSeqBAIJMKL_create_mkl_handle(A);

286:   /* Call MKL sparse BLAS routine to do the MatMult. */
287:   if (zz == yy) {
288:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
289:      * with alpha and beta both set to 1.0. */
290:     mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z);
291:   } else {
292:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
293:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
294:     mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z);
295:     for (i = 0; i < m; i++) z[i] += y[i];
296:   }

298:   PetscLogFlops(2.0 * a->bs2 * a->nz);
299:   VecRestoreArrayRead(xx, &x);
300:   VecRestoreArrayPair(yy, zz, &y, &z);
301:   return 0;
302: }

304: static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
305: {
306:   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
307:   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
308:   const PetscScalar *x;
309:   PetscScalar       *y, *z;
310:   PetscInt           n = a->nbs * A->rmap->bs;
311:   PetscInt           i;
312:   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */

314:   /* If there are no nonzero entries, set zz = yy and return immediately. */
315:   if (!a->nz) {
316:     VecCopy(yy, zz);
317:     return 0;
318:   }

320:   VecGetArrayRead(xx, &x);
321:   VecGetArrayPair(yy, zz, &y, &z);

323:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
324:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
325:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
326:   if (!baijmkl->sparse_optimized) MatSeqBAIJMKL_create_mkl_handle(A);

328:   /* Call MKL sparse BLAS routine to do the MatMult. */
329:   if (zz == yy) {
330:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
331:      * with alpha and beta both set to 1.0. */
332:     mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z);
333:   } else {
334:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
335:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
336:     mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z);
337:     for (i = 0; i < n; i++) z[i] += y[i];
338:   }

340:   PetscLogFlops(2.0 * a->bs2 * a->nz);
341:   VecRestoreArrayRead(xx, &x);
342:   VecRestoreArrayPair(yy, zz, &y, &z);
343:   return 0;
344: }

346: static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha)
347: {
348:   MatScale_SeqBAIJ(inA, alpha);
349:   MatSeqBAIJMKL_create_mkl_handle(inA);
350:   return 0;
351: }

353: static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr)
354: {
355:   MatDiagonalScale_SeqBAIJ(A, ll, rr);
356:   MatSeqBAIJMKL_create_mkl_handle(A);
357:   return 0;
358: }

360: static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str)
361: {
362:   MatAXPY_SeqBAIJ(Y, a, X, str);
363:   if (str == SAME_NONZERO_PATTERN) {
364:     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
365:     MatSeqBAIJMKL_create_mkl_handle(Y);
366:   }
367:   return 0;
368: }
369: /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
370:  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
371:  * routine, but can also be used to convert an assembled SeqBAIJ matrix
372:  * into a SeqBAIJMKL one. */
373: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
374: {
375:   Mat             B = *newmat;
376:   Mat_SeqBAIJMKL *baijmkl;
377:   PetscBool       sametype;

379:   if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A, MAT_COPY_VALUES, &B);

381:   PetscObjectTypeCompare((PetscObject)A, type, &sametype);
382:   if (sametype) return 0;

384:   PetscNew(&baijmkl);
385:   B->spptr = (void *)baijmkl;

387:   /* Set function pointers for methods that we inherit from BAIJ but override.
388:    * We also parse some command line options below, since those determine some of the methods we point to. */
389:   B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;

391:   baijmkl->sparse_optimized = PETSC_FALSE;

393:   PetscObjectComposeFunction((PetscObject)B, "MatScale_SeqBAIJMKL_C", MatScale_SeqBAIJMKL);
394:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ);

396:   PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL);
397:   *newmat = B;
398:   return 0;
399: }

401: static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
402: {
403:   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
404:   MatAssemblyEnd_SeqBAIJ(A, mode);
405:   MatSeqBAIJMKL_create_mkl_handle(A);
406:   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
407:   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
408:   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
409:   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
410:   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
411:   A->ops->scale            = MatScale_SeqBAIJMKL;
412:   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
413:   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
414:   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
415:   return 0;
416: }

418: /*@C
419:    MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`.
420:    This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS
421:    routines from Intel MKL whenever possible.
422:    `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
423:    operations are currently supported.
424:    If the installed version of MKL supports the "SpMV2" sparse
425:    inspector-executor routines, then those are used by default.
426:    Default PETSc kernels are used otherwise.

428:    Input Parameters:
429: +  comm - MPI communicator, set to `PETSC_COMM_SELF`
430: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
431:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
432: .  m - number of rows
433: .  n - number of columns
434: .  nz - number of nonzero blocks  per block row (same for all rows)
435: -  nnz - array containing the number of nonzero blocks in the various block rows
436:          (possibly different for each block row) or NULL

438:    Output Parameter:
439: .  A - the matrix

441:    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
442:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
443:    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

445:    Options Database Keys:
446: +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
447: -   -mat_block_size - size of the blocks to use

449:    Level: intermediate

451:    Notes:
452:    The number of rows and columns must be divisible by blocksize.

454:    If the nnz parameter is given then the nz parameter is ignored

456:    A nonzero block is any block that as 1 or more nonzeros in it

458:    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
459:    storage.  That is, the stored row and column indices can begin at
460:    either one (as in Fortran) or zero.  See the users' manual for details.

462:    Specify the preallocated storage with either nz or nnz (not both).
463:    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
464:    allocation.  See [Sparse Matrices](sec_matsparse) for details.
465:    matrices.

467: .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
468: @*/
469: PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
470: {
471:   MatCreate(comm, A);
472:   MatSetSizes(*A, m, n, m, n);
473:   MatSetType(*A, MATSEQBAIJMKL);
474:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz);
475:   return 0;
476: }

478: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
479: {
480:   MatSetType(A, MATSEQBAIJ);
481:   MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A);
482:   return 0;
483: }