Actual source code: mpidense.c


  2: /*
  3:    Basic functions for parallel dense matrices.
  4:    Portions of this code are under:
  5:    Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
  6: */

  8: #include <../src/mat/impls/dense/mpi/mpidense.h>
  9: #include <../src/mat/impls/aij/mpi/mpiaij.h>
 10: #include <petscblaslapack.h>

 12: /*@
 13:       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the sequential
 14:               matrix that represents the local portion of the operator. For a sequential matrix it returns the matrix itself.

 16:     Input Parameter:
 17: .      A - the sequential or MPI `MATDENSE` matrix

 19:     Output Parameter:
 20: .      B - the inner matrix

 22:     Level: intermediate

 24: .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
 25: @*/
 26: PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
 27: {
 28:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 29:   PetscBool     flg;

 31:   PetscFunctionBegin;
 34:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
 35:   if (flg) *B = mat->A;
 36:   else {
 37:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
 38:     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
 39:     *B = A;
 40:   }
 41:   PetscFunctionReturn(PETSC_SUCCESS);
 42: }
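
/* Example (a minimal usage sketch, not part of the original file): given a
   MATMPIDENSE matrix A, obtain the inner sequential matrix and its raw values.

     Mat          Aloc;
     PetscScalar *array;

     PetscCall(MatDenseGetLocalMatrix(A, &Aloc));
     PetscCall(MatDenseGetArray(Aloc, &array));
     ... array holds the locally owned rows, stored column-major with the LDA of Aloc ...
     PetscCall(MatDenseRestoreArray(Aloc, &array));
*/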

 44: PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
 45: {
 46:   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
 47:   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

 49:   PetscFunctionBegin;
 50:   PetscCall(MatCopy(Amat->A, Bmat->A, s));
 51:   PetscFunctionReturn(PETSC_SUCCESS);
 52: }
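
/* MatShift_MPIDense: A += alpha*I on the locally owned diagonal entries; rend is
   clamped to the global column count so rectangular matrices are handled as well */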

 54: PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
 55: {
 56:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 57:   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
 58:   PetscScalar  *v;

 60:   PetscFunctionBegin;
 61:   PetscCall(MatDenseGetArray(mat->A, &v));
 62:   PetscCall(MatDenseGetLDA(mat->A, &lda));
 63:   rend2 = PetscMin(rend, A->cmap->N);
 64:   if (rend2 > rstart) {
 65:     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
 66:     PetscCall(PetscLogFlops(rend2 - rstart));
 67:   }
 68:   PetscCall(MatDenseRestoreArray(mat->A, &v));
 69:   PetscFunctionReturn(PETSC_SUCCESS);
 70: }

 72: PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 73: {
 74:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 75:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 77:   PetscFunctionBegin;
 78:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows are supported");
 79:   lrow = row - rstart;
 80:   PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
 81:   PetscFunctionReturn(PETSC_SUCCESS);
 82: }

 84: PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 85: {
 86:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 87:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 89:   PetscFunctionBegin;
 90:   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows are supported");
 91:   lrow = row - rstart;
 92:   PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
 93:   PetscFunctionReturn(PETSC_SUCCESS);
 94: }
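
/* MatGetDiagonalBlock_MPIDense: returns this rank's square diagonal block as a
   sequential dense matrix that aliases (does not copy) the local array, assuming
   contiguous storage; the block is cached on A under the key "DiagonalBlock" so
   repeated calls return the same matrix */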

 96: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
 97: {
 98:   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
 99:   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
100:   PetscScalar  *array;
101:   MPI_Comm      comm;
102:   PetscBool     flg;
103:   Mat           B;

105:   PetscFunctionBegin;
106:   PetscCall(MatHasCongruentLayouts(A, &flg));
107:   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
108:   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
109:   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
110: #if defined(PETSC_HAVE_CUDA)
111:     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
112:     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
113: #elif defined(PETSC_HAVE_HIP)
114:     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
115:     PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
116: #endif
117:     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
118:     PetscCall(MatCreate(comm, &B));
119:     PetscCall(MatSetSizes(B, m, m, m, m));
120:     PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
121:     PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
122:     PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
123:     PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
124:     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
125:     *a = B;
126:     PetscCall(MatDestroy(&B));
127:   } else *a = B;
128:   PetscFunctionReturn(PETSC_SUCCESS);
129: }
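
/* MatSetValues_MPIDense: entries for locally owned rows go straight into the local
   dense block; entries for off-process rows are stashed and communicated during
   MatAssemblyBegin()/MatAssemblyEnd(), unless MAT_IGNORE_OFF_PROC_ENTRIES is set */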

131: PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
132: {
133:   Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
134:   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
135:   PetscBool     roworiented = A->roworiented;

137:   PetscFunctionBegin;
138:   for (i = 0; i < m; i++) {
139:     if (idxm[i] < 0) continue;
140:     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
141:     if (idxm[i] >= rstart && idxm[i] < rend) {
142:       row = idxm[i] - rstart;
143:       if (roworiented) {
144:         PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
145:       } else {
146:         for (j = 0; j < n; j++) {
147:           if (idxn[j] < 0) continue;
148:           PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
149:           PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
150:         }
151:       }
152:     } else if (!A->donotstash) {
153:       mat->assembled = PETSC_FALSE;
154:       if (roworiented) {
155:         PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
156:       } else {
157:         PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
158:       }
159:     }
160:   }
161:   PetscFunctionReturn(PETSC_SUCCESS);
162: }

164: PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165: {
166:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167:   PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

169:   PetscFunctionBegin;
170:   for (i = 0; i < m; i++) {
171:     if (idxm[i] < 0) continue; /* negative row */
172:     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173:     if (idxm[i] >= rstart && idxm[i] < rend) {
174:       row = idxm[i] - rstart;
175:       for (j = 0; j < n; j++) {
176:         if (idxn[j] < 0) continue; /* negative column */
177:         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178:         PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179:       }
180:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181:   }
182:   PetscFunctionReturn(PETSC_SUCCESS);
183: }

185: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186: {
187:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

189:   PetscFunctionBegin;
190:   PetscCall(MatDenseGetLDA(a->A, lda));
191:   PetscFunctionReturn(PETSC_SUCCESS);
192: }

194: static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
195: {
196:   Mat_MPIDense *a     = (Mat_MPIDense *)A->data;
197:   MatType       mtype = MATSEQDENSE;

199:   PetscFunctionBegin;
200:   if (!a->A) {
201:     PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
202:     PetscCall(PetscLayoutSetUp(A->rmap));
203:     PetscCall(PetscLayoutSetUp(A->cmap));
204:     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
205:     PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
206: #if defined(PETSC_HAVE_CUDA)
207:     PetscBool iscuda;
208:     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
209:     if (iscuda) mtype = MATSEQDENSECUDA;
210: #elif defined(PETSC_HAVE_HIP)
211:     PetscBool iship;
212:     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
213:     if (iship) mtype = MATSEQDENSEHIP;
214: #endif
215:     PetscCall(MatSetType(a->A, mtype));
216:   }
217:   PetscCall(MatDenseSetLDA(a->A, lda));
218:   PetscFunctionReturn(PETSC_SUCCESS);
219: }
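
/* The array accessors below simply forward to the inner sequential matrix; the
   checks refuse access while a submatrix view (and, for the place/reset/replace
   variants, a column-vector view) is outstanding */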

221: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222: {
223:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

225:   PetscFunctionBegin;
226:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227:   PetscCall(MatDenseGetArray(a->A, array));
228:   PetscFunctionReturn(PETSC_SUCCESS);
229: }

231: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
232: {
233:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

235:   PetscFunctionBegin;
236:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237:   PetscCall(MatDenseGetArrayRead(a->A, array));
238:   PetscFunctionReturn(PETSC_SUCCESS);
239: }

241: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242: {
243:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

245:   PetscFunctionBegin;
246:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247:   PetscCall(MatDenseGetArrayWrite(a->A, array));
248:   PetscFunctionReturn(PETSC_SUCCESS);
249: }

251: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252: {
253:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

255:   PetscFunctionBegin;
256:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258:   PetscCall(MatDensePlaceArray(a->A, array));
259:   PetscFunctionReturn(PETSC_SUCCESS);
260: }

262: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263: {
264:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

266:   PetscFunctionBegin;
267:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269:   PetscCall(MatDenseResetArray(a->A));
270:   PetscFunctionReturn(PETSC_SUCCESS);
271: }

273: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274: {
275:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

277:   PetscFunctionBegin;
278:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280:   PetscCall(MatDenseReplaceArray(a->A, array));
281:   PetscFunctionReturn(PETSC_SUCCESS);
282: }
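
/* MatCreateSubMatrix_MPIDense: gathers the column index set onto every rank with
   ISAllGather(), then copies the selected entries one column at a time; the
   submatrix keeps the same row partitioning as the original matrix */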

284: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
285: {
286:   Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
287:   PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
288:   const PetscInt    *irow, *icol;
289:   const PetscScalar *v;
290:   PetscScalar       *bv;
291:   Mat                newmat;
292:   IS                 iscol_local;
293:   MPI_Comm           comm_is, comm_mat;

295:   PetscFunctionBegin;
296:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
297:   PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
298:   PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");

300:   PetscCall(ISAllGather(iscol, &iscol_local));
301:   PetscCall(ISGetIndices(isrow, &irow));
302:   PetscCall(ISGetIndices(iscol_local, &icol));
303:   PetscCall(ISGetLocalSize(isrow, &nrows));
304:   PetscCall(ISGetLocalSize(iscol, &ncols));
305:   PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */

307:   /* No parallel redistribution is currently supported! Each index set should really
308:      be checked to confirm that it is OK. Currently only submatrices with the same
309:      row partitioning as the original matrix are supported. */

311:   PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
312:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));

314:   /* Check submatrix call */
315:   if (scall == MAT_REUSE_MATRIX) {
316:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
317:     /* Really need to check the row and column sizes! */
318:     newmat = *B;
319:   } else {
320:     /* Create and fill new matrix */
321:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
322:     PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
323:     PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
324:     PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
325:   }

327:   /* Now extract the data pointers and do the copy, column at a time */
328:   newmatd = (Mat_MPIDense *)newmat->data;
329:   PetscCall(MatDenseGetArray(newmatd->A, &bv));
330:   PetscCall(MatDenseGetArrayRead(mat->A, &v));
331:   PetscCall(MatDenseGetLDA(mat->A, &lda));
332:   for (i = 0; i < Ncols; i++) {
333:     const PetscScalar *av = v + lda * icol[i];
334:     for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
335:   }
336:   PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
337:   PetscCall(MatDenseRestoreArray(newmatd->A, &bv));

339:   /* Assemble the matrices so that the correct flags are set */
340:   PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
341:   PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));

343:   /* Free work space */
344:   PetscCall(ISRestoreIndices(isrow, &irow));
345:   PetscCall(ISRestoreIndices(iscol_local, &icol));
346:   PetscCall(ISDestroy(&iscol_local));
347:   *B = newmat;
348:   PetscFunctionReturn(PETSC_SUCCESS);
349: }

351: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352: {
353:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

355:   PetscFunctionBegin;
356:   PetscCall(MatDenseRestoreArray(a->A, array));
357:   PetscFunctionReturn(PETSC_SUCCESS);
358: }

360: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
361: {
362:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

364:   PetscFunctionBegin;
365:   PetscCall(MatDenseRestoreArrayRead(a->A, array));
366:   PetscFunctionReturn(PETSC_SUCCESS);
367: }

369: PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370: {
371:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

373:   PetscFunctionBegin;
374:   PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375:   PetscFunctionReturn(PETSC_SUCCESS);
376: }
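
/* Assembly is two-phase: MatAssemblyBegin() scatters the stashed off-process
   values; MatAssemblyEnd() receives them, inserts each run of consecutive entries
   of the same row with one MatSetValues_MPIDense() call, then assembles the local block */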

378: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379: {
380:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381:   PetscInt      nstash, reallocs;

383:   PetscFunctionBegin;
384:   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

386:   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387:   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388:   PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389:   PetscFunctionReturn(PETSC_SUCCESS);
390: }

392: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
393: {
394:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
395:   PetscInt      i, *row, *col, flg, j, rstart, ncols;
396:   PetscMPIInt   n;
397:   PetscScalar  *val;

399:   PetscFunctionBegin;
400:   if (!mdn->donotstash && !mat->nooffprocentries) {
401:     /*  wait on receives */
402:     while (1) {
403:       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
404:       if (!flg) break;

406:       for (i = 0; i < n;) {
407:         /* Now identify the consecutive vals belonging to the same row */
408:         for (j = i, rstart = row[j]; j < n; j++) {
409:           if (row[j] != rstart) break;
410:         }
411:         if (j < n) ncols = j - i;
412:         else ncols = n - i;
413:         /* Now assemble all these values with a single function call */
414:         PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
415:         i = j;
416:       }
417:     }
418:     PetscCall(MatStashScatterEnd_Private(&mat->stash));
419:   }

421:   PetscCall(MatAssemblyBegin(mdn->A, mode));
422:   PetscCall(MatAssemblyEnd(mdn->A, mode));
423:   PetscFunctionReturn(PETSC_SUCCESS);
424: }

426: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427: {
428:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

430:   PetscFunctionBegin;
431:   PetscCall(MatZeroEntries(l->A));
432:   PetscFunctionReturn(PETSC_SUCCESS);
433: }
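
/* MatZeroRows_MPIDense: maps the global row list to locally owned rows, optionally
   fixes the right-hand side (b_i = diag * x_i for each zeroed row), zeroes those
   rows of the local block, and reinstates the diagonal via MatDiagonalSet() when
   diag is nonzero */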

435: PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
436: {
437:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;
438:   PetscInt      i, len, *lrows;

440:   PetscFunctionBegin;
441:   /* get locally owned rows */
442:   PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
443:   /* fix right hand side if needed */
444:   if (x && b) {
445:     const PetscScalar *xx;
446:     PetscScalar       *bb;

448:     PetscCall(VecGetArrayRead(x, &xx));
449:     PetscCall(VecGetArrayWrite(b, &bb));
450:     for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
451:     PetscCall(VecRestoreArrayRead(x, &xx));
452:     PetscCall(VecRestoreArrayWrite(b, &bb));
453:   }
454:   PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
455:   if (diag != 0.0) {
456:     Vec d;

458:     PetscCall(MatCreateVecs(A, NULL, &d));
459:     PetscCall(VecSet(d, diag));
460:     PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
461:     PetscCall(VecDestroy(&d));
462:   }
463:   PetscCall(PetscFree(lrows));
464:   PetscFunctionReturn(PETSC_SUCCESS);
465: }

467: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
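
/* MatMult: y = A*x. Each rank stores its local rows against all N columns, so the
   distributed x is first gathered into the sequential work vector lvec through the
   PetscSF Mvctx (memtype-aware, so device arrays work too), then the local dense
   multiply produces the owned part of y */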

472: PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
473: {
474:   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
475:   const PetscScalar *ax;
476:   PetscScalar       *ay;
477:   PetscMemType       axmtype, aymtype;

479:   PetscFunctionBegin;
480:   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
481:   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
482:   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
483:   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
484:   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
485:   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
486:   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
487:   PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
488:   PetscFunctionReturn(PETSC_SUCCESS);
489: }

491: PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
492: {
493:   Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
494:   const PetscScalar *ax;
495:   PetscScalar       *ay;
496:   PetscMemType       axmtype, aymtype;

498:   PetscFunctionBegin;
499:   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
500:   PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
501:   PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
502:   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
503:   PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
504:   PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
505:   PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
506:   PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
507:   PetscFunctionReturn(PETSC_SUCCESS);
508: }
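
/* Transpose products run the local multiply first, producing each rank's
   contribution in lvec, then sum the contributions into the result with an SF
   MPIU_SUM reduction -- the reverse of the broadcast used by MatMult */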

510: PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
511: {
512:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
513:   const PetscScalar *ax;
514:   PetscScalar       *ay;
515:   PetscMemType       axmtype, aymtype;

517:   PetscFunctionBegin;
518:   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
519:   PetscCall(VecSet(yy, 0.0));
520:   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
521:   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
522:   PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
523:   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
524:   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
525:   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
526:   PetscCall(VecRestoreArrayAndMemType(yy, &ay));
527:   PetscFunctionReturn(PETSC_SUCCESS);
528: }

530: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
531: {
532:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
533:   const PetscScalar *ax;
534:   PetscScalar       *ay;
535:   PetscMemType       axmtype, aymtype;

537:   PetscFunctionBegin;
538:   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
539:   PetscCall(VecCopy(yy, zz));
540:   PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
541:   PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
542:   PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
543:   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
544:   PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
545:   PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
546:   PetscCall(VecRestoreArrayAndMemType(zz, &ay));
547:   PetscFunctionReturn(PETSC_SUCCESS);
548: }
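
/* MatGetDiagonal_MPIDense: extracts the locally owned diagonal entries
   a(rstart+i, rstart+i) from the local column-major array */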

550: PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
551: {
552:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
553:   PetscInt           lda, len, i, n, radd;
554:   PetscScalar       *x, zero = 0.0;
555:   const PetscScalar *av;

557:   PetscFunctionBegin;
558:   PetscCall(VecSet(v, zero));
559:   PetscCall(VecGetArray(v, &x));
560:   PetscCall(VecGetSize(v, &n));
561:   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
562:   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
563:   PetscCall(MatDenseGetArrayRead(a->A, &av));
564:   PetscCall(MatDenseGetLDA(a->A, &lda));
565:   radd = A->rmap->rstart * lda; /* offset of column rstart in the local column-major array; uses lda in case the array is padded */
566:   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
567:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
568:   PetscCall(VecRestoreArray(v, &x));
569:   PetscFunctionReturn(PETSC_SUCCESS);
570: }

572: PetscErrorCode MatDestroy_MPIDense(Mat mat)
573: {
574:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

576:   PetscFunctionBegin;
577: #if defined(PETSC_USE_LOG)
578:   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
579: #endif
580:   PetscCall(MatStashDestroy_Private(&mat->stash));
581:   PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
582:   PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
583:   PetscCall(MatDestroy(&mdn->A));
584:   PetscCall(VecDestroy(&mdn->lvec));
585:   PetscCall(PetscSFDestroy(&mdn->Mvctx));
586:   PetscCall(VecDestroy(&mdn->cvec));
587:   PetscCall(MatDestroy(&mdn->cmat));

589:   PetscCall(PetscFree(mat->data));
590:   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));

592:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
593:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
594:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
595:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
596:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
597:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
598:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
599:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
600:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
601:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
602:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
603:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
604:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
605: #if defined(PETSC_HAVE_ELEMENTAL)
606:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
607: #endif
608: #if defined(PETSC_HAVE_SCALAPACK)
609:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
610: #endif
611:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
612:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
613:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
614: #if defined(PETSC_HAVE_CUDA)
615:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
616:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
617:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
618:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
619:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
620:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
621:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
622:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
623:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
624:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
625:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
626:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
627:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
628:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
629:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
630:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
631:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
632: #endif
633: #if defined(PETSC_HAVE_HIP)
634:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
635:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
636:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
637:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
638:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
639:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
640:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
641:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
642:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
643:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
644:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
645:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
646:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
647:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
648:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
649:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
650:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
651: #endif
652:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
653:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
654:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
655:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
656:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
657:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
658:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
659:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
660:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
661:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));

663:   PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
664:   PetscFunctionReturn(PETSC_SUCCESS);
665: }

667: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

669: #include <petscdraw.h>
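
/* For ASCII, draw, and socket viewers the entire matrix is assembled onto rank 0
   (acceptable for the small matrices these viewers target) and viewed sequentially */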
670: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
671: {
672:   Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
673:   PetscMPIInt       rank;
674:   PetscViewerType   vtype;
675:   PetscBool         iascii, isdraw;
676:   PetscViewer       sviewer;
677:   PetscViewerFormat format;

679:   PetscFunctionBegin;
680:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
681:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
682:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
683:   if (iascii) {
684:     PetscCall(PetscViewerGetType(viewer, &vtype));
685:     PetscCall(PetscViewerGetFormat(viewer, &format));
686:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
687:       MatInfo info;
688:       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
689:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
690:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
691:                                                    (PetscInt)info.memory));
692:       PetscCall(PetscViewerFlush(viewer));
693:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
694:       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
695:       PetscFunctionReturn(PETSC_SUCCESS);
696:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
697:       PetscFunctionReturn(PETSC_SUCCESS);
698:     }
699:   } else if (isdraw) {
700:     PetscDraw draw;
701:     PetscBool isnull;

703:     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
704:     PetscCall(PetscDrawIsNull(draw, &isnull));
705:     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
706:   }

708:   {
709:     /* assemble the entire matrix onto the first processor. */
710:     Mat          A;
711:     PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
712:     PetscInt    *cols;
713:     PetscScalar *vals;

715:     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
716:     if (rank == 0) {
717:       PetscCall(MatSetSizes(A, M, N, M, N));
718:     } else {
719:       PetscCall(MatSetSizes(A, 0, 0, M, N));
720:     }
721:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
722:     PetscCall(MatSetType(A, MATMPIDENSE));
723:     PetscCall(MatMPIDenseSetPreallocation(A, NULL));

725:     /* Copy the matrix ... This isn't the most efficient means,
726:        but it's quick for now */
727:     A->insertmode = INSERT_VALUES;

729:     row = mat->rmap->rstart;
730:     m   = mdn->A->rmap->n;
731:     for (i = 0; i < m; i++) {
732:       PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
733:       PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
734:       PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
735:       row++;
736:     }

738:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
739:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
740:     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
741:     if (rank == 0) {
742:       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
743:       PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
744:     }
745:     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
746:     PetscCall(PetscViewerFlush(viewer));
747:     PetscCall(MatDestroy(&A));
748:   }
749:   PetscFunctionReturn(PETSC_SUCCESS);
750: }

752: PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
753: {
754:   PetscBool iascii, isbinary, isdraw, issocket;

756:   PetscFunctionBegin;
757:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
758:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
759:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
760:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));

762:   if (iascii || issocket || isdraw) {
763:     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
764:   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
765:   PetscFunctionReturn(PETSC_SUCCESS);
766: }

768: PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
769: {
770:   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
771:   Mat            mdn = mat->A;
772:   PetscLogDouble isend[5], irecv[5];

774:   PetscFunctionBegin;
775:   info->block_size = 1.0;

777:   PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));

779:   isend[0] = info->nz_used;
780:   isend[1] = info->nz_allocated;
781:   isend[2] = info->nz_unneeded;
782:   isend[3] = info->memory;
783:   isend[4] = info->mallocs;
784:   if (flag == MAT_LOCAL) {
785:     info->nz_used      = isend[0];
786:     info->nz_allocated = isend[1];
787:     info->nz_unneeded  = isend[2];
788:     info->memory       = isend[3];
789:     info->mallocs      = isend[4];
790:   } else if (flag == MAT_GLOBAL_MAX) {
791:     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));

793:     info->nz_used      = irecv[0];
794:     info->nz_allocated = irecv[1];
795:     info->nz_unneeded  = irecv[2];
796:     info->memory       = irecv[3];
797:     info->mallocs      = irecv[4];
798:   } else if (flag == MAT_GLOBAL_SUM) {
799:     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));

801:     info->nz_used      = irecv[0];
802:     info->nz_allocated = irecv[1];
803:     info->nz_unneeded  = irecv[2];
804:     info->memory       = irecv[3];
805:     info->mallocs      = irecv[4];
806:   }
807:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
808:   info->fill_ratio_needed = 0;
809:   info->factor_mallocs    = 0;
810:   PetscFunctionReturn(PETSC_SUCCESS);
811: }

813: PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
814: {
815:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

817:   PetscFunctionBegin;
818:   switch (op) {
819:   case MAT_NEW_NONZERO_LOCATIONS:
820:   case MAT_NEW_NONZERO_LOCATION_ERR:
821:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822:     MatCheckPreallocated(A, 1);
823:     PetscCall(MatSetOption(a->A, op, flg));
824:     break;
825:   case MAT_ROW_ORIENTED:
826:     MatCheckPreallocated(A, 1);
827:     a->roworiented = flg;
828:     PetscCall(MatSetOption(a->A, op, flg));
829:     break;
830:   case MAT_FORCE_DIAGONAL_ENTRIES:
831:   case MAT_KEEP_NONZERO_PATTERN:
832:   case MAT_USE_HASH_TABLE:
833:   case MAT_SORTED_FULL:
834:     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
835:     break;
836:   case MAT_IGNORE_OFF_PROC_ENTRIES:
837:     a->donotstash = flg;
838:     break;
839:   case MAT_SYMMETRIC:
840:   case MAT_STRUCTURALLY_SYMMETRIC:
841:   case MAT_HERMITIAN:
842:   case MAT_SYMMETRY_ETERNAL:
843:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
844:   case MAT_SPD:
845:   case MAT_IGNORE_LOWER_TRIANGULAR:
846:   case MAT_IGNORE_ZERO_ENTRIES:
847:   case MAT_SPD_ETERNAL:
848:     /* if the diagonal block is square it inherits some of the properties above */
849:     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
850:     break;
851:   default:
852:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
853:   }
854:   PetscFunctionReturn(PETSC_SUCCESS);
855: }
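
/* MatDiagonalScale_MPIDense: left scaling multiplies each locally owned row by the
   matching entry of ll; right scaling first gathers rr into lvec through the Mvctx
   SF (every rank needs all N entries) and then scales the local columns */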

857: PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
858: {
859:   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
860:   const PetscScalar *l;
861:   PetscScalar        x, *v, *vv, *r;
862:   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

864:   PetscFunctionBegin;
865:   PetscCall(MatDenseGetArray(mdn->A, &vv));
866:   PetscCall(MatDenseGetLDA(mdn->A, &lda));
867:   PetscCall(MatGetLocalSize(A, &s2, &s3));
868:   if (ll) {
869:     PetscCall(VecGetLocalSize(ll, &s2a));
870:     PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
871:     PetscCall(VecGetArrayRead(ll, &l));
872:     for (i = 0; i < m; i++) {
873:       x = l[i];
874:       v = vv + i;
875:       for (j = 0; j < n; j++) {
876:         (*v) *= x;
877:         v += lda;
878:       }
879:     }
880:     PetscCall(VecRestoreArrayRead(ll, &l));
881:     PetscCall(PetscLogFlops(1.0 * n * m));
882:   }
883:   if (rr) {
884:     const PetscScalar *ar;

886:     PetscCall(VecGetLocalSize(rr, &s3a));
887:     PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s3a, s3);
888:     PetscCall(VecGetArrayRead(rr, &ar));
889:     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
890:     PetscCall(VecGetArray(mdn->lvec, &r));
891:     PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
892:     PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
893:     PetscCall(VecRestoreArrayRead(rr, &ar));
894:     for (i = 0; i < n; i++) {
895:       x = r[i];
896:       v = vv + i * lda;
897:       for (j = 0; j < m; j++) (*v++) *= x;
898:     }
899:     PetscCall(VecRestoreArray(mdn->lvec, &r));
900:     PetscCall(PetscLogFlops(1.0 * n * m));
901:   }
902:   PetscCall(MatDenseRestoreArray(mdn->A, &vv));
903:   PetscFunctionReturn(PETSC_SUCCESS);
904: }
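
/* MatNorm_MPIDense: Frobenius sums |a_ij|^2 locally and reduces with MPIU_SUM (the
   loops walk the array contiguously, i.e. they assume lda equals the local row
   count); the 1-norm reduces per-column absolute sums and takes the maximum; the
   infinity norm reduces the local max row sums with MPIU_MAX; the 2-norm is unsupported */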

906: PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
907: {
908:   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
909:   PetscInt           i, j;
910:   PetscMPIInt        size;
911:   PetscReal          sum = 0.0;
912:   const PetscScalar *av, *v;

914:   PetscFunctionBegin;
915:   PetscCall(MatDenseGetArrayRead(mdn->A, &av));
916:   v = av;
917:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
918:   if (size == 1) {
919:     PetscCall(MatNorm(mdn->A, type, nrm));
920:   } else {
921:     if (type == NORM_FROBENIUS) {
922:       for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
923:         sum += PetscRealPart(PetscConj(*v) * (*v));
924:         v++;
925:       }
926:       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
927:       *nrm = PetscSqrtReal(*nrm);
928:       PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
929:     } else if (type == NORM_1) {
930:       PetscReal *tmp, *tmp2;
931:       PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
932:       *nrm = 0.0;
933:       v    = av;
934:       for (j = 0; j < mdn->A->cmap->n; j++) {
935:         for (i = 0; i < mdn->A->rmap->n; i++) {
936:           tmp[j] += PetscAbsScalar(*v);
937:           v++;
938:         }
939:       }
940:       PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
941:       for (j = 0; j < A->cmap->N; j++) {
942:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
943:       }
944:       PetscCall(PetscFree2(tmp, tmp2));
945:       PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
946:     } else if (type == NORM_INFINITY) { /* max row norm */
947:       PetscReal ntemp;
948:       PetscCall(MatNorm(mdn->A, type, &ntemp));
949:       PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
950:     } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
951:   }
952:   PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
953:   PetscFunctionReturn(PETSC_SUCCESS);
954: }
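
/* MatTranspose_MPIDense: forms B = A^T by inserting each local column of A as a
   global row of B with MatSetValues() and assembling; for MAT_INPLACE_MATRIX the
   result is merged back into A via MatHeaderMerge() */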

956: PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
957: {
958:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959:   Mat           B;
960:   PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
961:   PetscInt      j, i, lda;
962:   PetscScalar  *v;

964:   PetscFunctionBegin;
965:   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
966:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
967:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
968:     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
969:     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
970:     PetscCall(MatMPIDenseSetPreallocation(B, NULL));
971:   } else B = *matout;

973:   m = a->A->rmap->n;
974:   n = a->A->cmap->n;
975:   PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
976:   PetscCall(MatDenseGetLDA(a->A, &lda));
977:   PetscCall(PetscMalloc1(m, &rwork));
978:   for (i = 0; i < m; i++) rwork[i] = rstart + i;
979:   for (j = 0; j < n; j++) {
980:     PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
981:     v += lda;
982:   }
983:   PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
984:   PetscCall(PetscFree(rwork));
985:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
986:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
987:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
988:     *matout = B;
989:   } else {
990:     PetscCall(MatHeaderMerge(A, &B));
991:   }
992:   PetscFunctionReturn(PETSC_SUCCESS);
993: }

995: static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
996: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

998: PetscErrorCode MatSetUp_MPIDense(Mat A)
999: {
1000:   PetscFunctionBegin;
1001:   PetscCall(PetscLayoutSetUp(A->rmap));
1002:   PetscCall(PetscLayoutSetUp(A->cmap));
1003:   if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1004:   PetscFunctionReturn(PETSC_SUCCESS);
1005: }

1007: PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1008: {
1009:   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

1011:   PetscFunctionBegin;
1012:   PetscCall(MatAXPY(A->A, alpha, B->A, str));
1013:   PetscFunctionReturn(PETSC_SUCCESS);
1014: }

1016: PetscErrorCode MatConjugate_MPIDense(Mat mat)
1017: {
1018:   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

1020:   PetscFunctionBegin;
1021:   PetscCall(MatConjugate(a->A));
1022:   PetscFunctionReturn(PETSC_SUCCESS);
1023: }

1025: PetscErrorCode MatRealPart_MPIDense(Mat A)
1026: {
1027:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1029:   PetscFunctionBegin;
1030:   PetscCall(MatRealPart(a->A));
1031:   PetscFunctionReturn(PETSC_SUCCESS);
1032: }

1034: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1035: {
1036:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1038:   PetscFunctionBegin;
1039:   PetscCall(MatImaginaryPart(a->A));
1040:   PetscFunctionReturn(PETSC_SUCCESS);
1041: }

1043: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1044: {
1045:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1047:   PetscFunctionBegin;
1048:   PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1049:   PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1050:   PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1051:   PetscFunctionReturn(PETSC_SUCCESS);
1052: }

1054: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
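
/* MatGetColumnReductions_MPIDense: computes per-column reductions on the local
   block, then combines across ranks: 2-norms are squared before the MPIU_SUM
   reduction and square-rooted afterwards, infinity norms use MPIU_MAX, and the
   mean reductions divide the summed real/imaginary parts by the global row count */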

1056: PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1057: {
1058:   PetscInt      i, m, n;
1059:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1060:   PetscReal    *work;

1062:   PetscFunctionBegin;
1063:   PetscCall(MatGetSize(A, &m, &n));
1064:   PetscCall(PetscMalloc1(n, &work));
1065:   if (type == REDUCTION_MEAN_REALPART) {
1066:     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1067:   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1068:     PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1069:   } else {
1070:     PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1071:   }
1072:   if (type == NORM_2) {
1073:     for (i = 0; i < n; i++) work[i] *= work[i];
1074:   }
1075:   if (type == NORM_INFINITY) {
1076:     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1077:   } else {
1078:     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1079:   }
1080:   PetscCall(PetscFree(work));
1081:   if (type == NORM_2) {
1082:     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1083:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1084:     for (i = 0; i < n; i++) reductions[i] /= m;
1085:   }
1086:   PetscFunctionReturn(PETSC_SUCCESS);
1087: }

1089: #if defined(PETSC_HAVE_CUDA)
1090: PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
1091: {
1092:   PetscScalar *da;
1093:   PetscInt     lda;

1095:   PetscFunctionBegin;
1096:   PetscCall(MatDenseCUDAGetArray(A, &da));
1097:   PetscCall(MatDenseGetLDA(A, &lda));
1098:   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1099:   PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1100:   PetscCall(MatDenseCUDARestoreArray(A, &da));
1101:   PetscFunctionReturn(PETSC_SUCCESS);
1102: }
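
/* The *_MPIDenseCUDA column-vector accessors wrap the device array of the local
   block in a cached VECMPICUDA (cvec) via VecCUDAPlaceArray(); vecinuse records the
   outstanding column so mismatched get/restore calls are detected */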

1104: static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1105: {
1106:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1107:   PetscInt      lda;

1109:   PetscFunctionBegin;
1110:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1111:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1112:   if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1113:   a->vecinuse = col + 1;
1114:   PetscCall(MatDenseGetLDA(a->A, &lda));
1115:   PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1116:   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1117:   *v = a->cvec;
1118:   PetscFunctionReturn(PETSC_SUCCESS);
1119: }

1121: static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1122: {
1123:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1125:   PetscFunctionBegin;
1126:   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1127:   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1128:   a->vecinuse = 0;
1129:   PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1130:   PetscCall(VecCUDAResetArray(a->cvec));
1131:   if (v) *v = NULL;
1132:   PetscFunctionReturn(PETSC_SUCCESS);
1133: }

1135: static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1136: {
1137:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1138:   PetscInt      lda;

1140:   PetscFunctionBegin;
1141:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1142:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1143:   if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1144:   a->vecinuse = col + 1;
1145:   PetscCall(MatDenseGetLDA(a->A, &lda));
1146:   PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
1147:   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1148:   PetscCall(VecLockReadPush(a->cvec));
1149:   *v = a->cvec;
1150:   PetscFunctionReturn(PETSC_SUCCESS);
1151: }

1153: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1154: {
1155:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1157:   PetscFunctionBegin;
1158:   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1159:   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1160:   a->vecinuse = 0;
1161:   PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
1162:   PetscCall(VecLockReadPop(a->cvec));
1163:   PetscCall(VecCUDAResetArray(a->cvec));
1164:   if (v) *v = NULL;
1165:   PetscFunctionReturn(PETSC_SUCCESS);
1166: }

1168: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1169: {
1170:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1171:   PetscInt      lda;

1173:   PetscFunctionBegin;
1174:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1175:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1176:   if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1177:   a->vecinuse = col + 1;
1178:   PetscCall(MatDenseGetLDA(a->A, &lda));
1179:   PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1180:   PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1181:   *v = a->cvec;
1182:   PetscFunctionReturn(PETSC_SUCCESS);
1183: }

1185: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1186: {
1187:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1189:   PetscFunctionBegin;
1190:   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1191:   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1192:   a->vecinuse = 0;
1193:   PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1194:   PetscCall(VecCUDAResetArray(a->cvec));
1195:   if (v) *v = NULL;
1196:   PetscFunctionReturn(PETSC_SUCCESS);
1197: }

1199: static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1200: {
1201:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1203:   PetscFunctionBegin;
1204:   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1205:   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1206:   PetscCall(MatDenseCUDAPlaceArray(l->A, a));
1207:   PetscFunctionReturn(PETSC_SUCCESS);
1208: }

1210: static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1211: {
1212:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1214:   PetscFunctionBegin;
1215:   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1216:   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1217:   PetscCall(MatDenseCUDAResetArray(l->A));
1218:   PetscFunctionReturn(PETSC_SUCCESS);
1219: }

1221: static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1222: {
1223:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1225:   PetscFunctionBegin;
1226:   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1227:   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1228:   PetscCall(MatDenseCUDAReplaceArray(l->A, a));
1229:   PetscFunctionReturn(PETSC_SUCCESS);
1230: }

1232: static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1233: {
1234:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1236:   PetscFunctionBegin;
1237:   PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
1238:   PetscFunctionReturn(PETSC_SUCCESS);
1239: }

1241: static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1242: {
1243:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1245:   PetscFunctionBegin;
1246:   PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
1247:   PetscFunctionReturn(PETSC_SUCCESS);
1248: }

1250: static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1251: {
1252:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1254:   PetscFunctionBegin;
1255:   PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
1256:   PetscFunctionReturn(PETSC_SUCCESS);
1257: }

1259: static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1260: {
1261:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1263:   PetscFunctionBegin;
1264:   PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
1265:   PetscFunctionReturn(PETSC_SUCCESS);
1266: }

1268: static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1269: {
1270:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1272:   PetscFunctionBegin;
1273:   PetscCall(MatDenseCUDAGetArray(l->A, a));
1274:   PetscFunctionReturn(PETSC_SUCCESS);
1275: }

1277: static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1278: {
1279:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1281:   PetscFunctionBegin;
1282:   PetscCall(MatDenseCUDARestoreArray(l->A, a));
1283:   PetscFunctionReturn(PETSC_SUCCESS);
1284: }

1286: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1287: static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1288: static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1289: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1290: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1291: static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1292: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);

1294: static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
1295: {
1296:   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

1298:   PetscFunctionBegin;
1299:   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1300:   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1301:   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1302:   mat->boundtocpu = bind;
1303:   if (!bind) {
1304:     PetscBool iscuda;

1306:     PetscCall(PetscFree(mat->defaultrandtype));
1307:     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1308:     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
1309:     if (!iscuda) PetscCall(VecDestroy(&d->cvec));
1310:     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
1311:     if (!iscuda) PetscCall(MatDestroy(&d->cmat));
1312:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
1313:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
1314:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
1315:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
1316:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
1317:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1318:     mat->ops->shift = MatShift_MPIDenseCUDA;
1319:   } else {
1320:     PetscCall(PetscFree(mat->defaultrandtype));
1321:     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1322:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1323:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1324:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1325:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1326:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1327:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1328:     mat->ops->shift = MatShift_MPIDense;
1329:   }
1330:   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1331:   PetscFunctionReturn(PETSC_SUCCESS);
1332: }
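
/*
   Example (illustrative): the routine above is reached through the public
   MatBindToCPU() entry point.

     PetscCall(MatBindToCPU(A, PETSC_TRUE));  // subsequent operations, including MatShift(), run on the host
     PetscCall(MatBindToCPU(A, PETSC_FALSE)); // rebind to the GPU paths composed above
*/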

1334: PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1335: {
1336:   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1337:   PetscBool     iscuda;

1339:   PetscFunctionBegin;
1341:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
1342:   if (!iscuda) PetscFunctionReturn(PETSC_SUCCESS);
1343:   PetscCall(PetscLayoutSetUp(A->rmap));
1344:   PetscCall(PetscLayoutSetUp(A->cmap));
1345:   if (!d->A) {
1346:     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1347:     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1348:   }
1349:   PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
1350:   PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
1351:   A->preallocated = PETSC_TRUE;
1352:   A->assembled    = PETSC_TRUE;
1353:   PetscFunctionReturn(PETSC_SUCCESS);
1354: }
1355: #endif
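
/*
   Example (illustrative sketch; m and N stand for the desired local row and
   global column counts): creating a MATMPIDENSECUDA matrix with the
   preallocation routine above. Passing NULL for d_data lets PETSc allocate
   the device buffer itself.

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, m, PETSC_DECIDE, PETSC_DETERMINE, N));
     PetscCall(MatSetType(A, MATMPIDENSECUDA));
     PetscCall(MatMPIDenseCUDASetPreallocation(A, NULL));
*/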

1357: #if defined(PETSC_HAVE_HIP)
1358: PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha)
1359: {
1360:   PetscScalar *da;
1361:   PetscInt     lda;

1363:   PetscFunctionBegin;
1364:   PetscCall(MatDenseHIPGetArray(A, &da));
1365:   PetscCall(MatDenseGetLDA(A, &lda));
1366:   PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1367:   PetscCall(MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1368:   PetscCall(MatDenseHIPRestoreArray(A, &da));
1369:   PetscFunctionReturn(PETSC_SUCCESS);
1370: }

1372: static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1373: {
1374:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1375:   PetscInt      lda;

1377:   PetscFunctionBegin;
1378:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1379:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1380:   if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1381:   a->vecinuse = col + 1;
1382:   PetscCall(MatDenseGetLDA(a->A, &lda));
1383:   PetscCall(MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1384:   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1385:   *v = a->cvec;
1386:   PetscFunctionReturn(PETSC_SUCCESS);
1387: }

1389: static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1390: {
1391:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1393:   PetscFunctionBegin;
1394:   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1395:   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1396:   a->vecinuse = 0;
1397:   PetscCall(MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1398:   PetscCall(VecHIPResetArray(a->cvec));
1399:   if (v) *v = NULL;
1400:   PetscFunctionReturn(PETSC_SUCCESS);
1401: }

1403: static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1404: {
1405:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1406:   PetscInt      lda;

1408:   PetscFunctionBegin;
1409:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1410:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1411:   if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1412:   a->vecinuse = col + 1;
1413:   PetscCall(MatDenseGetLDA(a->A, &lda));
1414:   PetscCall(MatDenseHIPGetArrayRead(a->A, &a->ptrinuse));
1415:   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1416:   PetscCall(VecLockReadPush(a->cvec));
1417:   *v = a->cvec;
1418:   PetscFunctionReturn(PETSC_SUCCESS);
1419: }

1421: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1422: {
1423:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1425:   PetscFunctionBegin;
1426:   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1427:   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1428:   a->vecinuse = 0;
1429:   PetscCall(MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse));
1430:   PetscCall(VecLockReadPop(a->cvec));
1431:   PetscCall(VecHIPResetArray(a->cvec));
1432:   if (v) *v = NULL;
1433:   PetscFunctionReturn(PETSC_SUCCESS);
1434: }

1436: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1437: {
1438:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1439:   PetscInt      lda;

1441:   PetscFunctionBegin;
1442:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1443:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1444:   if (!a->cvec) PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1445:   a->vecinuse = col + 1;
1446:   PetscCall(MatDenseGetLDA(a->A, &lda));
1447:   PetscCall(MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1448:   PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1449:   *v = a->cvec;
1450:   PetscFunctionReturn(PETSC_SUCCESS);
1451: }

1453: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1454: {
1455:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

1457:   PetscFunctionBegin;
1458:   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1459:   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1460:   a->vecinuse = 0;
1461:   PetscCall(MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1462:   PetscCall(VecHIPResetArray(a->cvec));
1463:   if (v) *v = NULL;
1464:   PetscFunctionReturn(PETSC_SUCCESS);
1465: }

1467: static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1468: {
1469:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1471:   PetscFunctionBegin;
1472:   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1473:   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1474:   PetscCall(MatDenseHIPPlaceArray(l->A, a));
1475:   PetscFunctionReturn(PETSC_SUCCESS);
1476: }

1478: static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A)
1479: {
1480:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1482:   PetscFunctionBegin;
1483:   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1484:   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1485:   PetscCall(MatDenseHIPResetArray(l->A));
1486:   PetscFunctionReturn(PETSC_SUCCESS);
1487: }

1489: static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1490: {
1491:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1493:   PetscFunctionBegin;
1494:   PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1495:   PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1496:   PetscCall(MatDenseHIPReplaceArray(l->A, a));
1497:   PetscFunctionReturn(PETSC_SUCCESS);
1498: }

1500: static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1501: {
1502:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1504:   PetscFunctionBegin;
1505:   PetscCall(MatDenseHIPGetArrayWrite(l->A, a));
1506:   PetscFunctionReturn(PETSC_SUCCESS);
1507: }

1509: static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1510: {
1511:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1513:   PetscFunctionBegin;
1514:   PetscCall(MatDenseHIPRestoreArrayWrite(l->A, a));
1515:   PetscFunctionReturn(PETSC_SUCCESS);
1516: }

1518: static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1519: {
1520:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1522:   PetscFunctionBegin;
1523:   PetscCall(MatDenseHIPGetArrayRead(l->A, a));
1524:   PetscFunctionReturn(PETSC_SUCCESS);
1525: }

1527: static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1528: {
1529:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1531:   PetscFunctionBegin;
1532:   PetscCall(MatDenseHIPRestoreArrayRead(l->A, a));
1533:   PetscFunctionReturn(PETSC_SUCCESS);
1534: }

1536: static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a)
1537: {
1538:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1540:   PetscFunctionBegin;
1541:   PetscCall(MatDenseHIPGetArray(l->A, a));
1542:   PetscFunctionReturn(PETSC_SUCCESS);
1543: }

1545: static PetscErrorCode MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a)
1546: {
1547:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

1549:   PetscFunctionBegin;
1550:   PetscCall(MatDenseHIPRestoreArray(l->A, a));
1551:   PetscFunctionReturn(PETSC_SUCCESS);
1552: }

1554: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1555: static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1556: static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1557: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1558: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1559: static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1560: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);

1562: static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind)
1563: {
1564:   Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

1566:   PetscFunctionBegin;
1567:   PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1568:   PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1569:   if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1570:   mat->boundtocpu = bind;
1571:   if (!bind) {
1572:     PetscBool iship;

1574:     PetscCall(PetscFree(mat->defaultrandtype));
1575:     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1576:     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iship));
1577:     if (!iship) PetscCall(VecDestroy(&d->cvec));
1578:     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iship));
1579:     if (!iship) PetscCall(MatDestroy(&d->cmat));
1580:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP));
1581:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP));
1582:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP));
1583:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP));
1584:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP));
1585:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP));
1586:     mat->ops->shift = MatShift_MPIDenseHIP;
1587:   } else {
1588:     PetscCall(PetscFree(mat->defaultrandtype));
1589:     PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1590:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1591:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1592:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1593:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1594:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1595:     PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1596:     mat->ops->shift = MatShift_MPIDense;
1597:   }
1598:   if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1599:   PetscFunctionReturn(PETSC_SUCCESS);
1600: }

1602: PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data)
1603: {
1604:   Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1605:   PetscBool     iship;

1607:   PetscFunctionBegin;
1609:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
1610:   if (!iship) PetscFunctionReturn(PETSC_SUCCESS);
1611:   PetscCall(PetscLayoutSetUp(A->rmap));
1612:   PetscCall(PetscLayoutSetUp(A->cmap));
1613:   if (!d->A) {
1614:     PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1615:     PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1616:   }
1617:   PetscCall(MatSetType(d->A, MATSEQDENSEHIP));
1618:   PetscCall(MatSeqDenseHIPSetPreallocation(d->A, d_data));
1619:   A->preallocated = PETSC_TRUE;
1620:   A->assembled    = PETSC_TRUE;
1621:   PetscFunctionReturn(PETSC_SUCCESS);
1622: }
1623: #endif
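
/*
   Example (illustrative sketch, mirroring the CUDA case above):

     PetscCall(MatSetType(A, MATMPIDENSEHIP));
     PetscCall(MatMPIDenseHIPSetPreallocation(A, NULL)); // NULL: PETSc allocates the device buffer
*/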

1625: static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1626: {
1627:   Mat_MPIDense *d = (Mat_MPIDense *)x->data;

1629:   PetscFunctionBegin;
1630:   PetscCall(MatSetRandom(d->A, rctx));
1631: #if defined(PETSC_HAVE_DEVICE)
1632:   x->offloadmask = d->A->offloadmask;
1633: #endif
1634:   PetscFunctionReturn(PETSC_SUCCESS);
1635: }
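
/*
   Example (illustrative): driving the routine above through MatSetRandom().

     PetscRandom rctx;

     PetscCall(PetscRandomCreate(PetscObjectComm((PetscObject)A), &rctx));
     PetscCall(PetscRandomSetFromOptions(rctx));
     PetscCall(MatSetRandom(A, rctx)); // fills the local block; the offload mask is mirrored on device builds
     PetscCall(PetscRandomDestroy(&rctx));
*/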

1637: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1638: {
1639:   PetscFunctionBegin;
1640:   *missing = PETSC_FALSE;
1641:   PetscFunctionReturn(PETSC_SUCCESS);
1642: }

1644: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1645: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1646: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1647: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1648: static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1649: static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

1651: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1652:                                        MatGetRow_MPIDense,
1653:                                        MatRestoreRow_MPIDense,
1654:                                        MatMult_MPIDense,
1655:                                        /*  4*/ MatMultAdd_MPIDense,
1656:                                        MatMultTranspose_MPIDense,
1657:                                        MatMultTransposeAdd_MPIDense,
1658:                                        NULL,
1659:                                        NULL,
1660:                                        NULL,
1661:                                        /* 10*/ NULL,
1662:                                        NULL,
1663:                                        NULL,
1664:                                        NULL,
1665:                                        MatTranspose_MPIDense,
1666:                                        /* 15*/ MatGetInfo_MPIDense,
1667:                                        MatEqual_MPIDense,
1668:                                        MatGetDiagonal_MPIDense,
1669:                                        MatDiagonalScale_MPIDense,
1670:                                        MatNorm_MPIDense,
1671:                                        /* 20*/ MatAssemblyBegin_MPIDense,
1672:                                        MatAssemblyEnd_MPIDense,
1673:                                        MatSetOption_MPIDense,
1674:                                        MatZeroEntries_MPIDense,
1675:                                        /* 24*/ MatZeroRows_MPIDense,
1676:                                        NULL,
1677:                                        NULL,
1678:                                        NULL,
1679:                                        NULL,
1680:                                        /* 29*/ MatSetUp_MPIDense,
1681:                                        NULL,
1682:                                        NULL,
1683:                                        MatGetDiagonalBlock_MPIDense,
1684:                                        NULL,
1685:                                        /* 34*/ MatDuplicate_MPIDense,
1686:                                        NULL,
1687:                                        NULL,
1688:                                        NULL,
1689:                                        NULL,
1690:                                        /* 39*/ MatAXPY_MPIDense,
1691:                                        MatCreateSubMatrices_MPIDense,
1692:                                        NULL,
1693:                                        MatGetValues_MPIDense,
1694:                                        MatCopy_MPIDense,
1695:                                        /* 44*/ NULL,
1696:                                        MatScale_MPIDense,
1697:                                        MatShift_MPIDense,
1698:                                        NULL,
1699:                                        NULL,
1700:                                        /* 49*/ MatSetRandom_MPIDense,
1701:                                        NULL,
1702:                                        NULL,
1703:                                        NULL,
1704:                                        NULL,
1705:                                        /* 54*/ NULL,
1706:                                        NULL,
1707:                                        NULL,
1708:                                        NULL,
1709:                                        NULL,
1710:                                        /* 59*/ MatCreateSubMatrix_MPIDense,
1711:                                        MatDestroy_MPIDense,
1712:                                        MatView_MPIDense,
1713:                                        NULL,
1714:                                        NULL,
1715:                                        /* 64*/ NULL,
1716:                                        NULL,
1717:                                        NULL,
1718:                                        NULL,
1719:                                        NULL,
1720:                                        /* 69*/ NULL,
1721:                                        NULL,
1722:                                        NULL,
1723:                                        NULL,
1724:                                        NULL,
1725:                                        /* 74*/ NULL,
1726:                                        NULL,
1727:                                        NULL,
1728:                                        NULL,
1729:                                        NULL,
1730:                                        /* 79*/ NULL,
1731:                                        NULL,
1732:                                        NULL,
1733:                                        NULL,
1734:                                        /* 83*/ MatLoad_MPIDense,
1735:                                        NULL,
1736:                                        NULL,
1737:                                        NULL,
1738:                                        NULL,
1739:                                        NULL,
1740:                                        /* 89*/ NULL,
1741:                                        NULL,
1742:                                        NULL,
1743:                                        NULL,
1744:                                        NULL,
1745:                                        /* 94*/ NULL,
1746:                                        NULL,
1747:                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1748:                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
1749:                                        NULL,
1750:                                        /* 99*/ MatProductSetFromOptions_MPIDense,
1751:                                        NULL,
1752:                                        NULL,
1753:                                        MatConjugate_MPIDense,
1754:                                        NULL,
1755:                                        /*104*/ NULL,
1756:                                        MatRealPart_MPIDense,
1757:                                        MatImaginaryPart_MPIDense,
1758:                                        NULL,
1759:                                        NULL,
1760:                                        /*109*/ NULL,
1761:                                        NULL,
1762:                                        NULL,
1763:                                        MatGetColumnVector_MPIDense,
1764:                                        MatMissingDiagonal_MPIDense,
1765:                                        /*114*/ NULL,
1766:                                        NULL,
1767:                                        NULL,
1768:                                        NULL,
1769:                                        NULL,
1770:                                        /*119*/ NULL,
1771:                                        NULL,
1772:                                        NULL,
1773:                                        NULL,
1774:                                        NULL,
1775:                                        /*124*/ NULL,
1776:                                        MatGetColumnReductions_MPIDense,
1777:                                        NULL,
1778:                                        NULL,
1779:                                        NULL,
1780:                                        /*129*/ NULL,
1781:                                        NULL,
1782:                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1783:                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
1784:                                        NULL,
1785:                                        /*134*/ NULL,
1786:                                        NULL,
1787:                                        NULL,
1788:                                        NULL,
1789:                                        NULL,
1790:                                        /*139*/ NULL,
1791:                                        NULL,
1792:                                        NULL,
1793:                                        NULL,
1794:                                        NULL,
1795:                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
1796:                                        /*145*/ NULL,
1797:                                        NULL,
1798:                                        NULL,
1799:                                        NULL,
1800:                                        NULL,
1801:                                        /*150*/ NULL,
1802:                                        NULL};

1804: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1805: {
1806:   Mat_MPIDense *a     = (Mat_MPIDense *)mat->data;
1807:   MatType       mtype = MATSEQDENSE;

1809:   PetscFunctionBegin;
1810:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1811:   PetscCall(PetscLayoutSetUp(mat->rmap));
1812:   PetscCall(PetscLayoutSetUp(mat->cmap));
1813:   if (!a->A) {
1814:     PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1815:     PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1816:   }
1817: #if defined(PETSC_HAVE_CUDA)
1818:   PetscBool iscuda;
1819:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1820:   if (iscuda) mtype = MATSEQDENSECUDA;
1821: #endif
1822: #if defined(PETSC_HAVE_HIP)
1823:   PetscBool iship;
1824:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1825:   if (iship) mtype = MATSEQDENSEHIP;
1826: #endif
1827:   PetscCall(MatSetType(a->A, mtype));
1828:   PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1829: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1830:   mat->offloadmask = a->A->offloadmask;
1831: #endif
1832:   mat->preallocated = PETSC_TRUE;
1833:   mat->assembled    = PETSC_TRUE;
1834:   PetscFunctionReturn(PETSC_SUCCESS);
1835: }
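
/*
   Example (illustrative sketch; m and N are hypothetical sizes): host-side
   setup matching the routine above. A non-NULL data pointer must hold the
   local rmap->n by cmap->N block in column-major order; NULL lets PETSc
   allocate it.

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, m, PETSC_DECIDE, PETSC_DETERMINE, N));
     PetscCall(MatSetType(A, MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(A, NULL));
*/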

1837: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1838: {
1839:   Mat B, C;

1841:   PetscFunctionBegin;
1842:   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1843:   PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1844:   PetscCall(MatDestroy(&C));
1845:   if (reuse == MAT_REUSE_MATRIX) {
1846:     C = *newmat;
1847:   } else C = NULL;
1848:   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1849:   PetscCall(MatDestroy(&B));
1850:   if (reuse == MAT_INPLACE_MATRIX) {
1851:     PetscCall(MatHeaderReplace(A, &C));
1852:   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1853:   PetscFunctionReturn(PETSC_SUCCESS);
1854: }

1856: PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1857: {
1858:   Mat B, C;

1860:   PetscFunctionBegin;
1861:   PetscCall(MatDenseGetLocalMatrix(A, &C));
1862:   PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1863:   if (reuse == MAT_REUSE_MATRIX) {
1864:     C = *newmat;
1865:   } else C = NULL;
1866:   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1867:   PetscCall(MatDestroy(&B));
1868:   if (reuse == MAT_INPLACE_MATRIX) {
1869:     PetscCall(MatHeaderReplace(A, &C));
1870:   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1871:   PetscFunctionReturn(PETSC_SUCCESS);
1872: }
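
/*
   Example (illustrative): both converters above are dispatched through the
   public MatConvert() interface.

     Mat B;

     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); // dense -> sparse copy
     // or, to replace A with its sparse equivalent in place:
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INPLACE_MATRIX, &A));
*/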

1874: #if defined(PETSC_HAVE_ELEMENTAL)
1875: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1876: {
1877:   Mat          mat_elemental;
1878:   PetscScalar *v;
1879:   PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

1881:   PetscFunctionBegin;
1882:   if (reuse == MAT_REUSE_MATRIX) {
1883:     mat_elemental = *newmat;
1884:     PetscCall(MatZeroEntries(*newmat));
1885:   } else {
1886:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1887:     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1888:     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1889:     PetscCall(MatSetUp(mat_elemental));
1890:     PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1891:   }

1893:   PetscCall(PetscMalloc2(m, &rows, N, &cols));
1894:   for (i = 0; i < N; i++) cols[i] = i;
1895:   for (i = 0; i < m; i++) rows[i] = rstart + i;

1897:   /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1898:   PetscCall(MatDenseGetArray(A, &v));
1899:   PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1900:   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1901:   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1902:   PetscCall(MatDenseRestoreArray(A, &v));
1903:   PetscCall(PetscFree2(rows, cols));

1905:   if (reuse == MAT_INPLACE_MATRIX) {
1906:     PetscCall(MatHeaderReplace(A, &mat_elemental));
1907:   } else {
1908:     *newmat = mat_elemental;
1909:   }
1910:   PetscFunctionReturn(PETSC_SUCCESS);
1911: }
1912: #endif

1914: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1915: {
1916:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1918:   PetscFunctionBegin;
1919:   PetscCall(MatDenseGetColumn(mat->A, col, vals));
1920:   PetscFunctionReturn(PETSC_SUCCESS);
1921: }

1923: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1924: {
1925:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1927:   PetscFunctionBegin;
1928:   PetscCall(MatDenseRestoreColumn(mat->A, vals));
1929:   PetscFunctionReturn(PETSC_SUCCESS);
1930: }
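
/*
   Example (illustrative sketch; j is a hypothetical global column index): raw
   pointer access to the local entries of one column via the wrappers above,
   where col[i] addresses entry (rstart + i, j).

     PetscInt     m;
     PetscScalar *col;

     PetscCall(MatGetLocalSize(A, &m, NULL));
     PetscCall(MatDenseGetColumn(A, j, &col));
     for (PetscInt i = 0; i < m; i++) col[i] *= 2.0;
     PetscCall(MatDenseRestoreColumn(A, &col));
*/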

1932: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1933: {
1934:   Mat_MPIDense *mat;
1935:   PetscInt      m, nloc, N;

1937:   PetscFunctionBegin;
1938:   PetscCall(MatGetSize(inmat, &m, &N));
1939:   PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1940:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1941:     PetscInt sum;

1943:     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1944:     /* Check sum(n) = N */
1945:     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1946:     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

1948:     PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1949:     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1950:   }

1952:   /* numeric phase */
1953:   mat = (Mat_MPIDense *)(*outmat)->data;
1954:   PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1955:   PetscFunctionReturn(PETSC_SUCCESS);
1956: }
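
/*
   Example (illustrative; Aseq is a hypothetical name for this rank's
   sequential matrix): stacking the per-rank matrices into one parallel dense
   matrix through the public entry point. Here n is the number of columns this
   rank owns in the result; PETSC_DECIDE lets PETSc split them.

     Mat C;

     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, MAT_INITIAL_MATRIX, &C));
*/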

1958: #if defined(PETSC_HAVE_CUDA)
1959: PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1960: {
1961:   Mat           B;
1962:   Mat_MPIDense *m;

1964:   PetscFunctionBegin;
1965:   if (reuse == MAT_INITIAL_MATRIX) {
1966:     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1967:   } else if (reuse == MAT_REUSE_MATRIX) {
1968:     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1969:   }

1971:   B = *newmat;
1972:   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
1973:   PetscCall(PetscFree(B->defaultvectype));
1974:   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
1975:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
1976:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
1977:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
1978:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
1979:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
1980:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
1981:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
1982:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
1983:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
1984:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
1985:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
1986:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
1987:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
1988:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
1989:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
1990:   m = (Mat_MPIDense *)(B)->data;
1991:   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
1992:   B->ops->bindtocpu = NULL;
1993:   B->offloadmask    = PETSC_OFFLOAD_CPU;
1994:   PetscFunctionReturn(PETSC_SUCCESS);
1995: }

1997: PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1998: {
1999:   Mat           B;
2000:   Mat_MPIDense *m;

2002:   PetscFunctionBegin;
2003:   if (reuse == MAT_INITIAL_MATRIX) {
2004:     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2005:   } else if (reuse == MAT_REUSE_MATRIX) {
2006:     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2007:   }

2009:   B = *newmat;
2010:   PetscCall(PetscFree(B->defaultvectype));
2011:   PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
2012:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
2013:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
2014:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2015:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2016:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2017:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2018:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
2019:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
2020:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
2021:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
2022:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
2023:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
2024:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
2025:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
2026:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
2027:   m = (Mat_MPIDense *)(B->data);
2028:   if (m->A) {
2029:     PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
2030:     B->offloadmask = PETSC_OFFLOAD_BOTH;
2031:   } else {
2032:     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2033:   }
2034:   PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));

2036:   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
2037:   PetscFunctionReturn(PETSC_SUCCESS);
2038: }
2039: #endif

2041: #if defined(PETSC_HAVE_HIP)
2042: PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2043: {
2044:   Mat           B;
2045:   Mat_MPIDense *m;

2047:   PetscFunctionBegin;
2048:   if (reuse == MAT_INITIAL_MATRIX) {
2049:     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2050:   } else if (reuse == MAT_REUSE_MATRIX) {
2051:     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2052:   }

2054:   B = *newmat;
2055:   PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE));
2056:   PetscCall(PetscFree(B->defaultvectype));
2057:   PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
2058:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
2059:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL));
2060:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
2061:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
2062:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
2063:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
2064:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL));
2065:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL));
2066:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL));
2067:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL));
2068:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL));
2069:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", NULL));
2070:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL));
2071:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL));
2072:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL));
2073:   m = (Mat_MPIDense *)(B)->data;
2074:   if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
2075:   B->ops->bindtocpu = NULL;
2076:   B->offloadmask    = PETSC_OFFLOAD_CPU;
2077:   PetscFunctionReturn(PETSC_SUCCESS);
2078: }

2080: PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2081: {
2082:   Mat           B;
2083:   Mat_MPIDense *m;

2085:   PetscFunctionBegin;
2086:   if (reuse == MAT_INITIAL_MATRIX) {
2087:     PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2088:   } else if (reuse == MAT_REUSE_MATRIX) {
2089:     PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2090:   }

2092:   B = *newmat;
2093:   PetscCall(PetscFree(B->defaultvectype));
2094:   PetscCall(PetscStrallocpy(VECHIP, &B->defaultvectype));
2095:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP));
2096:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense));
2097:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2098:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2099:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2100:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2101:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP));
2102:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP));
2103:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP));
2104:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP));
2105:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP));
2106:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP));
2107:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP));
2108:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", MatDenseHIPResetArray_MPIDenseHIP));
2109:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP));
2110:   m = (Mat_MPIDense *)(B->data);
2111:   if (m->A) {
2112:     PetscCall(MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A));
2113:     B->offloadmask = PETSC_OFFLOAD_BOTH;
2114:   } else {
2115:     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2116:   }
2117:   PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE));

2119:   B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP;
2120:   PetscFunctionReturn(PETSC_SUCCESS);
2121: }
2122: #endif

2124: PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2125: {
2126:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2127:   PetscInt      lda;

2129:   PetscFunctionBegin;
2130:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2131:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2132:   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2133:   a->vecinuse = col + 1;
2134:   PetscCall(MatDenseGetLDA(a->A, &lda));
2135:   PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
2136:   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2137:   *v = a->cvec;
2138:   PetscFunctionReturn(PETSC_SUCCESS);
2139: }

2141: PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2142: {
2143:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

2145:   PetscFunctionBegin;
2146:   PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2147:   PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
2148:   a->vecinuse = 0;
2149:   PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
2150:   PetscCall(VecResetArray(a->cvec));
2151:   if (v) *v = NULL;
2152:   PetscFunctionReturn(PETSC_SUCCESS);
2153: }

2155: PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2156: {
2157:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2158:   PetscInt      lda;

2160:   PetscFunctionBegin;
2161:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2162:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2163:   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2164:   a->vecinuse = col + 1;
2165:   PetscCall(MatDenseGetLDA(a->A, &lda));
2166:   PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
2167:   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2168:   PetscCall(VecLockReadPush(a->cvec));
2169:   *v = a->cvec;
2170:   PetscFunctionReturn(PETSC_SUCCESS);
2171: }

2173: PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2174: {
2175:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

2177:   PetscFunctionBegin;
2178:   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2179:   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2180:   a->vecinuse = 0;
2181:   PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
2182:   PetscCall(VecLockReadPop(a->cvec));
2183:   PetscCall(VecResetArray(a->cvec));
2184:   if (v) *v = NULL;
2185:   PetscFunctionReturn(PETSC_SUCCESS);
2186: }

2188: PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2189: {
2190:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2191:   PetscInt      lda;

2193:   PetscFunctionBegin;
2194:   PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2195:   PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2196:   if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2197:   a->vecinuse = col + 1;
2198:   PetscCall(MatDenseGetLDA(a->A, &lda));
2199:   PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2200:   PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2201:   *v = a->cvec;
2202:   PetscFunctionReturn(PETSC_SUCCESS);
2203: }

2205: PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2206: {
2207:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

2209:   PetscFunctionBegin;
2210:   PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2211:   PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2212:   a->vecinuse = 0;
2213:   PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2214:   PetscCall(VecResetArray(a->cvec));
2215:   if (v) *v = NULL;
2216:   PetscFunctionReturn(PETSC_SUCCESS);
2217: }
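
/*
   Example (illustrative): read-only column access via the routines above. The
   returned vector is read-locked, so attempts to modify it error until it is
   restored.

     Vec       v;
     PetscReal nrm;

     PetscCall(MatDenseGetColumnVecRead(A, j, &v));
     PetscCall(VecNorm(v, NORM_2, &nrm));
     PetscCall(MatDenseRestoreColumnVecRead(A, j, &v));
*/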

2219: PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
2220: {
2221:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2222:   Mat_MPIDense *c;
2223:   MPI_Comm      comm;
2224:   PetscInt      pbegin, pend;

2226:   PetscFunctionBegin;
2227:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2228:   PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2229:   PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2230:   pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
2231:   pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
2232:   if (!a->cmat) {
2233:     PetscCall(MatCreate(comm, &a->cmat));
2234:     PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
2235:     if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
2236:     else {
2237:       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2238:       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2239:       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2240:     }
2241:     PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2242:     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2243:   } else {
2244:     PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
2245:     if (same && a->cmat->rmap->N != A->rmap->N) {
2246:       same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
2247:       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2248:     }
2249:     if (!same) {
2250:       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
2251:       PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
2252:       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2253:       PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2254:       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2255:     }
2256:     if (cend - cbegin != a->cmat->cmap->N) {
2257:       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
2258:       PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
2259:       PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2260:       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2261:     }
2262:   }
2263:   c = (Mat_MPIDense *)a->cmat->data;
2264:   PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2265:   PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));

2267:   a->cmat->preallocated = PETSC_TRUE;
2268:   a->cmat->assembled    = PETSC_TRUE;
2269: #if defined(PETSC_HAVE_DEVICE)
2270:   a->cmat->offloadmask = c->A->offloadmask;
2271: #endif
2272:   a->matinuse = cbegin + 1;
2273:   *v          = a->cmat;
2274:   PetscFunctionReturn(PETSC_SUCCESS);
2275: }

2277: PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
2278: {
2279:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2280:   Mat_MPIDense *c;

2282:   PetscFunctionBegin;
2283:   PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
2284:   PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
2285:   PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
2286:   a->matinuse = 0;
2287:   c           = (Mat_MPIDense *)a->cmat->data;
2288:   PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
2289:   if (v) *v = NULL;
2290: #if defined(PETSC_HAVE_DEVICE)
2291:   A->offloadmask = a->A->offloadmask;
2292: #endif
2293:   PetscFunctionReturn(PETSC_SUCCESS);
2294: }
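
/*
   Example (illustrative; rbegin/rend/cbegin/cend are hypothetical bounds): the
   pair above exposes a contiguous block of A as a live view rather than a
   copy, so writing through the view modifies A itself.

     Mat sub;

     PetscCall(MatDenseGetSubMatrix(A, rbegin, rend, cbegin, cend, &sub));
     PetscCall(MatScale(sub, 2.0)); // scales A's block in place
     PetscCall(MatDenseRestoreSubMatrix(A, &sub));
*/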

2296: /*MC
2297:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

2299:    Options Database Key:
2300: . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

2302:   Level: beginner

2304: .seealso: [](chapter_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
2305: M*/
2306: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
2307: {
2308:   Mat_MPIDense *a;

2310:   PetscFunctionBegin;
2311:   PetscCall(PetscNew(&a));
2312:   mat->data = (void *)a;
2313:   PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));

2315:   mat->insertmode = NOT_SET_VALUES;

2317:   /* set up the stash that caches off-process entries set during assembly */
2318:   a->donotstash = PETSC_FALSE;

2320:   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));

2322:   /* work objects used for the matrix-vector product */
2323:   a->lvec        = NULL;
2324:   a->Mvctx       = NULL;
2325:   a->roworiented = PETSC_TRUE;

2327:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
2328:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
2329:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
2330:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
2331:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
2332:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
2333:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
2334:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
2335:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
2336:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
2337:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
2338:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
2339:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
2340:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
2341:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
2342:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
2343:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
2344:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
2345:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
2346:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
2347:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
2348: #if defined(PETSC_HAVE_ELEMENTAL)
2349:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
2350: #endif
2351: #if defined(PETSC_HAVE_SCALAPACK)
2352:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
2353: #endif
2354: #if defined(PETSC_HAVE_CUDA)
2355:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
2356: #endif
2357:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
2358:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2359:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2360: #if defined(PETSC_HAVE_CUDA)
2361:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2362:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2363: #endif
2364: #if defined(PETSC_HAVE_HIP)
2365:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
2366:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2367:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2368: #endif
2369:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
2370:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
2371:   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
2372:   PetscFunctionReturn(PETSC_SUCCESS);
2373: }
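
/*
   Developer-facing sketch (editor's illustration): the "..._C" methods composed above
   are what PetscUseMethod()/PetscTryMethod() dispatch to; they can also be queried
   directly. `mat` is assumed to be a MATMPIDENSE matrix.

     PetscErrorCode (*f)(Mat, PetscInt *) = NULL;
     PetscCall(PetscObjectQueryFunction((PetscObject)mat, "MatDenseGetLDA_C", &f));
     if (f) { // the method exists for this matrix type
       PetscInt lda;
       PetscCall((*f)(mat, &lda));
     }
*/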

2375: /*MC
2376:    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

2378:    Options Database Key:
2379: . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`

2381:   Level: beginner

2383: .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
2384: M*/
2385: #if defined(PETSC_HAVE_CUDA)
2386: #include <petsc/private/deviceimpl.h>
2387: PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
2388: {
2389:   PetscFunctionBegin;
2390:   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
2391:   PetscCall(MatCreate_MPIDense(B));
2392:   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
2393:   PetscFunctionReturn(PETSC_SUCCESS);
2394: }
2395: #endif

2397: /*MC
2398:    MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs.

2400:    Options Database Key:
2401: . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()`

2403:   Level: beginner

2405: .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
2406: M*/
2407: #if defined(PETSC_HAVE_HIP)
2408: #include <petsc/private/deviceimpl.h>
2409: PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B)
2410: {
2411:   PetscFunctionBegin;
2412:   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
2413:   PetscCall(MatCreate_MPIDense(B));
2414:   PetscCall(MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B));
2415:   PetscFunctionReturn(PETSC_SUCCESS);
2416: }
2417: #endif

2419: /*MC
2420:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

2422:    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
2423:    and `MATMPIDENSE` otherwise.

2425:    Options Database Key:
2426: . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

2428:   Level: beginner

2430: .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
2431: M*/

2433: /*MC
2434:    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

2438:    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
2439:    and `MATMPIDENSECUDA` otherwise.

2441:    Options Database Key:
2442: . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`

2444:   Level: beginner

2446: .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2447: M*/

2449: /*MC
2450:    MATDENSEHIP - MATDENSEHIP = "densehip" - A matrix type to be used for dense matrices on GPUs.

2452:    This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator,
2453:    and `MATMPIDENSEHIP` otherwise.

2455:    Options Database Key:
2456: . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()`

2458:   Level: beginner

2460: .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2461: M*/

2463: /*@C
2464:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

2466:    Collective

2468:    Input Parameters:
2469: +  B - the matrix
2470: -  data - optional location of matrix data.  Set to `NULL` for PETSc
2471:    to control all matrix memory allocation.

2473:    Level: intermediate

2475:    Notes:
2476:    The dense format is fully compatible with standard Fortran
2477:    storage by columns.

2479:    The data input variable is intended primarily for Fortran programmers
2480:    who wish to allocate their own matrix memory space.  Most users should
2481:    set `data` to `NULL`.

2483: .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2484: @*/
2485: PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
2486: {
2487:   PetscFunctionBegin;
2489:   PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
2490:   PetscFunctionReturn(PETSC_SUCCESS);
2491: }
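
/*
   Usage sketch (editor's illustration): preallocating a MATMPIDENSE matrix with a
   caller-owned buffer. Each rank stores its m local rows against all N global
   columns in column-major order; m and N are assumed to be defined by the caller.

     Mat          B;
     PetscScalar *data;
     PetscCall(PetscMalloc1(m * N, &data));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
     PetscCall(MatSetSizes(B, m, PETSC_DECIDE, PETSC_DECIDE, N));
     PetscCall(MatSetType(B, MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(B, data)); // B wraps data; keep it alive until B is destroyed
*/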

2493: /*@
2494:    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
2495:    array provided by the user. This is useful to avoid copying an array
2496:    into a matrix

2498:    Not Collective

2500:    Input Parameters:
2501: +  mat - the matrix
2502: -  array - the array in column major order

2504:    Level: developer

2506:    Note:
2507:    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
2508:    freed when the matrix is destroyed.

2510: .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
2511:           `MatDenseReplaceArray()`
2512: @*/
2513: PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
2514: {
2515:   PetscFunctionBegin;
2517:   PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2518:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2519: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2520:   mat->offloadmask = PETSC_OFFLOAD_CPU;
2521: #endif
2522:   PetscFunctionReturn(PETSC_SUCCESS);
2523: }
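
/*
   Usage sketch (editor's illustration): temporarily aliasing a caller-owned,
   column-major buffer `buf` as the storage of a MATDENSE matrix to avoid a copy;
   `buf`, `x`, and `y` are assumed to be allocated with conforming sizes.

     PetscCall(MatDensePlaceArray(A, buf)); // A now reads/writes buf
     PetscCall(MatMult(A, x, y));           // use A as usual
     PetscCall(MatDenseResetArray(A));      // back to A's original storage; buf stays caller-owned
*/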

2525: /*@
2526:    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`

2528:    Not Collective

2530:    Input Parameter:
2531: .  mat - the matrix

2533:    Level: developer

2535:    Note:
2536:    You can only call this after a call to `MatDensePlaceArray()`

2538: .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2539: @*/
2540: PetscErrorCode MatDenseResetArray(Mat mat)
2541: {
2542:   PetscFunctionBegin;
2544:   PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
2545:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2546:   PetscFunctionReturn(PETSC_SUCCESS);
2547: }

2549: /*@
2550:    MatDenseReplaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
2551:    array provided by the user. This is useful to avoid copying an array
2552:    into a matrix

2554:    Not Collective

2556:    Input Parameters:
2557: +  mat - the matrix
2558: -  array - the array in column major order

2560:    Level: developer

2562:    Note:
2563:    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2564:    freed by the user. It will be freed when the matrix is destroyed.

2566: .seealso: [](chapter_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2567: @*/
2568: PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
2569: {
2570:   PetscFunctionBegin;
2572:   PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2573:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2574: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2575:   mat->offloadmask = PETSC_OFFLOAD_CPU;
2576: #endif
2577:   PetscFunctionReturn(PETSC_SUCCESS);
2578: }
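
/*
   Usage sketch (editor's illustration): permanently handing a PetscMalloc()'d buffer
   to the matrix. Unlike MatDensePlaceArray(), ownership transfers to PETSc; `nloc`
   (the size of A's local storage) is assumed known to the caller.

     PetscScalar *newdata;
     PetscCall(PetscMalloc1(nloc, &newdata));
     PetscCall(MatDenseReplaceArray(A, newdata)); // A owns newdata now; the caller must not free it
*/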

2580: #if defined(PETSC_HAVE_CUDA)
2581: /*@C
2582:    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2583:    array provided by the user. This is useful to avoid copying an array
2584:    into a matrix

2586:    Not Collective

2588:    Input Parameters:
2589: +  mat - the matrix
2590: -  array - the array in column major order

2592:    Level: developer

2594:    Note:
2595:    You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2596:    freed when the matrix is destroyed. The array must have been allocated with `cudaMalloc()`.

2598: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2599: @*/
2600: PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
2601: {
2602:   PetscFunctionBegin;
2604:   PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2605:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2606:   mat->offloadmask = PETSC_OFFLOAD_GPU;
2607:   PetscFunctionReturn(PETSC_SUCCESS);
2608: }

2610: /*@C
2611:    MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseCUDAPlaceArray()`

2613:    Not Collective

2615:    Input Parameter:
2616: .  mat - the matrix

2618:    Level: developer

2620:    Note:
2621:    You can only call this after a call to `MatDenseCUDAPlaceArray()`

2623: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2624: @*/
2625: PetscErrorCode MatDenseCUDAResetArray(Mat mat)
2626: {
2627:   PetscFunctionBegin;
2629:   PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
2630:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2631:   PetscFunctionReturn(PETSC_SUCCESS);
2632: }

2634: /*@C
2635:    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2636:    array provided by the user. This is useful to avoid copying an array
2637:    into a matrix

2639:    Not Collective

2641:    Input Parameters:
2642: +  mat - the matrix
2643: -  array - the array in column major order

2645:    Level: developer

2647:    Note:
2648:    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2649:    The memory passed in CANNOT be freed by the user. It will be freed
2650:    when the matrix is destroyed. The array should respect the matrix leading dimension.

2652: .seealso: [](chapter_matrices), `Mat`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2653: @*/
2654: PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
2655: {
2656:   PetscFunctionBegin;
2658:   PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2659:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2660:   mat->offloadmask = PETSC_OFFLOAD_GPU;
2661:   PetscFunctionReturn(PETSC_SUCCESS);
2662: }

2664: /*@C
2665:    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.

2667:    Not Collective

2669:    Input Parameter:
2670: .  A - the matrix

2672:    Output Parameter:
2673: .  array - the GPU array in column major order

2675:    Level: developer

2677:    Notes:
2678:    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.

2680:    The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

2682: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2683: @*/
2684: PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2685: {
2686:   PetscFunctionBegin;
2688:   PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2689:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2690:   PetscFunctionReturn(PETSC_SUCCESS);
2691: }

2693: /*@C
2694:    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

2696:    Not Collective

2698:    Input Parameters:
2699: +  A - the matrix
2700: -  array - the GPU array in column major order

2702:    Level: developer

2704: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2705: @*/
2706: PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2707: {
2708:   PetscFunctionBegin;
2710:   PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2711:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2712:   A->offloadmask = PETSC_OFFLOAD_GPU;
2713:   PetscFunctionReturn(PETSC_SUCCESS);
2714: }
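
/*
   Usage sketch (editor's illustration): overwriting the device values of a
   MATDENSECUDA matrix without triggering a host-to-device copy first; the
   cudaMemset() zero fill stands in for an arbitrary user kernel.

     PetscScalar *d_a;
     PetscInt     lda, N;
     PetscCall(MatDenseGetLDA(A, &lda));
     PetscCall(MatGetSize(A, NULL, &N)); // local storage is lda-by-N (all global columns)
     PetscCall(MatDenseCUDAGetArrayWrite(A, &d_a));
     PetscCallCUDA(cudaMemset(d_a, 0, (size_t)lda * N * sizeof(PetscScalar)));
     PetscCall(MatDenseCUDARestoreArrayWrite(A, &d_a)); // marks the GPU copy as current
*/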

2716: /*@C
2717:    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.

2719:    Not Collective

2721:    Input Parameter:
2722: .  A - the matrix

2724:    Output Parameter:
2725: .  array - the GPU array in column major order

2727:    Level: developer

2729:    Note:
2730:    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`.

2732: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2733: @*/
2734: PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2735: {
2736:   PetscFunctionBegin;
2738:   PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2739:   PetscFunctionReturn(PETSC_SUCCESS);
2740: }

2742: /*@C
2743:    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

2745:    Not Collective

2747:    Input Parameters:
2748: +  A - the matrix
2749: -  array - the GPU array in column major order

2751:    Level: developer

2753:    Note:
2754:    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`.

2756: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2757: @*/
2758: PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2759: {
2760:   PetscFunctionBegin;
2761:   PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2762:   PetscFunctionReturn(PETSC_SUCCESS);
2763: }

2765: /*@C
2766:    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

2768:    Not Collective

2770:    Input Parameter:
2771: .  A - the matrix

2773:    Output Parameter:
2774: .  array - the GPU array in column major order

2776:    Level: developer

2778:    Note:
2779:    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.

2781: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2782: @*/
2783: PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2784: {
2785:   PetscFunctionBegin;
2787:   PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
2788:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2789:   PetscFunctionReturn(PETSC_SUCCESS);
2790: }

2792: /*@C
2793:    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.

2795:    Not Collective

2797:    Input Parameters:
2798: +  A - the matrix
2799: -  array - the GPU array in column major order

2801:    Level: developer

2803: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2804: @*/
2805: PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2806: {
2807:   PetscFunctionBegin;
2809:   PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
2810:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2811:   A->offloadmask = PETSC_OFFLOAD_GPU;
2812:   PetscFunctionReturn(PETSC_SUCCESS);
2813: }
2814: #endif

2816: #if defined(PETSC_HAVE_HIP)
2817: /*@C
2818:    MatDenseHIPPlaceArray - Allows one to replace the GPU array in a `MATDENSEHIP` matrix with an
2819:    array provided by the user. This is useful to avoid copying an array
2820:    into a matrix

2822:    Not Collective

2824:    Input Parameters:
2825: +  mat - the matrix
2826: -  array - the array in column major order

2828:    Level: developer

2830:    Note:
2831:    You can return to the original array with a call to `MatDenseHIPResetArray()`. The user is responsible for freeing this array; it will not be
2832:    freed when the matrix is destroyed. The array must have been allocated with `hipMalloc()`.

2834: .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPResetArray()`, `MatDenseHIPReplaceArray()`
2835: @*/
2836: PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array)
2837: {
2838:   PetscFunctionBegin;
2840:   PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2841:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2842:   mat->offloadmask = PETSC_OFFLOAD_GPU;
2843:   PetscFunctionReturn(PETSC_SUCCESS);
2844: }

2846: /*@C
2847:    MatDenseHIPResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseHIPPlaceArray()`

2849:    Not Collective

2851:    Input Parameter:
2852: .  mat - the matrix

2854:    Level: developer

2856:    Note:
2857:    You can only call this after a call to `MatDenseHIPPlaceArray()`

2859: .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`
2860: @*/
2861: PetscErrorCode MatDenseHIPResetArray(Mat mat)
2862: {
2863:   PetscFunctionBegin;
2865:   PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat));
2866:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2867:   PetscFunctionReturn(PETSC_SUCCESS);
2868: }

2870: /*@C
2871:    MatDenseHIPReplaceArray - Allows one to replace the GPU array in a `MATDENSEHIP` matrix with an
2872:    array provided by the user. This is useful to avoid copying an array
2873:    into a matrix

2875:    Not Collective

2877:    Input Parameters:
2878: +  mat - the matrix
2879: -  array - the array in column major order

2881:    Level: developer

2883:    Note:
2884:    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2885:    The memory passed in CANNOT be freed by the user. It will be freed
2886:    when the matrix is destroyed. The array should respect the matrix leading dimension.

2888: .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`, `MatDenseHIPResetArray()`
2889: @*/
2890: PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array)
2891: {
2892:   PetscFunctionBegin;
2894:   PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2895:   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2896:   mat->offloadmask = PETSC_OFFLOAD_GPU;
2897:   PetscFunctionReturn(PETSC_SUCCESS);
2898: }

2900: /*@C
2901:    MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a `MATDENSEHIP` matrix.

2903:    Not Collective

2905:    Input Parameter:
2906: .  A - the matrix

2908:    Output Parameter:
2909: .  array - the GPU array in column major order

2911:    Level: developer

2913:    Notes:
2914:    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseHIPGetArray()`.

2915:    The array must be restored with `MatDenseHIPRestoreArrayWrite()` when no longer needed.

2917: .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArrayRead()`
2918: @*/
2919: PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a)
2920: {
2921:   PetscFunctionBegin;
2923:   PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2924:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2925:   PetscFunctionReturn(PETSC_SUCCESS);
2926: }

2928: /*@C
2929:    MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with `MatDenseHIPGetArrayWrite()`.

2931:    Not Collective

2933:    Input Parameters:
2934: +  A - the matrix
2935: -  array - the GPU array in column major order

2937:    Level: developer

2939: .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
2940: @*/
2941: PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a)
2942: {
2943:   PetscFunctionBegin;
2945:   PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2946:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2947:   A->offloadmask = PETSC_OFFLOAD_GPU;
2948:   PetscFunctionReturn(PETSC_SUCCESS);
2949: }

2951: /*@C
2952:    MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a dense matrix. The array must be restored with MatDenseHIPRestoreArrayRead() when no longer needed.

2954:    Not Collective

2956:    Input Parameters:
2957: .  A - the matrix

2959:    Output Parameters
2960: .  array - the GPU array in column major order

2962:    Level: developer

2964:    Note:
2965:    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseHIPGetArrayWrite()`.

2967: .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
2968: @*/
2969: PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a)
2970: {
2971:   PetscFunctionBegin;
2973:   PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2974:   PetscFunctionReturn(PETSC_SUCCESS);
2975: }

2977: /*@C
2978:    MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with a call to `MatDenseHIPGetArrayRead()`.

2980:    Not Collective

2982:    Input Parameters:
2983: +  A - the matrix
2984: -  array - the GPU array in column major order

2986:    Level: developer

2988:    Note:
2989:    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseHIPGetArrayWrite()`.

2991: .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPGetArrayRead()`
2992: @*/
2993: PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a)
2994: {
2995:   PetscFunctionBegin;
2996:   PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2997:   PetscFunctionReturn(PETSC_SUCCESS);
2998: }

3000: /*@C
3001:    MatDenseHIPGetArray - Provides access to the HIP buffer inside a `MATDENSEHIP` matrix. The array must be restored with `MatDenseHIPRestoreArray()` when no longer needed.

3003:    Not Collective

3005:    Input Parameter:
3006: .  A - the matrix

3008:    Output Parameter:
3009: .  array - the GPU array in column major order

3011:    Level: developer

3013:    Note:
3014:    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseHIPGetArrayWrite()`. For read-only access, use `MatDenseHIPGetArrayRead()`.

3018: .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
3019: @*/
3020: PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a)
3021: {
3022:   PetscFunctionBegin;
3024:   PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a));
3025:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
3026:   PetscFunctionReturn(PETSC_SUCCESS);
3027: }

3029: /*@C
3030:    MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a `MATDENSEHIP` matrix previously obtained with `MatDenseHIPGetArray()`.

3032:    Not Collective

3034:    Input Parameters:
3035: +  A - the matrix
3036: -  array - the GPU array in column major order

3038:    Level: developer

3040: .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
3041: @*/
3042: PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a)
3043: {
3044:   PetscFunctionBegin;
3046:   PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a));
3047:   PetscCall(PetscObjectStateIncrease((PetscObject)A));
3048:   A->offloadmask = PETSC_OFFLOAD_GPU;
3049:   PetscFunctionReturn(PETSC_SUCCESS);
3050: }
3051: #endif

3053: /*@C
3054:    MatCreateDense - Creates a matrix in `MATDENSE` format.

3056:    Collective

3058:    Input Parameters:
3059: +  comm - MPI communicator
3060: .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
3061: .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
3062: .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
3063: .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
3064: -  data - optional location of matrix data.  Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
3065:    to control all matrix memory allocation.

3067:    Output Parameter:
3068: .  A - the matrix

3070:    Level: intermediate

3072:    Notes:
3073:    The dense format is fully compatible with standard Fortran
3074:    storage by columns.

3076:    Although local portions of the matrix are stored in column-major
3077:    order, the matrix is partitioned across MPI ranks by row.

3079:    The data input variable is intended primarily for Fortran programmers
3080:    who wish to allocate their own matrix memory space.  Most users should
3081:    set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).

3083:    The user MUST specify either the local or global matrix dimensions
3084:    (possibly both).

3086: .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
3087: @*/
3088: PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3089: {
3090:   PetscFunctionBegin;
3091:   PetscCall(MatCreate(comm, A));
3092:   PetscCall(MatSetSizes(*A, m, n, M, N));
3093:   PetscCall(MatSetType(*A, MATDENSE));
3094:   PetscCall(MatSeqDenseSetPreallocation(*A, data));
3095:   PetscCall(MatMPIDenseSetPreallocation(*A, data));
3096:   PetscFunctionReturn(PETSC_SUCCESS);
3097: }
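
/*
   Usage sketch (editor's illustration): creating a distributed dense matrix with
   PETSc-managed storage and filling the locally owned rows; M and N are assumed
   to be defined by the caller.

     Mat      A;
     PetscInt rstart, rend, i, j;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A));
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (i = rstart; i < rend; i++) {
       for (j = 0; j < N; j++) {
         PetscScalar v = (PetscScalar)(i + j);
         PetscCall(MatSetValues(A, 1, &i, 1, &j, &v, INSERT_VALUES));
       }
     }
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/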

3099: #if defined(PETSC_HAVE_CUDA)
3100: /*@C
3101:    MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

3103:    Collective

3105:    Input Parameters:
3106: +  comm - MPI communicator
3107: .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
3108: .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
3109: .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
3110: .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
3111: -  data - optional location of GPU matrix data.  Use `NULL` for PETSc
3112:    to control matrix memory allocation.

3114:    Output Parameter:
3115: .  A - the matrix

3117:    Level: intermediate

3119: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
3120: @*/
3121: PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3122: {
3123:   PetscFunctionBegin;
3124:   PetscCall(MatCreate(comm, A));
3126:   PetscCall(MatSetSizes(*A, m, n, M, N));
3127:   PetscCall(MatSetType(*A, MATDENSECUDA));
3128:   PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
3129:   PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
3130:   PetscFunctionReturn(PETSC_SUCCESS);
3131: }
3132: #endif

3134: #if defined(PETSC_HAVE_HIP)
3135: /*@C
3136:    MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP.

3138:    Collective

3140:    Input Parameters:
3141: +  comm - MPI communicator
3142: .  m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
3143: .  n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
3144: .  M - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
3145: .  N - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
3146: -  data - optional location of GPU matrix data.  Use `NULL` for PETSc
3147:    to control matrix memory allocation.

3149:    Output Parameter:
3150: .  A - the matrix

3152:    Level: intermediate

3154: .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()`
3155: @*/
3156: PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3157: {
3158:   PetscFunctionBegin;
3159:   PetscCall(MatCreate(comm, A));
3161:   PetscCall(MatSetSizes(*A, m, n, M, N));
3162:   PetscCall(MatSetType(*A, MATDENSEHIP));
3163:   PetscCall(MatSeqDenseHIPSetPreallocation(*A, data));
3164:   PetscCall(MatMPIDenseHIPSetPreallocation(*A, data));
3165:   PetscFunctionReturn(PETSC_SUCCESS);
3166: }
3167: #endif

3169: static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
3170: {
3171:   Mat           mat;
3172:   Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

3174:   PetscFunctionBegin;
3175:   *newmat = NULL;
3176:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
3177:   PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3178:   PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
3179:   a = (Mat_MPIDense *)mat->data;

3181:   mat->factortype   = A->factortype;
3182:   mat->assembled    = PETSC_TRUE;
3183:   mat->preallocated = PETSC_TRUE;

3185:   mat->insertmode = NOT_SET_VALUES;
3186:   a->donotstash   = oldmat->donotstash;

3188:   PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
3189:   PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));

3191:   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));

3193:   *newmat = mat;
3194:   PetscFunctionReturn(PETSC_SUCCESS);
3195: }
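
/*
   Usage sketch (editor's illustration): the duplicate shares the row and column
   layouts of the original by reference, as set up above.

     Mat B;
     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); // or MAT_DO_NOT_COPY_VALUES for the structure only
*/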

3197: PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
3198: {
3199:   PetscBool isbinary;
3200: #if defined(PETSC_HAVE_HDF5)
3201:   PetscBool ishdf5;
3202: #endif

3204:   PetscFunctionBegin;
3207:   /* force binary viewer to load .info file if it has not yet done so */
3208:   PetscCall(PetscViewerSetUp(viewer));
3209:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3210: #if defined(PETSC_HAVE_HDF5)
3211:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3212: #endif
3213:   if (isbinary) {
3214:     PetscCall(MatLoad_Dense_Binary(newMat, viewer));
3215: #if defined(PETSC_HAVE_HDF5)
3216:   } else if (ishdf5) {
3217:     PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
3218: #endif
3219:   } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3220:   PetscFunctionReturn(PETSC_SUCCESS);
3221: }
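
/*
   Usage sketch (editor's illustration): reading a dense matrix from a PETSc binary
   file; "matrix.dat" is a placeholder path.

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATDENSE));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/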

3223: static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
3224: {
3225:   Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
3226:   Mat           a, b;

3228:   PetscFunctionBegin;
3229:   a = matA->A;
3230:   b = matB->A;
3231:   PetscCall(MatEqual(a, b, flag));
3232:   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
3233:   PetscFunctionReturn(PETSC_SUCCESS);
3234: }

3236: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
3237: {
3238:   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

3240:   PetscFunctionBegin;
3241:   PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
3242:   PetscCall(MatDestroy(&atb->atb));
3243:   PetscCall(PetscFree(atb));
3244:   PetscFunctionReturn(PETSC_SUCCESS);
3245: }

3247: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
3248: {
3249:   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

3251:   PetscFunctionBegin;
3252:   PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
3253:   PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
3254:   PetscCall(PetscFree(abt));
3255:   PetscFunctionReturn(PETSC_SUCCESS);
3256: }

3258: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3259: {
3260:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3261:   Mat_TransMatMultDense *atb;
3262:   MPI_Comm               comm;
3263:   PetscMPIInt            size, *recvcounts;
3264:   PetscScalar           *carray, *sendbuf;
3265:   const PetscScalar     *atbarray;
3266:   PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
3267:   const PetscInt        *ranges;

3269:   PetscFunctionBegin;
3270:   MatCheckProduct(C, 3);
3271:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3272:   atb        = (Mat_TransMatMultDense *)C->product->data;
3273:   recvcounts = atb->recvcounts;
3274:   sendbuf    = atb->sendbuf;

3276:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3277:   PetscCallMPI(MPI_Comm_size(comm, &size));

3279:   /* compute atbarray = aseq^T * bseq */
3280:   PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));

3282:   PetscCall(MatGetOwnershipRanges(C, &ranges));

3284:   /* arrange atbarray into sendbuf */
3285:   PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
3286:   PetscCall(MatDenseGetLDA(atb->atb, &lda));
3287:   for (proc = 0, k = 0; proc < size; proc++) {
3288:     for (j = 0; j < cN; j++) {
3289:       for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
3290:     }
3291:   }
3292:   PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));

3294:   /* sum all atbarray to local values of C */
3295:   PetscCall(MatDenseGetArrayWrite(c->A, &carray));
3296:   PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
3297:   PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
3298:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3299:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3300:   PetscFunctionReturn(PETSC_SUCCESS);
3301: }

3303: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3304: {
3305:   MPI_Comm               comm;
3306:   PetscMPIInt            size;
3307:   PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
3308:   Mat_TransMatMultDense *atb;
3309:   PetscBool              cisdense = PETSC_FALSE;
3310:   PetscInt               i;
3311:   const PetscInt        *ranges;

3313:   PetscFunctionBegin;
3314:   MatCheckProduct(C, 4);
3315:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3316:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3317:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
3318:     SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3319:   }

3321:   /* create matrix product C */
3322:   PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
3323:   /* the GPU type names are plain strings defined unconditionally, so test them all at once;
3324:      separate guarded checks would let a later check overwrite the result of an earlier one
3325:      when more than one GPU backend is enabled */
3326:   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
3329:   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
3330:   PetscCall(MatSetUp(C));

3332:   /* create data structure for reuse C */
3333:   PetscCallMPI(MPI_Comm_size(comm, &size));
3334:   PetscCall(PetscNew(&atb));
3335:   cM = C->rmap->N;
3336:   PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
3337:   PetscCall(MatGetOwnershipRanges(C, &ranges));
3338:   for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

3340:   C->product->data    = atb;
3341:   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
3342:   PetscFunctionReturn(PETSC_SUCCESS);
3343: }
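
/*
   Usage sketch (editor's illustration): the symbolic/numeric pair above backs the
   user-level call that computes C = A^T B for two MATMPIDENSE matrices with
   identical row layouts.

     Mat C;
     PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
     // ... later, with the same A and B, reuse the product structure:
     PetscCall(MatTransposeMatMult(A, B, MAT_REUSE_MATRIX, PETSC_DEFAULT, &C));
*/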

3345: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3346: {
3347:   MPI_Comm               comm;
3348:   PetscMPIInt            i, size;
3349:   PetscInt               maxRows, bufsiz;
3350:   PetscMPIInt            tag;
3351:   PetscInt               alg;
3352:   Mat_MatTransMultDense *abt;
3353:   Mat_Product           *product = C->product;
3354:   PetscBool              flg;

3356:   PetscFunctionBegin;
3357:   MatCheckProduct(C, 4);
3358:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3359:   /* check local size of A and B */
3360:   PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);

3362:   PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
3363:   alg = flg ? 0 : 1;

3365:   /* setup matrix product C */
3366:   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
3367:   PetscCall(MatSetType(C, MATMPIDENSE));
3368:   PetscCall(MatSetUp(C));
3369:   PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));

3371:   /* create data structure for reuse C */
3372:   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3373:   PetscCallMPI(MPI_Comm_size(comm, &size));
3374:   PetscCall(PetscNew(&abt));
3375:   abt->tag = tag;
3376:   abt->alg = alg;
3377:   switch (alg) {
3378:   case 1: /* alg: "cyclic" */
3379:     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
3380:     bufsiz = A->cmap->N * maxRows;
3381:     PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
3382:     break;
3383:   default: /* alg: "allgatherv" */
3384:     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
3385:     PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
3386:     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
3387:     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
3388:     break;
3389:   }

3391:   C->product->data                = abt;
3392:   C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
3393:   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
3394:   PetscFunctionReturn(PETSC_SUCCESS);
3395: }

3397: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
3398: {
3399:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3400:   Mat_MatTransMultDense *abt;
3401:   MPI_Comm               comm;
3402:   PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
3403:   PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
3404:   PetscInt               i, cK             = A->cmap->N, k, j, bn;
3405:   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3406:   const PetscScalar     *av, *bv;
3407:   PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
3408:   MPI_Request            reqs[2];
3409:   const PetscInt        *ranges;

3411:   PetscFunctionBegin;
3412:   MatCheckProduct(C, 3);
3413:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3414:   abt = (Mat_MatTransMultDense *)C->product->data;
3415:   PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3416:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3417:   PetscCallMPI(MPI_Comm_size(comm, &size));
3418:   PetscCall(MatDenseGetArrayRead(a->A, &av));
3419:   PetscCall(MatDenseGetArrayRead(b->A, &bv));
3420:   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3421:   PetscCall(MatDenseGetLDA(a->A, &i));
3422:   PetscCall(PetscBLASIntCast(i, &alda));
3423:   PetscCall(MatDenseGetLDA(b->A, &i));
3424:   PetscCall(PetscBLASIntCast(i, &blda));
3425:   PetscCall(MatDenseGetLDA(c->A, &i));
3426:   PetscCall(PetscBLASIntCast(i, &clda));
3427:   PetscCall(MatGetOwnershipRanges(B, &ranges));
3428:   bn = B->rmap->n;
3429:   if (blda == bn) {
3430:     sendbuf = (PetscScalar *)bv;
3431:   } else {
3432:     sendbuf = abt->buf[0];
3433:     for (k = 0, i = 0; i < cK; i++) {
3434:       for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
3435:     }
3436:   }
3437:   if (size > 1) {
3438:     sendto   = (rank + size - 1) % size;
3439:     recvfrom = (rank + size + 1) % size;
3440:   } else {
3441:     sendto = recvfrom = 0;
3442:   }
3443:   PetscCall(PetscBLASIntCast(cK, &ck));
3444:   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3445:   recvisfrom = rank;
3446:   for (i = 0; i < size; i++) {
3447:     /* we have finished receiving and sending; the buffers can now be read/modified */
3448:     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
3449:     PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

3451:     if (nextrecvisfrom != rank) {
3452:       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
3453:       sendsiz = cK * bn;
3454:       recvsiz = cK * nextbn;
3455:       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
3456:       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
3457:       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
3458:     }

3460:     /* local aseq * sendbuf^T */
3461:     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
3462:     if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

3464:     if (nextrecvisfrom != rank) {
3465:       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
3466:       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
3467:     }
3468:     bn         = nextbn;
3469:     recvisfrom = nextrecvisfrom;
3470:     sendbuf    = recvbuf;
3471:   }
3472:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3473:   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3474:   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3475:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3476:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3477:   PetscFunctionReturn(PETSC_SUCCESS);
3478: }

3480: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
3481: {
3482:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3483:   Mat_MatTransMultDense *abt;
3484:   MPI_Comm               comm;
3485:   PetscMPIInt            size;
3486:   PetscScalar           *cv, *sendbuf, *recvbuf;
3487:   const PetscScalar     *av, *bv;
3488:   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
3489:   PetscScalar            _DOne = 1.0, _DZero = 0.0;
3490:   PetscBLASInt           cm, cn, ck, alda, clda;

3492:   PetscFunctionBegin;
3493:   MatCheckProduct(C, 3);
3494:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3495:   abt = (Mat_MatTransMultDense *)C->product->data;
3496:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3497:   PetscCallMPI(MPI_Comm_size(comm, &size));
3498:   PetscCall(MatDenseGetArrayRead(a->A, &av));
3499:   PetscCall(MatDenseGetArrayRead(b->A, &bv));
3500:   PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3501:   PetscCall(MatDenseGetLDA(a->A, &i));
3502:   PetscCall(PetscBLASIntCast(i, &alda));
3503:   PetscCall(MatDenseGetLDA(b->A, &blda));
3504:   PetscCall(MatDenseGetLDA(c->A, &i));
3505:   PetscCall(PetscBLASIntCast(i, &clda));
3506:   /* copy transpose of B into buf[0] */
3507:   bn      = B->rmap->n;
3508:   sendbuf = abt->buf[0];
3509:   recvbuf = abt->buf[1];
3510:   for (k = 0, j = 0; j < bn; j++) {
3511:     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
3512:   }
3513:   PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3514:   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
3515:   PetscCall(PetscBLASIntCast(cK, &ck));
3516:   PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3517:   PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
3518:   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
3519:   PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3521:   PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3522:   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3523:   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3524:   PetscFunctionReturn(PETSC_SUCCESS);
3525: }

3527: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3528: {
3529:   Mat_MatTransMultDense *abt;

3531:   PetscFunctionBegin;
3532:   MatCheckProduct(C, 3);
3533:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3534:   abt = (Mat_MatTransMultDense *)C->product->data;
3535:   switch (abt->alg) {
3536:   case 1:
3537:     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
3538:     break;
3539:   default:
3540:     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
3541:     break;
3542:   }
3543:   PetscFunctionReturn(PETSC_SUCCESS);
3544: }

3546: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
3547: {
3548:   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

3550:   PetscFunctionBegin;
3551:   PetscCall(MatDestroy(&ab->Ce));
3552:   PetscCall(MatDestroy(&ab->Ae));
3553:   PetscCall(MatDestroy(&ab->Be));
3554:   PetscCall(PetscFree(ab));
3555:   PetscFunctionReturn(PETSC_SUCCESS);
3556: }

3558: #if defined(PETSC_HAVE_ELEMENTAL)
3559: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3560: {
3561:   Mat_MatMultDense *ab;

3563:   PetscFunctionBegin;
3564:   MatCheckProduct(C, 3);
3565:   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
3566:   ab = (Mat_MatMultDense *)C->product->data;
3567:   PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
3568:   PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
3569:   PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
3570:   PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
3571:   PetscFunctionReturn(PETSC_SUCCESS);
3572: }

3574: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3575: {
3576:   Mat               Ae, Be, Ce;
3577:   Mat_MatMultDense *ab;

3579:   PetscFunctionBegin;
3580:   MatCheckProduct(C, 4);
3581:   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3582:   /* check local size of A and B */
3583:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
3584:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
3585:   }

3587:   /* create elemental matrices Ae and Be */
3588:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
3589:   PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
3590:   PetscCall(MatSetType(Ae, MATELEMENTAL));
3591:   PetscCall(MatSetUp(Ae));
3592:   PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));

3594:   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
3595:   PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
3596:   PetscCall(MatSetType(Be, MATELEMENTAL));
3597:   PetscCall(MatSetUp(Be));
3598:   PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));

3600:   /* compute symbolic Ce = Ae*Be */
3601:   PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
3602:   PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));

3604:   /* setup C */
3605:   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
3606:   PetscCall(MatSetType(C, MATDENSE));
3607:   PetscCall(MatSetUp(C));

3609:   /* create data structure for reuse Cdense */
3610:   PetscCall(PetscNew(&ab));
3611:   ab->Ae = Ae;
3612:   ab->Be = Be;
3613:   ab->Ce = Ce;

3615:   C->product->data       = ab;
3616:   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
3617:   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3618:   PetscFunctionReturn(PETSC_SUCCESS);
3619: }
3620: #endif

3622: #if defined(PETSC_HAVE_ELEMENTAL)
3623: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3624: {
3625:   PetscFunctionBegin;
3626:   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3627:   C->ops->productsymbolic = MatProductSymbolic_AB;
3628:   PetscFunctionReturn(PETSC_SUCCESS);
3629: }
3630: #endif

3632: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3633: {
3634:   Mat_Product *product = C->product;
3635:   Mat          A = product->A, B = product->B;

3637:   PetscFunctionBegin;
3638:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3639:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3640:   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3641:   C->ops->productsymbolic          = MatProductSymbolic_AtB;
3642:   PetscFunctionReturn(PETSC_SUCCESS);
3643: }

3645: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3646: {
3647:   Mat_Product *product     = C->product;
3648:   const char  *algTypes[2] = {"allgatherv", "cyclic"};
3649:   PetscInt     alg, nalg = 2;
3650:   PetscBool    flg = PETSC_FALSE;

3652:   PetscFunctionBegin;
3653:   /* Set default algorithm */
3654:   alg = 0; /* default is allgatherv */
3655:   PetscCall(PetscStrcmp(product->alg, "default", &flg));
3656:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

3658:   /* Get runtime option */
3659:   if (product->api_user) {
3660:     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
3661:     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
3662:     PetscOptionsEnd();
3663:   } else {
3664:     PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
3665:     PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
3666:     PetscOptionsEnd();
3667:   }
3668:   if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

3670:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3671:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
3672:   PetscFunctionReturn(PETSC_SUCCESS);
3673: }
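
/*
   Usage sketch (editor's illustration): selecting the "cyclic" C = A*B^T algorithm
   through the MatProduct API; the equivalent runtime choice is
   -mat_product_algorithm cyclic (or -matmattransmult_mpidense_mpidense_via cyclic
   for the MatMatTransposeMult() entry point).

     Mat C;
     PetscCall(MatProductCreate(A, B, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_ABt));
     PetscCall(MatProductSetAlgorithm(C, "cyclic"));
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
*/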

3675: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3676: {
3677:   Mat_Product *product = C->product;

3679:   PetscFunctionBegin;
3680:   switch (product->type) {
3681: #if defined(PETSC_HAVE_ELEMENTAL)
3682:   case MATPRODUCT_AB:
3683:     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
3684:     break;
3685: #endif
3686:   case MATPRODUCT_AtB:
3687:     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
3688:     break;
3689:   case MATPRODUCT_ABt:
3690:     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
3691:     break;
3692:   default:
3693:     break;
3694:   }
3695:   PetscFunctionReturn(PETSC_SUCCESS);
3696: }