Actual source code: mpidense.c
2: /*
3: Basic functions for parallel dense matrices.
4: Portions of this code are under:
5: Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
6: */
8: #include <../src/mat/impls/dense/mpi/mpidense.h>
9: #include <../src/mat/impls/aij/mpi/mpiaij.h>
10: #include <petscblaslapack.h>
12: /*@
13: MatDenseGetLocalMatrix - For a `MATMPIDENSE` matrix, returns the sequential matrix that stores the
14: locally owned portion of the operator. For a `MATSEQDENSE` matrix, it returns the matrix itself.
16: Input Parameter:
17: . A - the sequential or MPI `MATDENSE` matrix
19: Output Parameter:
20: . B - the inner matrix
22: Level: intermediate
24: .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
25: @*/
26: PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
27: {
28: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
29: PetscBool flg;
31: PetscFunctionBegin;
34: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
35: if (flg) *B = mat->A;
36: else {
37: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
38: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
39: *B = A;
40: }
41: PetscFunctionReturn(PETSC_SUCCESS);
42: }
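/*
   Illustrative sketch (not part of this source file): how a caller might use MatDenseGetLocalMatrix()
   to operate directly on the locally owned rows of a MATMPIDENSE matrix. The function name below is
   hypothetical.

   static PetscErrorCode ZeroLocalBlock(Mat A)
   {
     Mat lA;

     PetscFunctionBegin;
     PetscCall(MatDenseGetLocalMatrix(A, &lA)); // lA is the inner matrix returned above; do not destroy it
     PetscCall(MatZeroEntries(lA));             // touches only the rows owned by this process
     PetscFunctionReturn(PETSC_SUCCESS);
   }
*/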
44: PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
45: {
46: Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
47: Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
49: PetscFunctionBegin;
50: PetscCall(MatCopy(Amat->A, Bmat->A, s));
51: PetscFunctionReturn(PETSC_SUCCESS);
52: }
54: PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
55: {
56: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
57: PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
58: PetscScalar *v;
60: PetscFunctionBegin;
61: PetscCall(MatDenseGetArray(mat->A, &v));
62: PetscCall(MatDenseGetLDA(mat->A, &lda));
63: rend2 = PetscMin(rend, A->cmap->N);
64: if (rend2 > rstart) {
65: for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
66: PetscCall(PetscLogFlops(rend2 - rstart));
67: }
68: PetscCall(MatDenseRestoreArray(mat->A, &v));
69: PetscFunctionReturn(PETSC_SUCCESS);
70: }
72: PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
73: {
74: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
75: PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
77: PetscFunctionBegin;
78: PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
79: lrow = row - rstart;
80: PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
81: PetscFunctionReturn(PETSC_SUCCESS);
82: }
84: PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
85: {
86: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
87: PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
89: PetscFunctionBegin;
90: PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
91: lrow = row - rstart;
92: PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
93: PetscFunctionReturn(PETSC_SUCCESS);
94: }
96: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
97: {
98: Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
99: PetscInt m = A->rmap->n, rstart = A->rmap->rstart;
100: PetscScalar *array;
101: MPI_Comm comm;
102: PetscBool flg;
103: Mat B;
105: PetscFunctionBegin;
106: PetscCall(MatHasCongruentLayouts(A, &flg));
107: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
108: PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
109: if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
110: #if defined(PETSC_HAVE_CUDA)
111: PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
112: PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
113: #elif defined(PETSC_HAVE_HIP)
114: PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
115: PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
116: #endif
117: PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
118: PetscCall(MatCreate(comm, &B));
119: PetscCall(MatSetSizes(B, m, m, m, m));
120: PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
121: PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
122: PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
123: PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
124: PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
125: *a = B;
126: PetscCall(MatDestroy(&B));
127: } else *a = B;
128: PetscFunctionReturn(PETSC_SUCCESS);
129: }
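/*
   Illustrative sketch (not part of this source file): obtaining the square diagonal block owned by
   this process through the generic MatGetDiagonalBlock() interface, which dispatches to the routine
   above for MATMPIDENSE. The function name is hypothetical.

   static PetscErrorCode DiagonalBlockNorm(Mat A, PetscReal *nrm)
   {
     Mat Ad;

     PetscFunctionBegin;
     PetscCall(MatGetDiagonalBlock(A, &Ad));      // Ad is m x m, with m the local row count
     PetscCall(MatNorm(Ad, NORM_FROBENIUS, nrm)); // any sequential operation can be applied to Ad
     // Ad is cached on A (composed as "DiagonalBlock") and must not be destroyed by the caller
     PetscFunctionReturn(PETSC_SUCCESS);
   }
*/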
131: PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
132: {
133: Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
134: PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
135: PetscBool roworiented = A->roworiented;
137: PetscFunctionBegin;
138: for (i = 0; i < m; i++) {
139: if (idxm[i] < 0) continue;
140: PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
141: if (idxm[i] >= rstart && idxm[i] < rend) {
142: row = idxm[i] - rstart;
143: if (roworiented) {
144: PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
145: } else {
146: for (j = 0; j < n; j++) {
147: if (idxn[j] < 0) continue;
148: PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
149: PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
150: }
151: }
152: } else if (!A->donotstash) {
153: mat->assembled = PETSC_FALSE;
154: if (roworiented) {
155: PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
156: } else {
157: PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
158: }
159: }
160: }
161: PetscFunctionReturn(PETSC_SUCCESS);
162: }
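/*
   Illustrative sketch (not part of this source file): inserting entries through MatSetValues() and
   completing assembly; rows set outside the local ownership range are stashed by the routine above
   and delivered to their owners during assembly. Names and values are hypothetical.

   static PetscErrorCode FillExample(Mat A)
   {
     PetscInt    rstart, rend, N, i, col;
     PetscScalar one = 1.0;

     PetscFunctionBegin;
     PetscCall(MatGetSize(A, NULL, &N));
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (i = rstart; i < rend; i++) {
       col = (i + 1) % N; // any global column is legal: each process stores all columns of its rows
       PetscCall(MatSetValues(A, 1, &i, 1, &col, &one, INSERT_VALUES));
     }
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); // communicates any stashed off-process rows
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     PetscFunctionReturn(PETSC_SUCCESS);
   }
*/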
164: PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
165: {
166: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
167: PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
169: PetscFunctionBegin;
170: for (i = 0; i < m; i++) {
171: if (idxm[i] < 0) continue; /* negative row */
172: PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
173: if (idxm[i] >= rstart && idxm[i] < rend) {
174: row = idxm[i] - rstart;
175: for (j = 0; j < n; j++) {
176: if (idxn[j] < 0) continue; /* negative column */
177: PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
178: PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
179: }
180: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
181: }
182: PetscFunctionReturn(PETSC_SUCCESS);
183: }
185: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
186: {
187: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
189: PetscFunctionBegin;
190: PetscCall(MatDenseGetLDA(a->A, lda));
191: PetscFunctionReturn(PETSC_SUCCESS);
192: }
194: static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
195: {
196: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
197: MatType mtype = MATSEQDENSE;
199: PetscFunctionBegin;
200: if (!a->A) {
201: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
202: PetscCall(PetscLayoutSetUp(A->rmap));
203: PetscCall(PetscLayoutSetUp(A->cmap));
204: PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
205: PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
206: #if defined(PETSC_HAVE_CUDA)
207: PetscBool iscuda;
208: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
209: if (iscuda) mtype = MATSEQDENSECUDA;
210: #elif defined(PETSC_HAVE_HIP)
211: PetscBool iship;
212: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
213: if (iship) mtype = MATSEQDENSEHIP;
214: #endif
215: PetscCall(MatSetType(a->A, mtype));
216: }
217: PetscCall(MatDenseSetLDA(a->A, lda));
218: PetscFunctionReturn(PETSC_SUCCESS);
219: }
221: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
222: {
223: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
225: PetscFunctionBegin;
226: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
227: PetscCall(MatDenseGetArray(a->A, array));
228: PetscFunctionReturn(PETSC_SUCCESS);
229: }
231: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
232: {
233: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
235: PetscFunctionBegin;
236: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
237: PetscCall(MatDenseGetArrayRead(a->A, array));
238: PetscFunctionReturn(PETSC_SUCCESS);
239: }
241: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
242: {
243: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
245: PetscFunctionBegin;
246: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
247: PetscCall(MatDenseGetArrayWrite(a->A, array));
248: PetscFunctionReturn(PETSC_SUCCESS);
249: }
251: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
252: {
253: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
255: PetscFunctionBegin;
256: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
257: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
258: PetscCall(MatDensePlaceArray(a->A, array));
259: PetscFunctionReturn(PETSC_SUCCESS);
260: }
262: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
263: {
264: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
266: PetscFunctionBegin;
267: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
268: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
269: PetscCall(MatDenseResetArray(a->A));
270: PetscFunctionReturn(PETSC_SUCCESS);
271: }
273: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
274: {
275: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
277: PetscFunctionBegin;
278: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
279: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
280: PetscCall(MatDenseReplaceArray(a->A, array));
281: PetscFunctionReturn(PETSC_SUCCESS);
282: }
284: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
285: {
286: Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd;
287: PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
288: const PetscInt *irow, *icol;
289: const PetscScalar *v;
290: PetscScalar *bv;
291: Mat newmat;
292: IS iscol_local;
293: MPI_Comm comm_is, comm_mat;
295: PetscFunctionBegin;
296: PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
297: PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
298: PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
300: PetscCall(ISAllGather(iscol, &iscol_local));
301: PetscCall(ISGetIndices(isrow, &irow));
302: PetscCall(ISGetIndices(iscol_local, &icol));
303: PetscCall(ISGetLocalSize(isrow, &nrows));
304: PetscCall(ISGetLocalSize(iscol, &ncols));
305: PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
307: /* No parallel redistribution is currently supported: the submatrix must use the same row
308: partitioning as the original matrix. Each index set should really be checked to confirm
309: that this requirement is met. */
311: PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
312: PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
314: /* Check submatrix call */
315: if (scall == MAT_REUSE_MATRIX) {
316: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
317: /* The row and column sizes of the reused matrix really need to be checked here */
318: newmat = *B;
319: } else {
320: /* Create and fill new matrix */
321: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
322: PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
323: PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
324: PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
325: }
327: /* Now extract the data pointers and do the copy, column at a time */
328: newmatd = (Mat_MPIDense *)newmat->data;
329: PetscCall(MatDenseGetArray(newmatd->A, &bv));
330: PetscCall(MatDenseGetArrayRead(mat->A, &v));
331: PetscCall(MatDenseGetLDA(mat->A, &lda));
332: for (i = 0; i < Ncols; i++) {
333: const PetscScalar *av = v + lda * icol[i];
334: for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
335: }
336: PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
337: PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
339: /* Assemble the matrix so that the correct flags are set */
340: PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
341: PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
343: /* Free work space */
344: PetscCall(ISRestoreIndices(isrow, &irow));
345: PetscCall(ISRestoreIndices(iscol_local, &icol));
346: PetscCall(ISDestroy(&iscol_local));
347: *B = newmat;
348: PetscFunctionReturn(PETSC_SUCCESS);
349: }
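/*
   Illustrative sketch (not part of this source file): extracting a submatrix with index sets that
   keep the row partitioning of the original matrix, which is the only layout the routine above
   currently supports. The function name is hypothetical.

   static PetscErrorCode CopyViaSubMatrix(Mat A, Mat *B)
   {
     PetscInt rstart, rend, cstart, cend;
     IS       isrow, iscol;

     PetscFunctionBegin;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     PetscCall(MatGetOwnershipRangeColumn(A, &cstart, &cend));
     // both index sets follow the existing layout of A, so no redistribution is required
     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), rend - rstart, rstart, 1, &isrow));
     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), cend - cstart, cstart, 1, &iscol));
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, B));
     PetscCall(ISDestroy(&isrow));
     PetscCall(ISDestroy(&iscol));
     PetscFunctionReturn(PETSC_SUCCESS);
   }
*/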
351: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
352: {
353: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
355: PetscFunctionBegin;
356: PetscCall(MatDenseRestoreArray(a->A, array));
357: PetscFunctionReturn(PETSC_SUCCESS);
358: }
360: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
361: {
362: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
364: PetscFunctionBegin;
365: PetscCall(MatDenseRestoreArrayRead(a->A, array));
366: PetscFunctionReturn(PETSC_SUCCESS);
367: }
369: PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
370: {
371: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
373: PetscFunctionBegin;
374: PetscCall(MatDenseRestoreArrayWrite(a->A, array));
375: PetscFunctionReturn(PETSC_SUCCESS);
376: }
378: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
379: {
380: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
381: PetscInt nstash, reallocs;
383: PetscFunctionBegin;
384: if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
386: PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
387: PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
388: PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
389: PetscFunctionReturn(PETSC_SUCCESS);
390: }
392: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
393: {
394: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
395: PetscInt i, *row, *col, flg, j, rstart, ncols;
396: PetscMPIInt n;
397: PetscScalar *val;
399: PetscFunctionBegin;
400: if (!mdn->donotstash && !mat->nooffprocentries) {
401: /* wait on receives */
402: while (1) {
403: PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
404: if (!flg) break;
406: for (i = 0; i < n;) {
407: /* Now identify the consecutive vals belonging to the same row */
408: for (j = i, rstart = row[j]; j < n; j++) {
409: if (row[j] != rstart) break;
410: }
411: if (j < n) ncols = j - i;
412: else ncols = n - i;
413: /* Now assemble all these values with a single function call */
414: PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
415: i = j;
416: }
417: }
418: PetscCall(MatStashScatterEnd_Private(&mat->stash));
419: }
421: PetscCall(MatAssemblyBegin(mdn->A, mode));
422: PetscCall(MatAssemblyEnd(mdn->A, mode));
423: PetscFunctionReturn(PETSC_SUCCESS);
424: }
426: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
427: {
428: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
430: PetscFunctionBegin;
431: PetscCall(MatZeroEntries(l->A));
432: PetscFunctionReturn(PETSC_SUCCESS);
433: }
435: PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
436: {
437: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
438: PetscInt i, len, *lrows;
440: PetscFunctionBegin;
441: /* get locally owned rows */
442: PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
443: /* fix right hand side if needed */
444: if (x && b) {
445: const PetscScalar *xx;
446: PetscScalar *bb;
448: PetscCall(VecGetArrayRead(x, &xx));
449: PetscCall(VecGetArrayWrite(b, &bb));
450: for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
451: PetscCall(VecRestoreArrayRead(x, &xx));
452: PetscCall(VecRestoreArrayWrite(b, &bb));
453: }
454: PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
455: if (diag != 0.0) {
456: Vec d;
458: PetscCall(MatCreateVecs(A, NULL, &d));
459: PetscCall(VecSet(d, diag));
460: PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
461: PetscCall(VecDestroy(&d));
462: }
463: PetscCall(PetscFree(lrows));
464: PetscFunctionReturn(PETSC_SUCCESS);
465: }
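/*
   Illustrative sketch (not part of this source file): using MatZeroRows() to impose boundary rows
   while keeping a linear system consistent, which exercises the x/b handling in the routine above.
   The function name is hypothetical.

   static PetscErrorCode ApplyDirichletRows(Mat A, PetscInt n, const PetscInt rows[], Vec x, Vec b)
   {
     PetscFunctionBegin;
     // zero the listed rows, place 1.0 on their diagonal, and set b[row] = x[row]
     PetscCall(MatZeroRows(A, n, rows, 1.0, x, b));
     PetscFunctionReturn(PETSC_SUCCESS);
   }
*/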
467: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
468: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
469: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
470: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
472: PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
473: {
474: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
475: const PetscScalar *ax;
476: PetscScalar *ay;
477: PetscMemType axmtype, aymtype;
479: PetscFunctionBegin;
480: if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
481: PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
482: PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
483: PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
484: PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
485: PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
486: PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
487: PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
488: PetscFunctionReturn(PETSC_SUCCESS);
489: }
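/*
   Illustrative sketch (not part of this source file): a matrix-vector product with layout-compatible
   vectors from MatCreateVecs(); the broadcast of x into lvec seen above happens inside MatMult().
   The function name is hypothetical.

   static PetscErrorCode MultExample(Mat A)
   {
     Vec x, y;

     PetscFunctionBegin;
     PetscCall(MatCreateVecs(A, &x, &y)); // x matches the column layout, y the row layout
     PetscCall(VecSet(x, 1.0));
     PetscCall(MatMult(A, x, y));         // y = A x
     PetscCall(VecDestroy(&x));
     PetscCall(VecDestroy(&y));
     PetscFunctionReturn(PETSC_SUCCESS);
   }
*/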
491: PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
492: {
493: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
494: const PetscScalar *ax;
495: PetscScalar *ay;
496: PetscMemType axmtype, aymtype;
498: PetscFunctionBegin;
499: if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
500: PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
501: PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
502: PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
503: PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
504: PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
505: PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
506: PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
507: PetscFunctionReturn(PETSC_SUCCESS);
508: }
510: PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
511: {
512: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
513: const PetscScalar *ax;
514: PetscScalar *ay;
515: PetscMemType axmtype, aymtype;
517: PetscFunctionBegin;
518: if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
519: PetscCall(VecSet(yy, 0.0));
520: PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
521: PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
522: PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
523: PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
524: PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
525: PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
526: PetscCall(VecRestoreArrayAndMemType(yy, &ay));
527: PetscFunctionReturn(PETSC_SUCCESS);
528: }
530: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
531: {
532: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
533: const PetscScalar *ax;
534: PetscScalar *ay;
535: PetscMemType axmtype, aymtype;
537: PetscFunctionBegin;
538: if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
539: PetscCall(VecCopy(yy, zz));
540: PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
541: PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
542: PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
543: PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
544: PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
545: PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
546: PetscCall(VecRestoreArrayAndMemType(zz, &ay));
547: PetscFunctionReturn(PETSC_SUCCESS);
548: }
550: PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
551: {
552: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
553: PetscInt lda, len, i, n, radd;
554: PetscScalar *x, zero = 0.0;
555: const PetscScalar *av;
557: PetscFunctionBegin;
558: PetscCall(VecSet(v, zero));
559: PetscCall(VecGetArray(v, &x));
560: PetscCall(VecGetSize(v, &n));
561: PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
562: len = PetscMin(a->A->rmap->n, a->A->cmap->n);
563: PetscCall(MatDenseGetArrayRead(a->A, &av));
564: PetscCall(MatDenseGetLDA(a->A, &lda));
565: radd = A->rmap->rstart * lda; /* offset of the local diagonal block in the column-major array */
566: for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
567: PetscCall(MatDenseRestoreArrayRead(a->A, &av));
568: PetscCall(VecRestoreArray(v, &x));
569: PetscFunctionReturn(PETSC_SUCCESS);
570: }
572: PetscErrorCode MatDestroy_MPIDense(Mat mat)
573: {
574: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
576: PetscFunctionBegin;
577: #if defined(PETSC_USE_LOG)
578: PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
579: #endif
580: PetscCall(MatStashDestroy_Private(&mat->stash));
581: PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
582: PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
583: PetscCall(MatDestroy(&mdn->A));
584: PetscCall(VecDestroy(&mdn->lvec));
585: PetscCall(PetscSFDestroy(&mdn->Mvctx));
586: PetscCall(VecDestroy(&mdn->cvec));
587: PetscCall(MatDestroy(&mdn->cmat));
589: PetscCall(PetscFree(mat->data));
590: PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
592: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
593: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
594: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
595: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
596: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
597: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
598: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
599: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
600: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
601: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
602: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
603: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
604: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
605: #if defined(PETSC_HAVE_ELEMENTAL)
606: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
607: #endif
608: #if defined(PETSC_HAVE_SCALAPACK)
609: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
610: #endif
611: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
612: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
613: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
614: #if defined(PETSC_HAVE_CUDA)
615: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
616: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
617: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
618: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
619: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
620: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
621: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
622: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
623: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
624: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
625: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
626: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
627: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
628: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
629: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
630: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
631: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
632: #endif
633: #if defined(PETSC_HAVE_HIP)
634: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
635: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
636: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
637: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
638: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
639: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
640: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
641: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
642: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
643: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
644: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
645: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
646: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
647: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
648: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
649: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
650: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
651: #endif
652: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
653: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
654: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
655: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
656: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
657: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
658: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
659: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
660: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
661: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
663: PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
664: PetscFunctionReturn(PETSC_SUCCESS);
665: }
667: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
669: #include <petscdraw.h>
670: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
671: {
672: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
673: PetscMPIInt rank;
674: PetscViewerType vtype;
675: PetscBool iascii, isdraw;
676: PetscViewer sviewer;
677: PetscViewerFormat format;
679: PetscFunctionBegin;
680: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
681: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
682: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
683: if (iascii) {
684: PetscCall(PetscViewerGetType(viewer, &vtype));
685: PetscCall(PetscViewerGetFormat(viewer, &format));
686: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
687: MatInfo info;
688: PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
689: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
690: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
691: (PetscInt)info.memory));
692: PetscCall(PetscViewerFlush(viewer));
693: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
694: if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
695: PetscFunctionReturn(PETSC_SUCCESS);
696: } else if (format == PETSC_VIEWER_ASCII_INFO) {
697: PetscFunctionReturn(PETSC_SUCCESS);
698: }
699: } else if (isdraw) {
700: PetscDraw draw;
701: PetscBool isnull;
703: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
704: PetscCall(PetscDrawIsNull(draw, &isnull));
705: if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
706: }
708: {
709: /* assemble the entire matrix onto the first process */
710: Mat A;
711: PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
712: PetscInt *cols;
713: PetscScalar *vals;
715: PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
716: if (rank == 0) {
717: PetscCall(MatSetSizes(A, M, N, M, N));
718: } else {
719: PetscCall(MatSetSizes(A, 0, 0, M, N));
720: }
721: /* Since this is a temporary matrix, using MATMPIDENSE instead of ((PetscObject)A)->type_name is probably acceptable here */
722: PetscCall(MatSetType(A, MATMPIDENSE));
723: PetscCall(MatMPIDenseSetPreallocation(A, NULL));
725: /* Copy the matrix row by row. This is not the most efficient approach,
726: but it is simple and adequate for viewing */
727: A->insertmode = INSERT_VALUES;
729: row = mat->rmap->rstart;
730: m = mdn->A->rmap->n;
731: for (i = 0; i < m; i++) {
732: PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
733: PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
734: PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
735: row++;
736: }
738: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
739: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
740: PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
741: if (rank == 0) {
742: PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
743: PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
744: }
745: PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
746: PetscCall(PetscViewerFlush(viewer));
747: PetscCall(MatDestroy(&A));
748: }
749: PetscFunctionReturn(PETSC_SUCCESS);
750: }
752: PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
753: {
754: PetscBool iascii, isbinary, isdraw, issocket;
756: PetscFunctionBegin;
757: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
758: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
759: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
760: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
762: if (iascii || issocket || isdraw) {
763: PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
764: } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
765: PetscFunctionReturn(PETSC_SUCCESS);
766: }
768: PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
769: {
770: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
771: Mat mdn = mat->A;
772: PetscLogDouble isend[5], irecv[5];
774: PetscFunctionBegin;
775: info->block_size = 1.0;
777: PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
779: isend[0] = info->nz_used;
780: isend[1] = info->nz_allocated;
781: isend[2] = info->nz_unneeded;
782: isend[3] = info->memory;
783: isend[4] = info->mallocs;
784: if (flag == MAT_LOCAL) {
785: info->nz_used = isend[0];
786: info->nz_allocated = isend[1];
787: info->nz_unneeded = isend[2];
788: info->memory = isend[3];
789: info->mallocs = isend[4];
790: } else if (flag == MAT_GLOBAL_MAX) {
791: PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
793: info->nz_used = irecv[0];
794: info->nz_allocated = irecv[1];
795: info->nz_unneeded = irecv[2];
796: info->memory = irecv[3];
797: info->mallocs = irecv[4];
798: } else if (flag == MAT_GLOBAL_SUM) {
799: PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
801: info->nz_used = irecv[0];
802: info->nz_allocated = irecv[1];
803: info->nz_unneeded = irecv[2];
804: info->memory = irecv[3];
805: info->mallocs = irecv[4];
806: }
807: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
808: info->fill_ratio_needed = 0;
809: info->factor_mallocs = 0;
810: PetscFunctionReturn(PETSC_SUCCESS);
811: }
813: PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
814: {
815: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
817: PetscFunctionBegin;
818: switch (op) {
819: case MAT_NEW_NONZERO_LOCATIONS:
820: case MAT_NEW_NONZERO_LOCATION_ERR:
821: case MAT_NEW_NONZERO_ALLOCATION_ERR:
822: MatCheckPreallocated(A, 1);
823: PetscCall(MatSetOption(a->A, op, flg));
824: break;
825: case MAT_ROW_ORIENTED:
826: MatCheckPreallocated(A, 1);
827: a->roworiented = flg;
828: PetscCall(MatSetOption(a->A, op, flg));
829: break;
830: case MAT_FORCE_DIAGONAL_ENTRIES:
831: case MAT_KEEP_NONZERO_PATTERN:
832: case MAT_USE_HASH_TABLE:
833: case MAT_SORTED_FULL:
834: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
835: break;
836: case MAT_IGNORE_OFF_PROC_ENTRIES:
837: a->donotstash = flg;
838: break;
839: case MAT_SYMMETRIC:
840: case MAT_STRUCTURALLY_SYMMETRIC:
841: case MAT_HERMITIAN:
842: case MAT_SYMMETRY_ETERNAL:
843: case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
844: case MAT_SPD:
845: case MAT_IGNORE_LOWER_TRIANGULAR:
846: case MAT_IGNORE_ZERO_ENTRIES:
847: case MAT_SPD_ETERNAL:
848: /* if the local diagonal block is square it inherits some of the properties above */
849: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
850: break;
851: default:
852: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
853: }
854: PetscFunctionReturn(PETSC_SUCCESS);
855: }
857: PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
858: {
859: Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
860: const PetscScalar *l;
861: PetscScalar x, *v, *vv, *r;
862: PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
864: PetscFunctionBegin;
865: PetscCall(MatDenseGetArray(mdn->A, &vv));
866: PetscCall(MatDenseGetLDA(mdn->A, &lda));
867: PetscCall(MatGetLocalSize(A, &s2, &s3));
868: if (ll) {
869: PetscCall(VecGetLocalSize(ll, &s2a));
870: PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
871: PetscCall(VecGetArrayRead(ll, &l));
872: for (i = 0; i < m; i++) {
873: x = l[i];
874: v = vv + i;
875: for (j = 0; j < n; j++) {
876: (*v) *= x;
877: v += lda;
878: }
879: }
880: PetscCall(VecRestoreArrayRead(ll, &l));
881: PetscCall(PetscLogFlops(1.0 * n * m));
882: }
883: if (rr) {
884: const PetscScalar *ar;
886: PetscCall(VecGetLocalSize(rr, &s3a));
887: PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
888: PetscCall(VecGetArrayRead(rr, &ar));
889: if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
890: PetscCall(VecGetArray(mdn->lvec, &r));
891: PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
892: PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
893: PetscCall(VecRestoreArrayRead(rr, &ar));
894: for (i = 0; i < n; i++) {
895: x = r[i];
896: v = vv + i * lda;
897: for (j = 0; j < m; j++) (*v++) *= x;
898: }
899: PetscCall(VecRestoreArray(mdn->lvec, &r));
900: PetscCall(PetscLogFlops(1.0 * n * m));
901: }
902: PetscCall(MatDenseRestoreArray(mdn->A, &vv));
903: PetscFunctionReturn(PETSC_SUCCESS);
904: }
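/*
   Illustrative sketch (not part of this source file): row and column scaling through the public
   MatDiagonalScale() interface, which ends up in the routine above. The function name is hypothetical.

   static PetscErrorCode ScaleExample(Mat A)
   {
     Vec l, r;

     PetscFunctionBegin;
     PetscCall(MatCreateVecs(A, &r, &l)); // r matches the column layout, l the row layout
     PetscCall(VecSet(l, 2.0));
     PetscCall(VecSet(r, 0.5));
     PetscCall(MatDiagonalScale(A, l, r)); // A <- diag(l) A diag(r)
     PetscCall(VecDestroy(&l));
     PetscCall(VecDestroy(&r));
     PetscFunctionReturn(PETSC_SUCCESS);
   }
*/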
906: PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
907: {
908: Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
909: PetscInt i, j;
910: PetscMPIInt size;
911: PetscReal sum = 0.0;
912: const PetscScalar *av, *v;
914: PetscFunctionBegin;
915: PetscCall(MatDenseGetArrayRead(mdn->A, &av));
916: v = av;
917: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
918: if (size == 1) {
919: PetscCall(MatNorm(mdn->A, type, nrm));
920: } else {
921: if (type == NORM_FROBENIUS) {
922: for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
923: sum += PetscRealPart(PetscConj(*v) * (*v));
924: v++;
925: }
926: PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
927: *nrm = PetscSqrtReal(*nrm);
928: PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
929: } else if (type == NORM_1) {
930: PetscReal *tmp, *tmp2;
931: PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
932: *nrm = 0.0;
933: v = av;
934: for (j = 0; j < mdn->A->cmap->n; j++) {
935: for (i = 0; i < mdn->A->rmap->n; i++) {
936: tmp[j] += PetscAbsScalar(*v);
937: v++;
938: }
939: }
940: PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
941: for (j = 0; j < A->cmap->N; j++) {
942: if (tmp2[j] > *nrm) *nrm = tmp2[j];
943: }
944: PetscCall(PetscFree2(tmp, tmp2));
945: PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
946: } else if (type == NORM_INFINITY) { /* max row norm */
947: PetscReal ntemp;
948: PetscCall(MatNorm(mdn->A, type, &ntemp));
949: PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
950: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
951: }
952: PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
953: PetscFunctionReturn(PETSC_SUCCESS);
954: }
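/*
   Illustrative sketch (not part of this source file): the norm types handled by the routine above;
   NORM_2 is not supported for parallel dense matrices. The function name is hypothetical.

   static PetscErrorCode NormExample(Mat A, PetscReal *frob, PetscReal *one, PetscReal *inf)
   {
     PetscFunctionBegin;
     PetscCall(MatNorm(A, NORM_FROBENIUS, frob)); // sqrt of the global sum of squared entries
     PetscCall(MatNorm(A, NORM_1, one));          // maximum column sum
     PetscCall(MatNorm(A, NORM_INFINITY, inf));   // maximum row sum
     PetscFunctionReturn(PETSC_SUCCESS);
   }
*/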
956: PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
957: {
958: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959: Mat B;
960: PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
961: PetscInt j, i, lda;
962: PetscScalar *v;
964: PetscFunctionBegin;
965: if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
966: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
967: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
968: PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
969: PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
970: PetscCall(MatMPIDenseSetPreallocation(B, NULL));
971: } else B = *matout;
973: m = a->A->rmap->n;
974: n = a->A->cmap->n;
975: PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
976: PetscCall(MatDenseGetLDA(a->A, &lda));
977: PetscCall(PetscMalloc1(m, &rwork));
978: for (i = 0; i < m; i++) rwork[i] = rstart + i;
979: for (j = 0; j < n; j++) {
980: PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
981: v += lda;
982: }
983: PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
984: PetscCall(PetscFree(rwork));
985: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
986: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
987: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
988: *matout = B;
989: } else {
990: PetscCall(MatHeaderMerge(A, &B));
991: }
992: PetscFunctionReturn(PETSC_SUCCESS);
993: }
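/*
   Illustrative sketch (not part of this source file): out-of-place transposition through the public
   MatTranspose() interface, which is handled by the routine above; MAT_REUSE_MATRIX reuses a matrix
   previously produced with MAT_INITIAL_MATRIX. The function name is hypothetical.

   static PetscErrorCode TransposeExample(Mat A, Mat *At)
   {
     PetscFunctionBegin;
     PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, At)); // create a new matrix holding A^T
     PetscCall(MatTranspose(A, MAT_REUSE_MATRIX, At));   // refresh *At after A has changed
     PetscFunctionReturn(PETSC_SUCCESS);
   }
*/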
995: static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
996: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
998: PetscErrorCode MatSetUp_MPIDense(Mat A)
999: {
1000: PetscFunctionBegin;
1001: PetscCall(PetscLayoutSetUp(A->rmap));
1002: PetscCall(PetscLayoutSetUp(A->cmap));
1003: if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1004: PetscFunctionReturn(PETSC_SUCCESS);
1005: }
1007: PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1008: {
1009: Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1011: PetscFunctionBegin;
1012: PetscCall(MatAXPY(A->A, alpha, B->A, str));
1013: PetscFunctionReturn(PETSC_SUCCESS);
1014: }
1016: PetscErrorCode MatConjugate_MPIDense(Mat mat)
1017: {
1018: Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1020: PetscFunctionBegin;
1021: PetscCall(MatConjugate(a->A));
1022: PetscFunctionReturn(PETSC_SUCCESS);
1023: }
1025: PetscErrorCode MatRealPart_MPIDense(Mat A)
1026: {
1027: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1029: PetscFunctionBegin;
1030: PetscCall(MatRealPart(a->A));
1031: PetscFunctionReturn(PETSC_SUCCESS);
1032: }
1034: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1035: {
1036: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1038: PetscFunctionBegin;
1039: PetscCall(MatImaginaryPart(a->A));
1040: PetscFunctionReturn(PETSC_SUCCESS);
1041: }
1043: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1044: {
1045: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1047: PetscFunctionBegin;
1048: PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1049: PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1050: PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1051: PetscFunctionReturn(PETSC_SUCCESS);
1052: }
1054: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1056: PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1057: {
1058: PetscInt i, m, n;
1059: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1060: PetscReal *work;
1062: PetscFunctionBegin;
1063: PetscCall(MatGetSize(A, &m, &n));
1064: PetscCall(PetscMalloc1(n, &work));
1065: if (type == REDUCTION_MEAN_REALPART) {
1066: PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1067: } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1068: PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1069: } else {
1070: PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1071: }
1072: if (type == NORM_2) {
1073: for (i = 0; i < n; i++) work[i] *= work[i];
1074: }
1075: if (type == NORM_INFINITY) {
1076: PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1077: } else {
1078: PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1079: }
1080: PetscCall(PetscFree(work));
1081: if (type == NORM_2) {
1082: for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1083: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1084: for (i = 0; i < n; i++) reductions[i] /= m;
1085: }
1086: PetscFunctionReturn(PETSC_SUCCESS);
1087: }
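/*
   Illustrative sketch (not part of this source file): column-wise reductions through the public
   interface; MatGetColumnNorms() maps onto the routine above. The function name is hypothetical.

   static PetscErrorCode ColumnNormExample(Mat A)
   {
     PetscInt   N;
     PetscReal *norms;

     PetscFunctionBegin;
     PetscCall(MatGetSize(A, NULL, &N));
     PetscCall(PetscMalloc1(N, &norms));
     PetscCall(MatGetColumnNorms(A, NORM_2, norms)); // every process receives all N column norms
     PetscCall(PetscFree(norms));
     PetscFunctionReturn(PETSC_SUCCESS);
   }
*/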
1089: #if defined(PETSC_HAVE_CUDA)
1090: PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
1091: {
1092: PetscScalar *da;
1093: PetscInt lda;
1095: PetscFunctionBegin;
1096: PetscCall(MatDenseCUDAGetArray(A, &da));
1097: PetscCall(MatDenseGetLDA(A, &lda));
1098: PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1099: PetscCall(MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1100: PetscCall(MatDenseCUDARestoreArray(A, &da));
1101: PetscFunctionReturn(PETSC_SUCCESS);
1102: }
1104: static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1105: {
1106: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1107: PetscInt lda;
1109: PetscFunctionBegin;
1110: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1111: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1112: if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1113: a->vecinuse = col + 1;
1114: PetscCall(MatDenseGetLDA(a->A, &lda));
1115: PetscCall(MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1116: PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1117: *v = a->cvec;
1118: PetscFunctionReturn(PETSC_SUCCESS);
1119: }
1121: static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1122: {
1123: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1125: PetscFunctionBegin;
1126: PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1127: PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1128: a->vecinuse = 0;
1129: PetscCall(MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1130: PetscCall(VecCUDAResetArray(a->cvec));
1131: if (v) *v = NULL;
1132: PetscFunctionReturn(PETSC_SUCCESS);
1133: }
1135: static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1136: {
1137: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1138: PetscInt lda;
1140: PetscFunctionBegin;
1141: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1142: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1143: if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1144: a->vecinuse = col + 1;
1145: PetscCall(MatDenseGetLDA(a->A, &lda));
1146: PetscCall(MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse));
1147: PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1148: PetscCall(VecLockReadPush(a->cvec));
1149: *v = a->cvec;
1150: PetscFunctionReturn(PETSC_SUCCESS);
1151: }
1153: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1154: {
1155: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1157: PetscFunctionBegin;
1158: PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1159: PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1160: a->vecinuse = 0;
1161: PetscCall(MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse));
1162: PetscCall(VecLockReadPop(a->cvec));
1163: PetscCall(VecCUDAResetArray(a->cvec));
1164: if (v) *v = NULL;
1165: PetscFunctionReturn(PETSC_SUCCESS);
1166: }
1168: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1169: {
1170: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1171: PetscInt lda;
1173: PetscFunctionBegin;
1174: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1175: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1176: if (!a->cvec) { PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1177: a->vecinuse = col + 1;
1178: PetscCall(MatDenseGetLDA(a->A, &lda));
1179: PetscCall(MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1180: PetscCall(VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1181: *v = a->cvec;
1182: PetscFunctionReturn(PETSC_SUCCESS);
1183: }
1185: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1186: {
1187: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1189: PetscFunctionBegin;
1190: PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1191: PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1192: a->vecinuse = 0;
1193: PetscCall(MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1194: PetscCall(VecCUDAResetArray(a->cvec));
1195: if (v) *v = NULL;
1196: PetscFunctionReturn(PETSC_SUCCESS);
1197: }
1199: static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1200: {
1201: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1203: PetscFunctionBegin;
1204: PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1205: PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1206: PetscCall(MatDenseCUDAPlaceArray(l->A, a));
1207: PetscFunctionReturn(PETSC_SUCCESS);
1208: }
1210: static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1211: {
1212: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1214: PetscFunctionBegin;
1215: PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1216: PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1217: PetscCall(MatDenseCUDAResetArray(l->A));
1218: PetscFunctionReturn(PETSC_SUCCESS);
1219: }
1221: static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1222: {
1223: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1225: PetscFunctionBegin;
1226: PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1227: PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1228: PetscCall(MatDenseCUDAReplaceArray(l->A, a));
1229: PetscFunctionReturn(PETSC_SUCCESS);
1230: }
1232: static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1233: {
1234: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1236: PetscFunctionBegin;
1237: PetscCall(MatDenseCUDAGetArrayWrite(l->A, a));
1238: PetscFunctionReturn(PETSC_SUCCESS);
1239: }
1241: static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1242: {
1243: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1245: PetscFunctionBegin;
1246: PetscCall(MatDenseCUDARestoreArrayWrite(l->A, a));
1247: PetscFunctionReturn(PETSC_SUCCESS);
1248: }
1250: static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1251: {
1252: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1254: PetscFunctionBegin;
1255: PetscCall(MatDenseCUDAGetArrayRead(l->A, a));
1256: PetscFunctionReturn(PETSC_SUCCESS);
1257: }
1259: static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1260: {
1261: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1263: PetscFunctionBegin;
1264: PetscCall(MatDenseCUDARestoreArrayRead(l->A, a));
1265: PetscFunctionReturn(PETSC_SUCCESS);
1266: }
1268: static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1269: {
1270: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1272: PetscFunctionBegin;
1273: PetscCall(MatDenseCUDAGetArray(l->A, a));
1274: PetscFunctionReturn(PETSC_SUCCESS);
1275: }
1277: static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1278: {
1279: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1281: PetscFunctionBegin;
1282: PetscCall(MatDenseCUDARestoreArray(l->A, a));
1283: PetscFunctionReturn(PETSC_SUCCESS);
1284: }
1286: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1287: static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1288: static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1289: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1290: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1291: static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1292: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1294: static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
1295: {
1296: Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1298: PetscFunctionBegin;
1299: PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1300: PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1301: if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1302: mat->boundtocpu = bind;
1303: if (!bind) {
1304: PetscBool iscuda;
1306: PetscCall(PetscFree(mat->defaultrandtype));
1307: PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1308: PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda));
1309: if (!iscuda) PetscCall(VecDestroy(&d->cvec));
1310: PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda));
1311: if (!iscuda) PetscCall(MatDestroy(&d->cmat));
1312: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA));
1313: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA));
1314: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA));
1315: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA));
1316: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA));
1317: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1318: mat->ops->shift = MatShift_MPIDenseCUDA;
1319: } else {
1320: PetscCall(PetscFree(mat->defaultrandtype));
1321: PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1322: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1323: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1324: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1325: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1326: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1327: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1328: mat->ops->shift = MatShift_MPIDense;
1329: }
1330: if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1331: PetscFunctionReturn(PETSC_SUCCESS);
1332: }
1334: PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1335: {
1336: Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1337: PetscBool iscuda;
1339: PetscFunctionBegin;
1341: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
1342: if (!iscuda) PetscFunctionReturn(PETSC_SUCCESS);
1343: PetscCall(PetscLayoutSetUp(A->rmap));
1344: PetscCall(PetscLayoutSetUp(A->cmap));
1345: if (!d->A) {
1346: PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1347: PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1348: }
1349: PetscCall(MatSetType(d->A, MATSEQDENSECUDA));
1350: PetscCall(MatSeqDenseCUDASetPreallocation(d->A, d_data));
1351: A->preallocated = PETSC_TRUE;
1352: A->assembled = PETSC_TRUE;
1353: PetscFunctionReturn(PETSC_SUCCESS);
1354: }
1355: #endif
1357: #if defined(PETSC_HAVE_HIP)
1358: PetscErrorCode MatShift_MPIDenseHIP(Mat A, PetscScalar alpha)
1359: {
1360: PetscScalar *da;
1361: PetscInt lda;
1363: PetscFunctionBegin;
1364: PetscCall(MatDenseHIPGetArray(A, &da));
1365: PetscCall(MatDenseGetLDA(A, &lda));
1366: PetscCall(PetscInfo(A, "Performing Shift on backend\n"));
1367: PetscCall(MatShift_DenseHIP_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N));
1368: PetscCall(MatDenseHIPRestoreArray(A, &da));
1369: PetscFunctionReturn(PETSC_SUCCESS);
1370: }
1372: static PetscErrorCode MatDenseGetColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1373: {
1374: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1375: PetscInt lda;
1377: PetscFunctionBegin;
1378: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1379: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1380: if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1381: a->vecinuse = col + 1;
1382: PetscCall(MatDenseGetLDA(a->A, &lda));
1383: PetscCall(MatDenseHIPGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1384: PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1385: *v = a->cvec;
1386: PetscFunctionReturn(PETSC_SUCCESS);
1387: }
1389: static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1390: {
1391: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1393: PetscFunctionBegin;
1394: PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1395: PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1396: a->vecinuse = 0;
1397: PetscCall(MatDenseHIPRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1398: PetscCall(VecHIPResetArray(a->cvec));
1399: if (v) *v = NULL;
1400: PetscFunctionReturn(PETSC_SUCCESS);
1401: }
1403: static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1404: {
1405: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1406: PetscInt lda;
1408: PetscFunctionBegin;
1409: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1410: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1411: if (!a->cvec) { PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec)); }
1412: a->vecinuse = col + 1;
1413: PetscCall(MatDenseGetLDA(a->A, &lda));
1414: PetscCall(MatDenseHIPGetArrayRead(a->A, &a->ptrinuse));
1415: PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1416: PetscCall(VecLockReadPush(a->cvec));
1417: *v = a->cvec;
1418: PetscFunctionReturn(PETSC_SUCCESS);
1419: }
1421: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1422: {
1423: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1425: PetscFunctionBegin;
1426: PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1427: PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1428: a->vecinuse = 0;
1429: PetscCall(MatDenseHIPRestoreArrayRead(a->A, &a->ptrinuse));
1430: PetscCall(VecLockReadPop(a->cvec));
1431: PetscCall(VecHIPResetArray(a->cvec));
1432: if (v) *v = NULL;
1433: PetscFunctionReturn(PETSC_SUCCESS);
1434: }
1436: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1437: {
1438: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1439: PetscInt lda;
1441: PetscFunctionBegin;
1442: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1443: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1444: if (!a->cvec) PetscCall(VecCreateMPIHIPWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
1445: a->vecinuse = col + 1;
1446: PetscCall(MatDenseGetLDA(a->A, &lda));
1447: PetscCall(MatDenseHIPGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1448: PetscCall(VecHIPPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1449: *v = a->cvec;
1450: PetscFunctionReturn(PETSC_SUCCESS);
1451: }
1453: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseHIP(Mat A, PetscInt col, Vec *v)
1454: {
1455: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1457: PetscFunctionBegin;
1458: PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1459: PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1460: a->vecinuse = 0;
1461: PetscCall(MatDenseHIPRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1462: PetscCall(VecHIPResetArray(a->cvec));
1463: if (v) *v = NULL;
1464: PetscFunctionReturn(PETSC_SUCCESS);
1465: }
1467: static PetscErrorCode MatDenseHIPPlaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1468: {
1469: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1471: PetscFunctionBegin;
1472: PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1473: PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1474: PetscCall(MatDenseHIPPlaceArray(l->A, a));
1475: PetscFunctionReturn(PETSC_SUCCESS);
1476: }
1478: static PetscErrorCode MatDenseHIPResetArray_MPIDenseHIP(Mat A)
1479: {
1480: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1482: PetscFunctionBegin;
1483: PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1484: PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1485: PetscCall(MatDenseHIPResetArray(l->A));
1486: PetscFunctionReturn(PETSC_SUCCESS);
1487: }
1489: static PetscErrorCode MatDenseHIPReplaceArray_MPIDenseHIP(Mat A, const PetscScalar *a)
1490: {
1491: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1493: PetscFunctionBegin;
1494: PetscCheck(!l->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1495: PetscCheck(!l->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1496: PetscCall(MatDenseHIPReplaceArray(l->A, a));
1497: PetscFunctionReturn(PETSC_SUCCESS);
1498: }
1500: static PetscErrorCode MatDenseHIPGetArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1501: {
1502: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1504: PetscFunctionBegin;
1505: PetscCall(MatDenseHIPGetArrayWrite(l->A, a));
1506: PetscFunctionReturn(PETSC_SUCCESS);
1507: }
1509: static PetscErrorCode MatDenseHIPRestoreArrayWrite_MPIDenseHIP(Mat A, PetscScalar **a)
1510: {
1511: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1513: PetscFunctionBegin;
1514: PetscCall(MatDenseHIPRestoreArrayWrite(l->A, a));
1515: PetscFunctionReturn(PETSC_SUCCESS);
1516: }
1518: static PetscErrorCode MatDenseHIPGetArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1519: {
1520: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1522: PetscFunctionBegin;
1523: PetscCall(MatDenseHIPGetArrayRead(l->A, a));
1524: PetscFunctionReturn(PETSC_SUCCESS);
1525: }
1527: static PetscErrorCode MatDenseHIPRestoreArrayRead_MPIDenseHIP(Mat A, const PetscScalar **a)
1528: {
1529: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1531: PetscFunctionBegin;
1532: PetscCall(MatDenseHIPRestoreArrayRead(l->A, a));
1533: PetscFunctionReturn(PETSC_SUCCESS);
1534: }
1536: static PetscErrorCode MatDenseHIPGetArray_MPIDenseHIP(Mat A, PetscScalar **a)
1537: {
1538: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1540: PetscFunctionBegin;
1541: PetscCall(MatDenseHIPGetArray(l->A, a));
1542: PetscFunctionReturn(PETSC_SUCCESS);
1543: }
1545: static PetscErrorCode MatDenseHIPRestoreArray_MPIDenseHIP(Mat A, PetscScalar **a)
1546: {
1547: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1549: PetscFunctionBegin;
1550: PetscCall(MatDenseHIPRestoreArray(l->A, a));
1551: PetscFunctionReturn(PETSC_SUCCESS);
1552: }
1554: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1555: static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1556: static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1557: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1558: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1559: static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1560: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1562: static PetscErrorCode MatBindToCPU_MPIDenseHIP(Mat mat, PetscBool bind)
1563: {
1564: Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1566: PetscFunctionBegin;
1567: PetscCheck(!d->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1568: PetscCheck(!d->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1569: if (d->A) PetscCall(MatBindToCPU(d->A, bind));
1570: mat->boundtocpu = bind;
1571: if (!bind) {
1572:     PetscBool iship;
1574:     PetscCall(PetscFree(mat->defaultrandtype));
1575:     PetscCall(PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype));
1576:     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec, VECMPIHIP, &iship));
1577:     if (!iship) PetscCall(VecDestroy(&d->cvec));
1578:     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSEHIP, &iship));
1579:     if (!iship) PetscCall(MatDestroy(&d->cmat));
1580: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseHIP));
1581: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseHIP));
1582: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseHIP));
1583: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseHIP));
1584: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseHIP));
1585: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseHIP));
1586: mat->ops->shift = MatShift_MPIDenseHIP;
1587: } else {
1588: PetscCall(PetscFree(mat->defaultrandtype));
1589: PetscCall(PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype));
1590: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1591: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1592: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1593: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1594: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1595: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1596: mat->ops->shift = MatShift_MPIDense;
1597: }
1598: if (d->cmat) PetscCall(MatBindToCPU(d->cmat, bind));
1599: PetscFunctionReturn(PETSC_SUCCESS);
1600: }
1602: PetscErrorCode MatMPIDenseHIPSetPreallocation(Mat A, PetscScalar *d_data)
1603: {
1604: Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1605:   PetscBool iship;
1607:   PetscFunctionBegin;
1609:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
1610:   if (!iship) PetscFunctionReturn(PETSC_SUCCESS);
1611: PetscCall(PetscLayoutSetUp(A->rmap));
1612: PetscCall(PetscLayoutSetUp(A->cmap));
1613: if (!d->A) {
1614: PetscCall(MatCreate(PETSC_COMM_SELF, &d->A));
1615: PetscCall(MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
1616: }
1617: PetscCall(MatSetType(d->A, MATSEQDENSEHIP));
1618: PetscCall(MatSeqDenseHIPSetPreallocation(d->A, d_data));
1619: A->preallocated = PETSC_TRUE;
1620: A->assembled = PETSC_TRUE;
1621: PetscFunctionReturn(PETSC_SUCCESS);
1622: }
1623: #endif
1625: static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1626: {
1627: Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1629: PetscFunctionBegin;
1630: PetscCall(MatSetRandom(d->A, rctx));
1631: #if defined(PETSC_HAVE_DEVICE)
1632: x->offloadmask = d->A->offloadmask;
1633: #endif
1634: PetscFunctionReturn(PETSC_SUCCESS);
1635: }
1637: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1638: {
1639: PetscFunctionBegin;
1640: *missing = PETSC_FALSE;
1641: PetscFunctionReturn(PETSC_SUCCESS);
1642: }
1644: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1645: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1646: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1647: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1648: static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1649: static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1651: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1652: MatGetRow_MPIDense,
1653: MatRestoreRow_MPIDense,
1654: MatMult_MPIDense,
1655: /* 4*/ MatMultAdd_MPIDense,
1656: MatMultTranspose_MPIDense,
1657: MatMultTransposeAdd_MPIDense,
1658: NULL,
1659: NULL,
1660: NULL,
1661: /* 10*/ NULL,
1662: NULL,
1663: NULL,
1664: NULL,
1665: MatTranspose_MPIDense,
1666: /* 15*/ MatGetInfo_MPIDense,
1667: MatEqual_MPIDense,
1668: MatGetDiagonal_MPIDense,
1669: MatDiagonalScale_MPIDense,
1670: MatNorm_MPIDense,
1671: /* 20*/ MatAssemblyBegin_MPIDense,
1672: MatAssemblyEnd_MPIDense,
1673: MatSetOption_MPIDense,
1674: MatZeroEntries_MPIDense,
1675: /* 24*/ MatZeroRows_MPIDense,
1676: NULL,
1677: NULL,
1678: NULL,
1679: NULL,
1680: /* 29*/ MatSetUp_MPIDense,
1681: NULL,
1682: NULL,
1683: MatGetDiagonalBlock_MPIDense,
1684: NULL,
1685: /* 34*/ MatDuplicate_MPIDense,
1686: NULL,
1687: NULL,
1688: NULL,
1689: NULL,
1690: /* 39*/ MatAXPY_MPIDense,
1691: MatCreateSubMatrices_MPIDense,
1692: NULL,
1693: MatGetValues_MPIDense,
1694: MatCopy_MPIDense,
1695: /* 44*/ NULL,
1696: MatScale_MPIDense,
1697: MatShift_MPIDense,
1698: NULL,
1699: NULL,
1700: /* 49*/ MatSetRandom_MPIDense,
1701: NULL,
1702: NULL,
1703: NULL,
1704: NULL,
1705: /* 54*/ NULL,
1706: NULL,
1707: NULL,
1708: NULL,
1709: NULL,
1710: /* 59*/ MatCreateSubMatrix_MPIDense,
1711: MatDestroy_MPIDense,
1712: MatView_MPIDense,
1713: NULL,
1714: NULL,
1715: /* 64*/ NULL,
1716: NULL,
1717: NULL,
1718: NULL,
1719: NULL,
1720: /* 69*/ NULL,
1721: NULL,
1722: NULL,
1723: NULL,
1724: NULL,
1725: /* 74*/ NULL,
1726: NULL,
1727: NULL,
1728: NULL,
1729: NULL,
1730: /* 79*/ NULL,
1731: NULL,
1732: NULL,
1733: NULL,
1734: /* 83*/ MatLoad_MPIDense,
1735: NULL,
1736: NULL,
1737: NULL,
1738: NULL,
1739: NULL,
1740: /* 89*/ NULL,
1741: NULL,
1742: NULL,
1743: NULL,
1744: NULL,
1745: /* 94*/ NULL,
1746: NULL,
1747: MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1748: MatMatTransposeMultNumeric_MPIDense_MPIDense,
1749: NULL,
1750: /* 99*/ MatProductSetFromOptions_MPIDense,
1751: NULL,
1752: NULL,
1753: MatConjugate_MPIDense,
1754: NULL,
1755: /*104*/ NULL,
1756: MatRealPart_MPIDense,
1757: MatImaginaryPart_MPIDense,
1758: NULL,
1759: NULL,
1760: /*109*/ NULL,
1761: NULL,
1762: NULL,
1763: MatGetColumnVector_MPIDense,
1764: MatMissingDiagonal_MPIDense,
1765: /*114*/ NULL,
1766: NULL,
1767: NULL,
1768: NULL,
1769: NULL,
1770: /*119*/ NULL,
1771: NULL,
1772: NULL,
1773: NULL,
1774: NULL,
1775: /*124*/ NULL,
1776: MatGetColumnReductions_MPIDense,
1777: NULL,
1778: NULL,
1779: NULL,
1780: /*129*/ NULL,
1781: NULL,
1782: MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1783: MatTransposeMatMultNumeric_MPIDense_MPIDense,
1784: NULL,
1785: /*134*/ NULL,
1786: NULL,
1787: NULL,
1788: NULL,
1789: NULL,
1790: /*139*/ NULL,
1791: NULL,
1792: NULL,
1793: NULL,
1794: NULL,
1795: MatCreateMPIMatConcatenateSeqMat_MPIDense,
1796: /*145*/ NULL,
1797: NULL,
1798: NULL,
1799: NULL,
1800: NULL,
1801: /*150*/ NULL,
1802: NULL};
1804: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1805: {
1806: Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1807: MatType mtype = MATSEQDENSE;
1809: PetscFunctionBegin;
1810: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1811: PetscCall(PetscLayoutSetUp(mat->rmap));
1812: PetscCall(PetscLayoutSetUp(mat->cmap));
1813: if (!a->A) {
1814: PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1815: PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1816: }
1817: #if defined(PETSC_HAVE_CUDA)
1818: PetscBool iscuda;
1819: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1820: if (iscuda) mtype = MATSEQDENSECUDA;
1821: #endif
1822: #if defined(PETSC_HAVE_HIP)
1823: PetscBool iship;
1824: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1825: if (iship) mtype = MATSEQDENSEHIP;
1826: #endif
1827: PetscCall(MatSetType(a->A, mtype));
1828: PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1829: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1830: mat->offloadmask = a->A->offloadmask;
1831: #endif
1832: mat->preallocated = PETSC_TRUE;
1833: mat->assembled = PETSC_TRUE;
1834: PetscFunctionReturn(PETSC_SUCCESS);
1835: }
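/* Convert a parallel AIJ matrix to parallel dense: merge the local rows into a
   sequential AIJ matrix, convert that block to sequential dense, then concatenate
   the per-rank blocks into an MPI dense matrix. */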
1837: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1838: {
1839: Mat B, C;
1841: PetscFunctionBegin;
1842: PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1843: PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1844: PetscCall(MatDestroy(&C));
1845: if (reuse == MAT_REUSE_MATRIX) {
1846: C = *newmat;
1847: } else C = NULL;
1848: PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1849: PetscCall(MatDestroy(&B));
1850: if (reuse == MAT_INPLACE_MATRIX) {
1851: PetscCall(MatHeaderReplace(A, &C));
1852: } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1853: PetscFunctionReturn(PETSC_SUCCESS);
1854: }
1856: PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1857: {
1858: Mat B, C;
1860: PetscFunctionBegin;
1861: PetscCall(MatDenseGetLocalMatrix(A, &C));
1862: PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1863: if (reuse == MAT_REUSE_MATRIX) {
1864: C = *newmat;
1865: } else C = NULL;
1866: PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1867: PetscCall(MatDestroy(&B));
1868: if (reuse == MAT_INPLACE_MATRIX) {
1869: PetscCall(MatHeaderReplace(A, &C));
1870: } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1871: PetscFunctionReturn(PETSC_SUCCESS);
1872: }
1874: #if defined(PETSC_HAVE_ELEMENTAL)
1875: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1876: {
1877: Mat mat_elemental;
1878: PetscScalar *v;
1879: PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1881: PetscFunctionBegin;
1882: if (reuse == MAT_REUSE_MATRIX) {
1883: mat_elemental = *newmat;
1884: PetscCall(MatZeroEntries(*newmat));
1885: } else {
1886: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1887: PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1888: PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1889: PetscCall(MatSetUp(mat_elemental));
1890: PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1891: }
1893: PetscCall(PetscMalloc2(m, &rows, N, &cols));
1894: for (i = 0; i < N; i++) cols[i] = i;
1895: for (i = 0; i < m; i++) rows[i] = rstart + i;
1897: /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1898: PetscCall(MatDenseGetArray(A, &v));
1899: PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1900: PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1901: PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1902: PetscCall(MatDenseRestoreArray(A, &v));
1903: PetscCall(PetscFree2(rows, cols));
1905: if (reuse == MAT_INPLACE_MATRIX) {
1906: PetscCall(MatHeaderReplace(A, &mat_elemental));
1907: } else {
1908: *newmat = mat_elemental;
1909: }
1910: PetscFunctionReturn(PETSC_SUCCESS);
1911: }
1912: #endif
1914: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1915: {
1916: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1918: PetscFunctionBegin;
1919: PetscCall(MatDenseGetColumn(mat->A, col, vals));
1920: PetscFunctionReturn(PETSC_SUCCESS);
1921: }
1923: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1924: {
1925: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1927: PetscFunctionBegin;
1928: PetscCall(MatDenseRestoreColumn(mat->A, vals));
1929: PetscFunctionReturn(PETSC_SUCCESS);
1930: }
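/* Build (or reuse, for MAT_REUSE_MATRIX) a parallel dense matrix whose local block
   is a copy of the given sequential matrix: the symbolic phase creates the MPI
   matrix with the requested column layout, the numeric phase copies the values of
   inmat into the local block. */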
1932: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1933: {
1934: Mat_MPIDense *mat;
1935: PetscInt m, nloc, N;
1937: PetscFunctionBegin;
1938: PetscCall(MatGetSize(inmat, &m, &N));
1939: PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1940: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1941: PetscInt sum;
1943: if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1944: /* Check sum(n) = N */
1945: PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1946: PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1948: PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1949: PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1950: }
1952: /* numeric phase */
1953: mat = (Mat_MPIDense *)(*outmat)->data;
1954: PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1955: PetscFunctionReturn(PETSC_SUCCESS);
1956: }
1958: #if defined(PETSC_HAVE_CUDA)
1959: PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1960: {
1961: Mat B;
1962: Mat_MPIDense *m;
1964: PetscFunctionBegin;
1965: if (reuse == MAT_INITIAL_MATRIX) {
1966: PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
1967: } else if (reuse == MAT_REUSE_MATRIX) {
1968: PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
1969: }
1971: B = *newmat;
1972: PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE));
1973: PetscCall(PetscFree(B->defaultvectype));
1974: PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
1975: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
1976: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL));
1977: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
1978: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
1979: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
1980: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
1981: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL));
1982: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL));
1983: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL));
1984: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL));
1985: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL));
1986: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL));
1987: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL));
1988: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL));
1989: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL));
1990: m = (Mat_MPIDense *)(B)->data;
1991: if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
1992: B->ops->bindtocpu = NULL;
1993: B->offloadmask = PETSC_OFFLOAD_CPU;
1994: PetscFunctionReturn(PETSC_SUCCESS);
1995: }
1997: PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1998: {
1999: Mat B;
2000: Mat_MPIDense *m;
2002: PetscFunctionBegin;
2003: if (reuse == MAT_INITIAL_MATRIX) {
2004: PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2005: } else if (reuse == MAT_REUSE_MATRIX) {
2006: PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2007: }
2009: B = *newmat;
2010: PetscCall(PetscFree(B->defaultvectype));
2011: PetscCall(PetscStrallocpy(VECCUDA, &B->defaultvectype));
2012: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA));
2013: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense));
2014: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2015: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2016: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2017: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2018: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA));
2019: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA));
2020: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
2021: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA));
2022: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
2023: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
2024: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA));
2025: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA));
2026: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA));
2027: m = (Mat_MPIDense *)(B->data);
2028: if (m->A) {
2029: PetscCall(MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A));
2030: B->offloadmask = PETSC_OFFLOAD_BOTH;
2031: } else {
2032: B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2033: }
2034: PetscCall(MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE));
2036: B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
2037: PetscFunctionReturn(PETSC_SUCCESS);
2038: }
2039: #endif
2041: #if defined(PETSC_HAVE_HIP)
2042: PetscErrorCode MatConvert_MPIDenseHIP_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2043: {
2044: Mat B;
2045: Mat_MPIDense *m;
2047: PetscFunctionBegin;
2048: if (reuse == MAT_INITIAL_MATRIX) {
2049: PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2050: } else if (reuse == MAT_REUSE_MATRIX) {
2051: PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2052: }
2054: B = *newmat;
2055: PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_TRUE));
2056: PetscCall(PetscFree(B->defaultvectype));
2057: PetscCall(PetscStrallocpy(VECSTANDARD, &B->defaultvectype));
2058: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE));
2059: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", NULL));
2060: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
2061: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
2062: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
2063: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
2064: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", NULL));
2065: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", NULL));
2066: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", NULL));
2067: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", NULL));
2068: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", NULL));
2069: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", NULL));
2070: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", NULL));
2071: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", NULL));
2072: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", NULL));
2073: m = (Mat_MPIDense *)(B)->data;
2074: if (m->A) PetscCall(MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A));
2075: B->ops->bindtocpu = NULL;
2076: B->offloadmask = PETSC_OFFLOAD_CPU;
2077: PetscFunctionReturn(PETSC_SUCCESS);
2078: }
2080: PetscErrorCode MatConvert_MPIDense_MPIDenseHIP(Mat M, MatType type, MatReuse reuse, Mat *newmat)
2081: {
2082: Mat B;
2083: Mat_MPIDense *m;
2085: PetscFunctionBegin;
2086: if (reuse == MAT_INITIAL_MATRIX) {
2087: PetscCall(MatDuplicate(M, MAT_COPY_VALUES, newmat));
2088: } else if (reuse == MAT_REUSE_MATRIX) {
2089: PetscCall(MatCopy(M, *newmat, SAME_NONZERO_PATTERN));
2090: }
2092: B = *newmat;
2093: PetscCall(PetscFree(B->defaultvectype));
2094: PetscCall(PetscStrallocpy(VECHIP, &B->defaultvectype));
2095: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSEHIP));
2096: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensehip_mpidense_C", MatConvert_MPIDenseHIP_MPIDense));
2097: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2098: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2099: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2100: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2101: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArray_C", MatDenseHIPGetArray_MPIDenseHIP));
2102: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayRead_C", MatDenseHIPGetArrayRead_MPIDenseHIP));
2103: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPGetArrayWrite_C", MatDenseHIPGetArrayWrite_MPIDenseHIP));
2104: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArray_C", MatDenseHIPRestoreArray_MPIDenseHIP));
2105: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayRead_C", MatDenseHIPRestoreArrayRead_MPIDenseHIP));
2106: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPRestoreArrayWrite_C", MatDenseHIPRestoreArrayWrite_MPIDenseHIP));
2107: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPPlaceArray_C", MatDenseHIPPlaceArray_MPIDenseHIP));
2108: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPResetArray_C", MatDenseHIPResetArray_MPIDenseHIP));
2109: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDenseHIPReplaceArray_C", MatDenseHIPReplaceArray_MPIDenseHIP));
2110: m = (Mat_MPIDense *)(B->data);
2111: if (m->A) {
2112: PetscCall(MatConvert(m->A, MATSEQDENSEHIP, MAT_INPLACE_MATRIX, &m->A));
2113: B->offloadmask = PETSC_OFFLOAD_BOTH;
2114: } else {
2115: B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
2116: }
2117: PetscCall(MatBindToCPU_MPIDenseHIP(B, PETSC_FALSE));
2119: B->ops->bindtocpu = MatBindToCPU_MPIDenseHIP;
2120: PetscFunctionReturn(PETSC_SUCCESS);
2121: }
2122: #endif
2124: PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2125: {
2126: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2127: PetscInt lda;
2129: PetscFunctionBegin;
2130: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2131: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2132: if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2133: a->vecinuse = col + 1;
2134: PetscCall(MatDenseGetLDA(a->A, &lda));
2135: PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
2136: PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2137: *v = a->cvec;
2138: PetscFunctionReturn(PETSC_SUCCESS);
2139: }
2141: PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
2142: {
2143: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2145: PetscFunctionBegin;
2146: PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2147: PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
2148: a->vecinuse = 0;
2149: PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
2150: PetscCall(VecResetArray(a->cvec));
2151: if (v) *v = NULL;
2152: PetscFunctionReturn(PETSC_SUCCESS);
2153: }
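/*
   Example (editorial sketch, not part of the original source): the public
   MatDenseGetColumnVec()/MatDenseRestoreColumnVec() pair dispatches to the two
   routines above for MATMPIDENSE; a typical use is to view column j of A as a
   parallel Vec (only one column may be checked out at a time). A and j are
   placeholders.

      Vec       col;
      PetscReal nrm;
      PetscCall(MatDenseGetColumnVec(A, j, &col));
      PetscCall(VecNorm(col, NORM_2, &nrm));
      PetscCall(MatDenseRestoreColumnVec(A, j, &col));
*/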
2155: PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2156: {
2157: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2158: PetscInt lda;
2160: PetscFunctionBegin;
2161: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2162: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2163: if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2164: a->vecinuse = col + 1;
2165: PetscCall(MatDenseGetLDA(a->A, &lda));
2166: PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
2167: PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2168: PetscCall(VecLockReadPush(a->cvec));
2169: *v = a->cvec;
2170: PetscFunctionReturn(PETSC_SUCCESS);
2171: }
2173: PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
2174: {
2175: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2177: PetscFunctionBegin;
2178: PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2179: PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2180: a->vecinuse = 0;
2181: PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
2182: PetscCall(VecLockReadPop(a->cvec));
2183: PetscCall(VecResetArray(a->cvec));
2184: if (v) *v = NULL;
2185: PetscFunctionReturn(PETSC_SUCCESS);
2186: }
2188: PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2189: {
2190: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2191: PetscInt lda;
2193: PetscFunctionBegin;
2194: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2195: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2196: if (!a->cvec) PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec));
2197: a->vecinuse = col + 1;
2198: PetscCall(MatDenseGetLDA(a->A, &lda));
2199: PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2200: PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
2201: *v = a->cvec;
2202: PetscFunctionReturn(PETSC_SUCCESS);
2203: }
2205: PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
2206: {
2207: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2209: PetscFunctionBegin;
2210: PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
2211: PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
2212: a->vecinuse = 0;
2213: PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
2214: PetscCall(VecResetArray(a->cvec));
2215: if (v) *v = NULL;
2216: PetscFunctionReturn(PETSC_SUCCESS);
2217: }
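/* Return a cached parallel view (a->cmat) of rows [rbegin, rend) and columns
   [cbegin, cend): the view's layouts are (re)built only when the requested sizes
   change, and its local block is obtained from the inner sequential matrix via
   MatDenseGetSubMatrix(). */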
2219: PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
2220: {
2221: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2222: Mat_MPIDense *c;
2223: MPI_Comm comm;
2224: PetscInt pbegin, pend;
2226: PetscFunctionBegin;
2227: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2228: PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
2229: PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2230: pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
2231: pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
2232: if (!a->cmat) {
2233: PetscCall(MatCreate(comm, &a->cmat));
2234: PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
2235: if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
2236: else {
2237: PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2238: PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2239: PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2240: }
2241: PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2242: PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2243: } else {
2244: PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
2245: if (same && a->cmat->rmap->N != A->rmap->N) {
2246: same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
2247: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2248: }
2249: if (!same) {
2250: PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
2251: PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
2252: PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
2253: PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
2254: PetscCall(PetscLayoutSetUp(a->cmat->rmap));
2255: }
2256: if (cend - cbegin != a->cmat->cmap->N) {
2257: PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
2258: PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
2259: PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
2260: PetscCall(PetscLayoutSetUp(a->cmat->cmap));
2261: }
2262: }
2263: c = (Mat_MPIDense *)a->cmat->data;
2264: PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
2265: PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
2267: a->cmat->preallocated = PETSC_TRUE;
2268: a->cmat->assembled = PETSC_TRUE;
2269: #if defined(PETSC_HAVE_DEVICE)
2270: a->cmat->offloadmask = c->A->offloadmask;
2271: #endif
2272: a->matinuse = cbegin + 1;
2273: *v = a->cmat;
2274: PetscFunctionReturn(PETSC_SUCCESS);
2275: }
2277: PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
2278: {
2279: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
2280: Mat_MPIDense *c;
2282: PetscFunctionBegin;
2283: PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
2284: PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
2285: PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
2286: a->matinuse = 0;
2287: c = (Mat_MPIDense *)a->cmat->data;
2288: PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
2289: if (v) *v = NULL;
2290: #if defined(PETSC_HAVE_DEVICE)
2291: A->offloadmask = a->A->offloadmask;
2292: #endif
2293: PetscFunctionReturn(PETSC_SUCCESS);
2294: }
2296: /*MC
2297: MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
2299: Options Database Key:
2300: . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
2302: Level: beginner
2304: .seealso: [](chapter_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
2305: M*/
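/*
   Example (editorial sketch, not part of the original source): one common way to
   create a distributed dense matrix of global size M x N and let PETSc manage the
   storage; comm, M and N are placeholders.

      Mat A;
      PetscCall(MatCreateDense(comm, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A));
      // ... fill with MatSetValues() or MatDenseGetArray(), then assemble ...
      PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
      PetscCall(MatDestroy(&A));

   On more than one MPI process the resulting type is MATMPIDENSE; on a single
   process it is MATSEQDENSE.
*/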
2306: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
2307: {
2308: Mat_MPIDense *a;
2310: PetscFunctionBegin;
2311: PetscCall(PetscNew(&a));
2312: mat->data = (void *)a;
2313: PetscCall(PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps)));
2315: mat->insertmode = NOT_SET_VALUES;
2317: /* build cache for off array entries formed */
2318: a->donotstash = PETSC_FALSE;
2320: PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
2322: /* stuff used for matrix vector multiply */
2323: a->lvec = NULL;
2324: a->Mvctx = NULL;
2325: a->roworiented = PETSC_TRUE;
2327: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
2328: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
2329: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
2330: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
2331: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
2332: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
2333: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
2334: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
2335: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
2336: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
2337: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
2338: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
2339: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
2340: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
2341: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
2342: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
2343: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
2344: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
2345: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
2346: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
2347: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
2348: #if defined(PETSC_HAVE_ELEMENTAL)
2349: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
2350: #endif
2351: #if defined(PETSC_HAVE_SCALAPACK)
2352: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
2353: #endif
2354: #if defined(PETSC_HAVE_CUDA)
2355: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
2356: #endif
2357: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
2358: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2359: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2360: #if defined(PETSC_HAVE_CUDA)
2361: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2362: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2363: #endif
2364: #if defined(PETSC_HAVE_HIP)
2365: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
2366: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
2367: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
2368: #endif
2369: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
2370: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
2371: PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
2372: PetscFunctionReturn(PETSC_SUCCESS);
2373: }
2375: /*MC
2376: MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
2378: Options Database Key:
2379: . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
2381: Level: beginner
2383: .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
2384: M*/
2385: #if defined(PETSC_HAVE_CUDA)
2386: #include <petsc/private/deviceimpl.h>
2387: PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
2388: {
2389: PetscFunctionBegin;
2390: PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
2391: PetscCall(MatCreate_MPIDense(B));
2392: PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B));
2393: PetscFunctionReturn(PETSC_SUCCESS);
2394: }
2395: #endif
2397: /*MC
2398: MATMPIDENSEHIP - MATMPIDENSEHIP = "mpidensehip" - A matrix type to be used for distributed dense matrices on GPUs.
2400: Options Database Key:
2401: . -mat_type mpidensehip - sets the matrix type to `MATMPIDENSEHIP` during a call to `MatSetFromOptions()`
2403: Level: beginner
2405: .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`, `MATMPIDENSEHIP`
2406: M*/
2407: #if defined(PETSC_HAVE_HIP)
2408: #include <petsc/private/deviceimpl.h>
2409: PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseHIP(Mat B)
2410: {
2411: PetscFunctionBegin;
2412: PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
2413: PetscCall(MatCreate_MPIDense(B));
2414: PetscCall(MatConvert_MPIDense_MPIDenseHIP(B, MATMPIDENSEHIP, MAT_INPLACE_MATRIX, &B));
2415: PetscFunctionReturn(PETSC_SUCCESS);
2416: }
2417: #endif
2419: /*MC
2420: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2422: This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
2423: and `MATMPIDENSE` otherwise.
2425: Options Database Key:
2426: . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
2428: Level: beginner
2430: .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
2431: M*/
2433: /*MC
2434: MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.
2435:   The HIP analogue is `MATDENSEHIP` = "densehip".
2438: This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
2439: and `MATMPIDENSECUDA` otherwise.
2441: Options Database Key:
2442: . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
2444: Level: beginner
2446: .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2447: M*/
2449: /*MC
2450: MATDENSEHIP - "densehip" - A matrix type to be used for dense matrices on GPUs.
2452: This matrix type is identical to `MATSEQDENSEHIP` when constructed with a single process communicator,
2453: and `MATMPIDENSEHIP` otherwise.
2455: Options Database Key:
2456: . -mat_type densehip - sets the matrix type to `MATDENSEHIP` during a call to `MatSetFromOptions()`
2458: Level: beginner
2460: .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, `MATMPIDENSEHIP`, `MATDENSE`
2461: M*/
2463: /*@C
2464: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2466: Collective
2468: Input Parameters:
2469: + B - the matrix
2470: - data - optional location of matrix data. Set to `NULL` for PETSc
2471: to control all matrix memory allocation.
2473: Level: intermediate
2475: Notes:
2476: The dense format is fully compatible with standard Fortran
2477: storage by columns.
2479: The data input variable is intended primarily for Fortran programmers
2480: who wish to allocate their own matrix memory space. Most users should
2481: set `data` to `NULL`.
2483: .seealso: [](chapter_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2484: @*/
2485: PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
2486: {
2487: PetscFunctionBegin;
2489: PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
2490: PetscFunctionReturn(PETSC_SUCCESS);
2491: }
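/*
   Example (editorial sketch, not part of the original source): preallocating a
   MATMPIDENSE matrix created "by hand"; comm, m, n, M and N are placeholders.

      Mat B;
      PetscCall(MatCreate(comm, &B));
      PetscCall(MatSetSizes(B, m, n, M, N));
      PetscCall(MatSetType(B, MATMPIDENSE));
      PetscCall(MatMPIDenseSetPreallocation(B, NULL)); // NULL: PETSc allocates the column-major storage
*/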
2493: /*@
2494: MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
2495: array provided by the user. This is useful to avoid copying an array
2496:   into a matrix.
2498: Not Collective
2500: Input Parameters:
2501: + mat - the matrix
2502: - array - the array in column major order
2504: Level: developer
2506: Note:
2507: You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
2508: freed when the matrix is destroyed.
2510: .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
2511: `MatDenseReplaceArray()`
2512: @*/
2513: PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
2514: {
2515: PetscFunctionBegin;
2517: PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2518: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2519: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2520: mat->offloadmask = PETSC_OFFLOAD_CPU;
2521: #endif
2522: PetscFunctionReturn(PETSC_SUCCESS);
2523: }
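/*
   Example (editorial sketch, not part of the original source): temporarily backing
   a MATDENSE matrix A with a user-owned, column-major buffer whose leading
   dimension is the local number of rows (the default layout); A is a placeholder.

      PetscScalar *user;
      PetscInt     m, N;
      PetscCall(MatGetLocalSize(A, &m, NULL));
      PetscCall(MatGetSize(A, NULL, &N));
      PetscCall(PetscMalloc1((size_t)m * N, &user));
      PetscCall(MatDensePlaceArray(A, user)); // A now uses user[] as its storage
      // ... use A ...
      PetscCall(MatDenseResetArray(A));       // A returns to its original storage
      PetscCall(PetscFree(user));             // the caller keeps ownership of user[]
*/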
2525: /*@
2526:   MatDenseResetArray - Resets the matrix array to the array it had before the call to `MatDensePlaceArray()`
2528: Not Collective
2530:   Input Parameter:
2531: . mat - the matrix
2533: Level: developer
2535: Note:
2536: You can only call this after a call to `MatDensePlaceArray()`
2538: .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2539: @*/
2540: PetscErrorCode MatDenseResetArray(Mat mat)
2541: {
2542: PetscFunctionBegin;
2544: PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
2545: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2546: PetscFunctionReturn(PETSC_SUCCESS);
2547: }
2549: /*@
2550: MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2551: array provided by the user. This is useful to avoid copying an array
2552:   into a matrix.
2554: Not Collective
2556: Input Parameters:
2557: + mat - the matrix
2558: - array - the array in column major order
2560: Level: developer
2562: Note:
2563: The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2564: freed by the user. It will be freed when the matrix is destroyed.
2566: .seealso: [](chapter_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2567: @*/
2568: PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
2569: {
2570: PetscFunctionBegin;
2572: PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2573: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2574: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
2575: mat->offloadmask = PETSC_OFFLOAD_CPU;
2576: #endif
2577: PetscFunctionReturn(PETSC_SUCCESS);
2578: }
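/*
   Example (editorial sketch, not part of the original source): handing a
   PetscMalloc()-obtained buffer over to a MATDENSE matrix A; afterwards the buffer
   belongs to A and is freed when A is destroyed. A is a placeholder.

      PetscScalar *newv;
      PetscInt     m, N;
      PetscCall(MatGetLocalSize(A, &m, NULL));
      PetscCall(MatGetSize(A, NULL, &N));
      PetscCall(PetscMalloc1((size_t)m * N, &newv));
      // ... fill newv in column-major order ...
      PetscCall(MatDenseReplaceArray(A, newv)); // do NOT PetscFree(newv) afterwards
*/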
2580: #if defined(PETSC_HAVE_CUDA)
2581: /*@C
2582: MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2583: array provided by the user. This is useful to avoid copying an array
2584:   into a matrix.
2586: Not Collective
2588: Input Parameters:
2589: + mat - the matrix
2590: - array - the array in column major order
2592: Level: developer
2594: Note:
2595: You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2596: freed when the matrix is destroyed. The array must have been allocated with `cudaMalloc()`.
2598: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2599: @*/
2600: PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
2601: {
2602: PetscFunctionBegin;
2604: PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2605: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2606: mat->offloadmask = PETSC_OFFLOAD_GPU;
2607: PetscFunctionReturn(PETSC_SUCCESS);
2608: }
2610: /*@C
2611: MatDenseCUDAResetArray - Resets the matrix array to the array it had before the call to `MatDenseCUDAPlaceArray()`
2613: Not Collective
2615: Input Parameter:
2616: . mat - the matrix
2618: Level: developer
2620: Note:
2621: You can only call this after a call to `MatDenseCUDAPlaceArray()`
2623: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2624: @*/
2625: PetscErrorCode MatDenseCUDAResetArray(Mat mat)
2626: {
2627: PetscFunctionBegin;
2629: PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
2630: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2631: PetscFunctionReturn(PETSC_SUCCESS);
2632: }
2634: /*@C
2635: MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2636: array provided by the user. This is useful to avoid copying an array
2637: into a matrix
2639: Not Collective
2641: Input Parameters:
2642: + mat - the matrix
2643: - array - the array in column major order
2645: Level: developer
2647: Note:
2648: This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2649: The memory passed in CANNOT be freed by the user. It will be freed
2650: when the matrix is destroyed. The array should respect the matrix leading dimension.
2652: .seealso: [](chapter_matrices), `Mat`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2653: @*/
2654: PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
2655: {
2656: PetscFunctionBegin;
2658: PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2659: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2660: mat->offloadmask = PETSC_OFFLOAD_GPU;
2661: PetscFunctionReturn(PETSC_SUCCESS);
2662: }
2664: /*@C
2665: MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.
2667: Not Collective
2669: Input Parameter:
2670: . A - the matrix
2672: Output Parameter:
2673: . array - the GPU array in column major order
2675: Level: developer
2677: Notes:
2678: The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.
2680: The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
2682: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2683: @*/
2684: PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2685: {
2686: PetscFunctionBegin;
2688: PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2689: PetscCall(PetscObjectStateIncrease((PetscObject)A));
2690: PetscFunctionReturn(PETSC_SUCCESS);
2691: }
2693: /*@C
2694: MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
2696: Not Collective
2698: Input Parameters:
2699: + A - the matrix
2700: - array - the GPU array in column major order
2702: Level: developer
2704: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2705: @*/
2706: PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2707: {
2708: PetscFunctionBegin;
2710: PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2711: PetscCall(PetscObjectStateIncrease((PetscObject)A));
2712: A->offloadmask = PETSC_OFFLOAD_GPU;
2713: PetscFunctionReturn(PETSC_SUCCESS);
2714: }
2716: /*@C
2717: MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.
2719: Not Collective
2721: Input Parameter:
2722: . A - the matrix
2724: Output Parameter:
2725: . array - the GPU array in column major order
2727: Level: developer
2729: Note:
2730: Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`.
2732: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2733: @*/
2734: PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2735: {
2736: PetscFunctionBegin;
2738: PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2739: PetscFunctionReturn(PETSC_SUCCESS);
2740: }
2742: /*@C
2743: MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2745: Not Collective
2747: Input Parameters:
2748: + A - the matrix
2749: - array - the GPU array in column major order
2751: Level: developer
2753: Note:
2754: Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`.
2756: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2757: @*/
2758: PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2759: {
2760: PetscFunctionBegin;
2761: PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2762: PetscFunctionReturn(PETSC_SUCCESS);
2763: }
2765: /*@C
2766: MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
2768: Not Collective
2770: Input Parameter:
2771: . A - the matrix
2773: Output Parameter:
2774: . array - the GPU array in column major order
2776: Level: developer
2778: Note:
2779: Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.
2781: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2782: @*/
2783: PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2784: {
2785: PetscFunctionBegin;
2787: PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
2788: PetscCall(PetscObjectStateIncrease((PetscObject)A));
2789: PetscFunctionReturn(PETSC_SUCCESS);
2790: }
2792: /*@C
2793: MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.
2795: Not Collective
2797: Input Parameters:
2798: + A - the matrix
2799: - array - the GPU array in column major order
2801: Level: developer
2803: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2804: @*/
2805: PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2806: {
2807: PetscFunctionBegin;
2809: PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
2810: PetscCall(PetscObjectStateIncrease((PetscObject)A));
2811: A->offloadmask = PETSC_OFFLOAD_GPU;
2812: PetscFunctionReturn(PETSC_SUCCESS);
2813: }
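/* Editor's note (not part of the PETSc source): a minimal sketch pairing MatDenseCUDAPlaceArray() with
   MatDenseCUDAResetArray(). The device buffer "d_vals" is an illustrative assumption; it must have been
   allocated with cudaMalloc() and filled in column-major order, and it remains owned by the caller. */
static PetscErrorCode ExampleDenseCUDAPlaceReset(Mat A, PetscScalar *d_vals)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUDAPlaceArray(A, d_vals)); /* A now uses the device buffer directly; nothing is copied */
  /* ... GPU operations on A read and write d_vals ... */
  PetscCall(MatDenseCUDAResetArray(A)); /* return to the device storage A had before the place call */
  PetscFunctionReturn(PETSC_SUCCESS);
}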
2814: #endif
2816: #if defined(PETSC_HAVE_HIP)
2817: /*@C
2818: MatDenseHIPPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2819: array provided by the user. This is useful to avoid copying an array
2820: into a matrix
2822: Not Collective
2824: Input Parameters:
2825: + mat - the matrix
2826: - array - the array in column major order
2828: Level: developer
2830: Note:
2831: You can return to the original array with a call to `MatDenseHIPResetArray()`. The user is responsible for freeing this array; it will not be
2832: freed when the matrix is destroyed. The array must have been allocated with `hipMalloc()`.
2834: .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPResetArray()`
2835: @*/
2836: PetscErrorCode MatDenseHIPPlaceArray(Mat mat, const PetscScalar *array)
2837: {
2838: PetscFunctionBegin;
2840: PetscUseMethod(mat, "MatDenseHIPPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2841: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2842: mat->offloadmask = PETSC_OFFLOAD_GPU;
2843: PetscFunctionReturn(PETSC_SUCCESS);
2844: }
2846: /*@C
2847: MatDenseHIPResetArray - Resets the matrix array to the array it had before the call to `MatDenseHIPPlaceArray()`
2849: Not Collective
2851: Input Parameter:
2852: . mat - the matrix
2854: Level: developer
2856: Note:
2857: You can only call this after a call to `MatDenseHIPPlaceArray()`
2859: .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`
2860: @*/
2861: PetscErrorCode MatDenseHIPResetArray(Mat mat)
2862: {
2863: PetscFunctionBegin;
2865: PetscUseMethod(mat, "MatDenseHIPResetArray_C", (Mat), (mat));
2866: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2867: PetscFunctionReturn(PETSC_SUCCESS);
2868: }
2870: /*@C
2871: MatDenseHIPReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2872: array provided by the user. This is useful to avoid copying an array
2873: into a matrix
2875: Not Collective
2877: Input Parameters:
2878: + mat - the matrix
2879: - array - the array in column major order
2881: Level: developer
2883: Note:
2884: This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2885: The memory passed in CANNOT be freed by the user. It will be freed
2886: when the matrix is destroyed. The array should respect the matrix leading dimension.
2888: .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPPlaceArray()`, `MatDenseHIPResetArray()`
2889: @*/
2890: PetscErrorCode MatDenseHIPReplaceArray(Mat mat, const PetscScalar *array)
2891: {
2892: PetscFunctionBegin;
2894: PetscUseMethod(mat, "MatDenseHIPReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2895: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2896: mat->offloadmask = PETSC_OFFLOAD_GPU;
2897: PetscFunctionReturn(PETSC_SUCCESS);
2898: }
2900: /*@C
2901: MatDenseHIPGetArrayWrite - Provides write access to the HIP buffer inside a dense matrix.
2903: Not Collective
2905: Input Parameter:
2906: . A - the matrix
2908: Output Parameter:
2909: . array - the GPU array in column major order
2911: Level: developer
2913: Notes:
2914: The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseHIPGetArray()`.
2915: The array must be restored with `MatDenseHIPRestoreArrayWrite()` when no longer needed.
2917: .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArrayRead()`
2918: @*/
2919: PetscErrorCode MatDenseHIPGetArrayWrite(Mat A, PetscScalar **a)
2920: {
2921: PetscFunctionBegin;
2923: PetscUseMethod(A, "MatDenseHIPGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2924: PetscCall(PetscObjectStateIncrease((PetscObject)A));
2925: PetscFunctionReturn(PETSC_SUCCESS);
2926: }
2928: /*@C
2929: MatDenseHIPRestoreArrayWrite - Restore write access to the HIP buffer inside a dense matrix previously obtained with `MatDenseHIPGetArrayWrite()`.
2931: Not Collective
2933: Input Parameters:
2934: + A - the matrix
2935: - array - the GPU array in column major order
2937: Level: developer
2939: .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
2940: @*/
2941: PetscErrorCode MatDenseHIPRestoreArrayWrite(Mat A, PetscScalar **a)
2942: {
2943: PetscFunctionBegin;
2945: PetscUseMethod(A, "MatDenseHIPRestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2946: PetscCall(PetscObjectStateIncrease((PetscObject)A));
2947: A->offloadmask = PETSC_OFFLOAD_GPU;
2948: PetscFunctionReturn(PETSC_SUCCESS);
2949: }
2951: /*@C
2952: MatDenseHIPGetArrayRead - Provides read-only access to the HIP buffer inside a dense matrix. The array must be restored with `MatDenseHIPRestoreArrayRead()` when no longer needed.
2954: Not Collective
2956: Input Parameter:
2957: . A - the matrix
2959: Output Parameter:
2960: . array - the GPU array in column major order
2962: Level: developer
2964: Note:
2965: Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseHIPGetArrayWrite()`.
2967: .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
2968: @*/
2969: PetscErrorCode MatDenseHIPGetArrayRead(Mat A, const PetscScalar **a)
2970: {
2971: PetscFunctionBegin;
2973: PetscUseMethod(A, "MatDenseHIPGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2974: PetscFunctionReturn(PETSC_SUCCESS);
2975: }
2977: /*@C
2978: MatDenseHIPRestoreArrayRead - Restore read-only access to the HIP buffer inside a dense matrix previously obtained with a call to `MatDenseHIPGetArrayRead()`.
2980: Not Collective
2982: Input Parameters:
2983: + A - the matrix
2984: - array - the GPU array in column major order
2986: Level: developer
2988: Note:
2989: Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseHIPGetArrayWrite()`.
2991: .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPGetArrayRead()`
2992: @*/
2993: PetscErrorCode MatDenseHIPRestoreArrayRead(Mat A, const PetscScalar **a)
2994: {
2995: PetscFunctionBegin;
2996: PetscUseMethod(A, "MatDenseHIPRestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2997: PetscFunctionReturn(PETSC_SUCCESS);
2998: }
3000: /*@C
3001: MatDenseHIPGetArray - Provides access to the HIP buffer inside a dense matrix. The array must be restored with `MatDenseHIPRestoreArray()` when no longer needed.
3003: Not Collective
3005: Input Parameter:
3006: . A - the matrix
3008: Output Parameter:
3009: . array - the GPU array in column major order
3011: Level: developer
3013: Notes:
3014: Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseHIPGetArrayWrite()`.
3016: For read-only access, use `MatDenseHIPGetArrayRead()`.
3018: .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArrayRead()`, `MatDenseHIPRestoreArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`
3019: @*/
3020: PetscErrorCode MatDenseHIPGetArray(Mat A, PetscScalar **a)
3021: {
3022: PetscFunctionBegin;
3024: PetscUseMethod(A, "MatDenseHIPGetArray_C", (Mat, PetscScalar **), (A, a));
3025: PetscCall(PetscObjectStateIncrease((PetscObject)A));
3026: PetscFunctionReturn(PETSC_SUCCESS);
3027: }
3029: /*@C
3030: MatDenseHIPRestoreArray - Restore access to the HIP buffer inside a dense matrix previously obtained with `MatDenseHIPGetArray()`.
3032: Not Collective
3034: Input Parameters:
3035: + A - the matrix
3036: - array - the GPU array in column major order
3038: Level: developer
3040: .seealso: [](chapter_matrices), `Mat`, `MatDenseHIPGetArray()`, `MatDenseHIPRestoreArrayWrite()`, `MatDenseHIPGetArrayWrite()`, `MatDenseHIPRestoreArrayRead()`, `MatDenseHIPGetArrayRead()`
3041: @*/
3042: PetscErrorCode MatDenseHIPRestoreArray(Mat A, PetscScalar **a)
3043: {
3044: PetscFunctionBegin;
3046: PetscUseMethod(A, "MatDenseHIPRestoreArray_C", (Mat, PetscScalar **), (A, a));
3047: PetscCall(PetscObjectStateIncrease((PetscObject)A));
3048: A->offloadmask = PETSC_OFFLOAD_GPU;
3049: PetscFunctionReturn(PETSC_SUCCESS);
3050: }
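/* Editor's note (not part of the PETSc source): a minimal sketch of the write-only access pattern using
   MatDenseHIPGetArrayWrite()/MatDenseHIPRestoreArrayWrite(), zeroing the local device block with hipMemset().
   The helper name is an illustrative assumption; PetscCallHIP() is provided by petscdevice_hip.h. */
static PetscErrorCode ExampleDenseHIPZero(Mat A)
{
  PetscScalar *d_a;
  PetscInt     lda, N;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatGetSize(A, NULL, &N)); /* the local block stores all N global columns */
  PetscCall(MatDenseHIPGetArrayWrite(A, &d_a)); /* current contents need not be valid; everything is overwritten */
  PetscCallHIP(hipMemset(d_a, 0, (size_t)lda * N * sizeof(PetscScalar)));
  PetscCall(MatDenseHIPRestoreArrayWrite(A, &d_a)); /* marks the GPU copy as up to date */
  PetscFunctionReturn(PETSC_SUCCESS);
}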
3051: #endif
3053: /*@C
3054: MatCreateDense - Creates a matrix in `MATDENSE` format.
3056: Collective
3058: Input Parameters:
3059: + comm - MPI communicator
3060: . m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
3061: . n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
3062: . M - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
3063: . N - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
3064: - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
3065: to control all matrix memory allocation.
3067: Output Parameter:
3068: . A - the matrix
3070: Level: intermediate
3072: Notes:
3073: The dense format is fully compatible with standard Fortran
3074: storage by columns.
3076: Although local portions of the matrix are stored in column-major
3077: order, the matrix is partitioned across MPI ranks by row.
3079: The data input variable is intended primarily for Fortran programmers
3080: who wish to allocate their own matrix memory space. Most users should
3081: set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
3083: The user MUST specify either the local or global matrix dimensions
3084: (possibly both).
3086: .seealso: [](chapter_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
3087: @*/
3088: PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3089: {
3090: PetscFunctionBegin;
3091: PetscCall(MatCreate(comm, A));
3092: PetscCall(MatSetSizes(*A, m, n, M, N));
3093: PetscCall(MatSetType(*A, MATDENSE));
3094: PetscCall(MatSeqDenseSetPreallocation(*A, data));
3095: PetscCall(MatMPIDenseSetPreallocation(*A, data));
3096: PetscFunctionReturn(PETSC_SUCCESS);
3097: }
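/* Editor's note (not part of the PETSc source): a self-contained sketch that creates a parallel MATDENSE
   matrix with MatCreateDense(), fills the locally owned rows, and assembles it; the size N and the entry
   formula are purely illustrative. */
#include <petscmat.h>
int main(int argc, char **argv)
{
  Mat      A;
  PetscInt i, j, rstart, rend, N = 8;

  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, N, N, NULL, &A));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (i = rstart; i < rend; i++) {
    for (j = 0; j < N; j++) {
      PetscScalar v = 1.0 / (PetscReal)(i + j + 1); /* Hilbert-like entries */
      PetscCall(MatSetValues(A, 1, &i, 1, &j, &v, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));
  PetscCall(MatDestroy(&A));
  PetscCall(PetscFinalize());
  return 0;
}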
3099: #if defined(PETSC_HAVE_CUDA)
3100: /*@C
3101: MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
3103: Collective
3105: Input Parameters:
3106: + comm - MPI communicator
3107: . m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
3108: . n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
3109: . M - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
3110: . N - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
3111: - data - optional location of GPU matrix data. Use `NULL` for PETSc
3112: to control matrix memory allocation.
3114: Output Parameter:
3115: . A - the matrix
3117: Level: intermediate
3119: .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
3120: @*/
3121: PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3122: {
3123: PetscFunctionBegin;
3124: PetscCall(MatCreate(comm, A));
3126: PetscCall(MatSetSizes(*A, m, n, M, N));
3127: PetscCall(MatSetType(*A, MATDENSECUDA));
3128: PetscCall(MatSeqDenseCUDASetPreallocation(*A, data));
3129: PetscCall(MatMPIDenseCUDASetPreallocation(*A, data));
3130: PetscFunctionReturn(PETSC_SUCCESS);
3131: }
3132: #endif
3134: #if defined(PETSC_HAVE_HIP)
3135: /*@C
3136: MatCreateDenseHIP - Creates a matrix in `MATDENSEHIP` format using HIP.
3138: Collective
3140: Input Parameters:
3141: + comm - MPI communicator
3142: . m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
3143: . n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
3144: . M - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
3145: . N - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
3146: - data - optional location of GPU matrix data. Use `NULL` for PETSc
3147: to control matrix memory allocation.
3149: Output Parameter:
3150: . A - the matrix
3152: Level: intermediate
3154: .seealso: [](chapter_matrices), `Mat`, `MATDENSEHIP`, `MatCreate()`, `MatCreateDense()`
3155: @*/
3156: PetscErrorCode MatCreateDenseHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
3157: {
3158: PetscFunctionBegin;
3159: PetscCall(MatCreate(comm, A));
3161: PetscCall(MatSetSizes(*A, m, n, M, N));
3162: PetscCall(MatSetType(*A, MATDENSEHIP));
3163: PetscCall(MatSeqDenseHIPSetPreallocation(*A, data));
3164: PetscCall(MatMPIDenseHIPSetPreallocation(*A, data));
3165: PetscFunctionReturn(PETSC_SUCCESS);
3166: }
3167: #endif
3169: static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
3170: {
3171: Mat mat;
3172: Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
3174: PetscFunctionBegin;
3175: *newmat = NULL;
3176: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
3177: PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3178: PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
3179: a = (Mat_MPIDense *)mat->data;
3181: mat->factortype = A->factortype;
3182: mat->assembled = PETSC_TRUE;
3183: mat->preallocated = PETSC_TRUE;
3185: mat->insertmode = NOT_SET_VALUES;
3186: a->donotstash = oldmat->donotstash;
3188: PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
3189: PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
3191: PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3193: *newmat = mat;
3194: PetscFunctionReturn(PETSC_SUCCESS);
3195: }
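/* Editor's note (not part of the PETSc source): MatDuplicate_MPIDense() above is reached through the public
   MatDuplicate() interface; a minimal sketch that copies an assembled matrix together with its values. The
   helper name is an illustrative assumption. */
static PetscErrorCode ExampleDuplicateDense(Mat A)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); /* B gets A's type, layout, and values */
  PetscCall(MatScale(B, 2.0));                     /* modifying B leaves A untouched */
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}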
3197: PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
3198: {
3199: PetscBool isbinary;
3200: #if defined(PETSC_HAVE_HDF5)
3201: PetscBool ishdf5;
3202: #endif
3204: PetscFunctionBegin;
3207: /* force binary viewer to load .info file if it has not yet done so */
3208: PetscCall(PetscViewerSetUp(viewer));
3209: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3210: #if defined(PETSC_HAVE_HDF5)
3211: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3212: #endif
3213: if (isbinary) {
3214: PetscCall(MatLoad_Dense_Binary(newMat, viewer));
3215: #if defined(PETSC_HAVE_HDF5)
3216: } else if (ishdf5) {
3217: PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
3218: #endif
3219: } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3220: PetscFunctionReturn(PETSC_SUCCESS);
3221: }
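/* Editor's note (not part of the PETSc source): MatLoad_MPIDense() above is invoked through MatLoad(); a
   minimal sketch reading a MATDENSE matrix from a PETSc binary file whose name "dense.dat" is illustrative. */
static PetscErrorCode ExampleLoadDense(MPI_Comm comm, Mat *A)
{
  PetscViewer viewer;

  PetscFunctionBegin;
  PetscCall(PetscViewerBinaryOpen(comm, "dense.dat", FILE_MODE_READ, &viewer));
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetType(*A, MATDENSE));
  PetscCall(MatLoad(*A, viewer));
  PetscCall(PetscViewerDestroy(&viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}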
3223: static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
3224: {
3225: Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
3226: Mat a, b;
3228: PetscFunctionBegin;
3229: a = matA->A;
3230: b = matB->A;
3231: PetscCall(MatEqual(a, b, flag));
3232: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
3233: PetscFunctionReturn(PETSC_SUCCESS);
3234: }
3236: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
3237: {
3238: Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
3240: PetscFunctionBegin;
3241: PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
3242: PetscCall(MatDestroy(&atb->atb));
3243: PetscCall(PetscFree(atb));
3244: PetscFunctionReturn(PETSC_SUCCESS);
3245: }
3247: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
3248: {
3249: Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
3251: PetscFunctionBegin;
3252: PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
3253: PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
3254: PetscCall(PetscFree(abt));
3255: PetscFunctionReturn(PETSC_SUCCESS);
3256: }
3258: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3259: {
3260: Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3261: Mat_TransMatMultDense *atb;
3262: MPI_Comm comm;
3263: PetscMPIInt size, *recvcounts;
3264: PetscScalar *carray, *sendbuf;
3265: const PetscScalar *atbarray;
3266: PetscInt i, cN = C->cmap->N, proc, k, j, lda;
3267: const PetscInt *ranges;
3269: PetscFunctionBegin;
3270: MatCheckProduct(C, 3);
3271: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3272: atb = (Mat_TransMatMultDense *)C->product->data;
3273: recvcounts = atb->recvcounts;
3274: sendbuf = atb->sendbuf;
3276: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3277: PetscCallMPI(MPI_Comm_size(comm, &size));
3279: /* compute atbarray = aseq^T * bseq */
3280: PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
3282: PetscCall(MatGetOwnershipRanges(C, &ranges));
3284: /* arrange atbarray into sendbuf */
3285: PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
3286: PetscCall(MatDenseGetLDA(atb->atb, &lda));
3287: for (proc = 0, k = 0; proc < size; proc++) {
3288: for (j = 0; j < cN; j++) {
3289: for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
3290: }
3291: }
3292: PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
3294: /* sum the atbarray contributions from all ranks into the local values of C */
3295: PetscCall(MatDenseGetArrayWrite(c->A, &carray));
3296: PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
3297: PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
3298: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3299: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3300: PetscFunctionReturn(PETSC_SUCCESS);
3301: }
3303: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3304: {
3305: MPI_Comm comm;
3306: PetscMPIInt size;
3307: PetscInt cm = A->cmap->n, cM, cN = B->cmap->N;
3308: Mat_TransMatMultDense *atb;
3309: PetscBool cisdense = PETSC_FALSE;
3310: PetscInt i;
3311: const PetscInt *ranges;
3313: PetscFunctionBegin;
3314: MatCheckProduct(C, 4);
3315: PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3316: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3317: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
3318: SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3319: }
3321: /* create matrix product C */
3322: PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
3323: #if defined(PETSC_HAVE_CUDA)
3324: PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
3325: #endif
3326: #if defined(PETSC_HAVE_HIP)
3327: PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
3328: #endif
3329: if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
3330: PetscCall(MatSetUp(C));
3332: /* create data structure for reuse C */
3333: PetscCallMPI(MPI_Comm_size(comm, &size));
3334: PetscCall(PetscNew(&atb));
3335: cM = C->rmap->N;
3336: PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
3337: PetscCall(MatGetOwnershipRanges(C, &ranges));
3338: for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
3340: C->product->data = atb;
3341: C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
3342: PetscFunctionReturn(PETSC_SUCCESS);
3343: }
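/* Editor's note (not part of the PETSc source): the symbolic/numeric pair above implements the public
   MatTransposeMatMult() for two MATMPIDENSE operands; a minimal sketch computing and reusing C = A^T * B.
   The helper name is an illustrative assumption. */
static PetscErrorCode ExampleTransposeMatMult(Mat A, Mat B, Mat *C)
{
  PetscFunctionBegin;
  PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, C)); /* allocate and compute C */
  /* ... change the entries (but not the sizes) of A or B ... */
  PetscCall(MatTransposeMatMult(A, B, MAT_REUSE_MATRIX, PETSC_DEFAULT, C)); /* recompute into the same C */
  PetscFunctionReturn(PETSC_SUCCESS);
}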
3345: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3346: {
3347: MPI_Comm comm;
3348: PetscMPIInt i, size;
3349: PetscInt maxRows, bufsiz;
3350: PetscMPIInt tag;
3351: PetscInt alg;
3352: Mat_MatTransMultDense *abt;
3353: Mat_Product *product = C->product;
3354: PetscBool flg;
3356: PetscFunctionBegin;
3357: MatCheckProduct(C, 4);
3358: PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3359: /* check local size of A and B */
3360: PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
3362: PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
3363: alg = flg ? 0 : 1;
3365: /* setup matrix product C */
3366: PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
3367: PetscCall(MatSetType(C, MATMPIDENSE));
3368: PetscCall(MatSetUp(C));
3369: PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
3371: /* create data structure for reuse C */
3372: PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3373: PetscCallMPI(MPI_Comm_size(comm, &size));
3374: PetscCall(PetscNew(&abt));
3375: abt->tag = tag;
3376: abt->alg = alg;
3377: switch (alg) {
3378: case 1: /* alg: "cyclic" */
3379: for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
3380: bufsiz = A->cmap->N * maxRows;
3381: PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
3382: break;
3383: default: /* alg: "allgatherv" */
3384: PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
3385: PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
3386: for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
3387: for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
3388: break;
3389: }
3391: C->product->data = abt;
3392: C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
3393: C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
3394: PetscFunctionReturn(PETSC_SUCCESS);
3395: }
3397: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
3398: {
3399: Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3400: Mat_MatTransMultDense *abt;
3401: MPI_Comm comm;
3402: PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
3403: PetscScalar *sendbuf, *recvbuf = NULL, *cv;
3404: PetscInt i, cK = A->cmap->N, k, j, bn;
3405: PetscScalar _DOne = 1.0, _DZero = 0.0;
3406: const PetscScalar *av, *bv;
3407: PetscBLASInt cm, cn, ck, alda, blda = 0, clda;
3408: MPI_Request reqs[2];
3409: const PetscInt *ranges;
3411: PetscFunctionBegin;
3412: MatCheckProduct(C, 3);
3413: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3414: abt = (Mat_MatTransMultDense *)C->product->data;
3415: PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
3416: PetscCallMPI(MPI_Comm_rank(comm, &rank));
3417: PetscCallMPI(MPI_Comm_size(comm, &size));
3418: PetscCall(MatDenseGetArrayRead(a->A, &av));
3419: PetscCall(MatDenseGetArrayRead(b->A, &bv));
3420: PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3421: PetscCall(MatDenseGetLDA(a->A, &i));
3422: PetscCall(PetscBLASIntCast(i, &alda));
3423: PetscCall(MatDenseGetLDA(b->A, &i));
3424: PetscCall(PetscBLASIntCast(i, &blda));
3425: PetscCall(MatDenseGetLDA(c->A, &i));
3426: PetscCall(PetscBLASIntCast(i, &clda));
3427: PetscCall(MatGetOwnershipRanges(B, &ranges));
3428: bn = B->rmap->n;
3429: if (blda == bn) {
3430: sendbuf = (PetscScalar *)bv;
3431: } else {
3432: sendbuf = abt->buf[0];
3433: for (k = 0, i = 0; i < cK; i++) {
3434: for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
3435: }
3436: }
3437: if (size > 1) {
3438: sendto = (rank + size - 1) % size;
3439: recvfrom = (rank + size + 1) % size;
3440: } else {
3441: sendto = recvfrom = 0;
3442: }
3443: PetscCall(PetscBLASIntCast(cK, &ck));
3444: PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3445: recvisfrom = rank;
3446: for (i = 0; i < size; i++) {
3447: /* any previous send/receive has completed, so both buffers can be read/modified */
3448: PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
3449: PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
3451: if (nextrecvisfrom != rank) {
3452: /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
3453: sendsiz = cK * bn;
3454: recvsiz = cK * nextbn;
3455: recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
3456: PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
3457: PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
3458: }
3460: /* local aseq * sendbuf^T */
3461: PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
3462: if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
3464: if (nextrecvisfrom != rank) {
3465: /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
3466: PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
3467: }
3468: bn = nextbn;
3469: recvisfrom = nextrecvisfrom;
3470: sendbuf = recvbuf;
3471: }
3472: PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3473: PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3474: PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3475: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3476: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3477: PetscFunctionReturn(PETSC_SUCCESS);
3478: }
3480: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
3481: {
3482: Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
3483: Mat_MatTransMultDense *abt;
3484: MPI_Comm comm;
3485: PetscMPIInt size;
3486: PetscScalar *cv, *sendbuf, *recvbuf;
3487: const PetscScalar *av, *bv;
3488: PetscInt blda, i, cK = A->cmap->N, k, j, bn;
3489: PetscScalar _DOne = 1.0, _DZero = 0.0;
3490: PetscBLASInt cm, cn, ck, alda, clda;
3492: PetscFunctionBegin;
3493: MatCheckProduct(C, 3);
3494: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3495: abt = (Mat_MatTransMultDense *)C->product->data;
3496: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3497: PetscCallMPI(MPI_Comm_size(comm, &size));
3498: PetscCall(MatDenseGetArrayRead(a->A, &av));
3499: PetscCall(MatDenseGetArrayRead(b->A, &bv));
3500: PetscCall(MatDenseGetArrayWrite(c->A, &cv));
3501: PetscCall(MatDenseGetLDA(a->A, &i));
3502: PetscCall(PetscBLASIntCast(i, &alda));
3503: PetscCall(MatDenseGetLDA(b->A, &blda));
3504: PetscCall(MatDenseGetLDA(c->A, &i));
3505: PetscCall(PetscBLASIntCast(i, &clda));
3506: /* copy transpose of B into buf[0] */
3507: bn = B->rmap->n;
3508: sendbuf = abt->buf[0];
3509: recvbuf = abt->buf[1];
3510: for (k = 0, j = 0; j < bn; j++) {
3511: for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
3512: }
3513: PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
3514: PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
3515: PetscCall(PetscBLASIntCast(cK, &ck));
3516: PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
3517: PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
3518: if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
3519: PetscCall(MatDenseRestoreArrayRead(a->A, &av));
3521: PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
3522: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3523: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3524: PetscFunctionReturn(PETSC_SUCCESS);
3525: }
3527: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3528: {
3529: Mat_MatTransMultDense *abt;
3531: PetscFunctionBegin;
3532: MatCheckProduct(C, 3);
3533: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
3534: abt = (Mat_MatTransMultDense *)C->product->data;
3535: switch (abt->alg) {
3536: case 1:
3537: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
3538: break;
3539: default:
3540: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
3541: break;
3542: }
3543: PetscFunctionReturn(PETSC_SUCCESS);
3544: }
3546: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
3547: {
3548: Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
3550: PetscFunctionBegin;
3551: PetscCall(MatDestroy(&ab->Ce));
3552: PetscCall(MatDestroy(&ab->Ae));
3553: PetscCall(MatDestroy(&ab->Be));
3554: PetscCall(PetscFree(ab));
3555: PetscFunctionReturn(PETSC_SUCCESS);
3556: }
3558: #if defined(PETSC_HAVE_ELEMENTAL)
3559: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
3560: {
3561: Mat_MatMultDense *ab;
3563: PetscFunctionBegin;
3564: MatCheckProduct(C, 3);
3565: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
3566: ab = (Mat_MatMultDense *)C->product->data;
3567: PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
3568: PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
3569: PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
3570: PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
3571: PetscFunctionReturn(PETSC_SUCCESS);
3572: }
3574: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
3575: {
3576: Mat Ae, Be, Ce;
3577: Mat_MatMultDense *ab;
3579: PetscFunctionBegin;
3580: MatCheckProduct(C, 4);
3581: PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
3582: /* check local size of A and B */
3583: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
3584: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
3585: }
3587: /* create elemental matrices Ae and Be */
3588: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
3589: PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
3590: PetscCall(MatSetType(Ae, MATELEMENTAL));
3591: PetscCall(MatSetUp(Ae));
3592: PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
3594: PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
3595: PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
3596: PetscCall(MatSetType(Be, MATELEMENTAL));
3597: PetscCall(MatSetUp(Be));
3598: PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
3600: /* compute symbolic Ce = Ae*Be */
3601: PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
3602: PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
3604: /* setup C */
3605: PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
3606: PetscCall(MatSetType(C, MATDENSE));
3607: PetscCall(MatSetUp(C));
3609: /* create data structure for reuse Cdense */
3610: PetscCall(PetscNew(&ab));
3611: ab->Ae = Ae;
3612: ab->Be = Be;
3613: ab->Ce = Ce;
3615: C->product->data = ab;
3616: C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
3617: C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
3618: PetscFunctionReturn(PETSC_SUCCESS);
3619: }
3620: #endif
3622: #if defined(PETSC_HAVE_ELEMENTAL)
3623: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
3624: {
3625: PetscFunctionBegin;
3626: C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
3627: C->ops->productsymbolic = MatProductSymbolic_AB;
3628: PetscFunctionReturn(PETSC_SUCCESS);
3629: }
3630: #endif
3632: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
3633: {
3634: Mat_Product *product = C->product;
3635: Mat A = product->A, B = product->B;
3637: PetscFunctionBegin;
3638: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
3639: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
3640: C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
3641: C->ops->productsymbolic = MatProductSymbolic_AtB;
3642: PetscFunctionReturn(PETSC_SUCCESS);
3643: }
3645: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
3646: {
3647: Mat_Product *product = C->product;
3648: const char *algTypes[2] = {"allgatherv", "cyclic"};
3649: PetscInt alg, nalg = 2;
3650: PetscBool flg = PETSC_FALSE;
3652: PetscFunctionBegin;
3653: /* Set default algorithm */
3654: alg = 0; /* default is allgatherv */
3655: PetscCall(PetscStrcmp(product->alg, "default", &flg));
3656: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
3658: /* Get runtime option */
3659: if (product->api_user) {
3660: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
3661: PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
3662: PetscOptionsEnd();
3663: } else {
3664: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
3665: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
3666: PetscOptionsEnd();
3667: }
3668: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
3670: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
3671: C->ops->productsymbolic = MatProductSymbolic_ABt;
3672: PetscFunctionReturn(PETSC_SUCCESS);
3673: }
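/* Editor's note (not part of the PETSc source): a minimal sketch of the A*B^T product configured above; the
   algorithm can be selected at run time, e.g. -matmattransmult_mpidense_mpidense_via cyclic (the default is
   allgatherv). The helper name is an illustrative assumption. */
static PetscErrorCode ExampleMatMatTransposeMult(Mat A, Mat B, Mat *C)
{
  PetscFunctionBegin;
  PetscCall(MatMatTransposeMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, C)); /* C = A * B^T */
  PetscFunctionReturn(PETSC_SUCCESS);
}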
3675: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
3676: {
3677: Mat_Product *product = C->product;
3679: PetscFunctionBegin;
3680: switch (product->type) {
3681: #if defined(PETSC_HAVE_ELEMENTAL)
3682: case MATPRODUCT_AB:
3683: PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
3684: break;
3685: #endif
3686: case MATPRODUCT_AtB:
3687: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
3688: break;
3689: case MATPRODUCT_ABt:
3690: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
3691: break;
3692: default:
3693: break;
3694: }
3695: PetscFunctionReturn(PETSC_SUCCESS);
3696: }
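/* Editor's note (not part of the PETSc source): the dispatch routine above is reached through the generic
   MatProduct interface; a minimal sketch of the explicit pipeline for C = A^T * B with MATMPIDENSE operands.
   The helper name is an illustrative assumption. */
static PetscErrorCode ExampleProductAtB(Mat A, Mat B, Mat *C)
{
  PetscFunctionBegin;
  PetscCall(MatProductCreate(A, B, NULL, C));
  PetscCall(MatProductSetType(*C, MATPRODUCT_AtB));
  PetscCall(MatProductSetFromOptions(*C)); /* ends up in MatProductSetFromOptions_MPIDense() above */
  PetscCall(MatProductSymbolic(*C));       /* allocates C with the proper type and layout */
  PetscCall(MatProductNumeric(*C));        /* fills C with the values of A^T * B */
  PetscFunctionReturn(PETSC_SUCCESS);
}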