Actual source code: mpidense.c
2: /*
3: Basic functions for parallel dense matrices.
4: */
6: #include <../src/mat/impls/dense/mpi/mpidense.h>
7: #include <../src/mat/impls/aij/mpi/mpiaij.h>
8: #include <petscblaslapack.h>
10: /*@
11: MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the sequential
12: matrix that represents the operator. For a sequential matrix it returns the matrix itself.
14: Input Parameter:
15: . A - the sequential or MPI dense matrix
17: Output Parameter:
18: . B - the inner matrix
20: Level: intermediate
22: .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
23: @*/
24: PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
25: {
26: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
27: PetscBool flg;
31: PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg);
32: if (flg) *B = mat->A;
33: else {
34: PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg);
36: *B = A;
37: }
38: return 0;
39: }
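/*
   Usage sketch (illustrative, not part of the original source): obtaining the local sequential
   block of a parallel dense matrix. The matrix A and its global sizes are assumptions made for
   the example.

     Mat A, lA;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 20, NULL, &A));
     PetscCall(MatDenseGetLocalMatrix(A, &lA)); // lA is the per-rank MATSEQDENSE block
     // ... work on lA directly; it is owned by A, so do not destroy it ...
     PetscCall(MatDestroy(&A));
*/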
41: PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
42: {
43: Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
44: Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
46: MatCopy(Amat->A, Bmat->A, s);
47: return 0;
48: }
50: PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
51: {
52: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
53: PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
54: PetscScalar *v;
56: MatDenseGetArray(mat->A, &v);
57: MatDenseGetLDA(mat->A, &lda);
58: rend2 = PetscMin(rend, A->cmap->N);
59: if (rend2 > rstart) {
60: for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
61: PetscLogFlops(rend2 - rstart);
62: }
63: MatDenseRestoreArray(mat->A, &v);
64: return 0;
65: }
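/*
   Usage sketch (illustrative): from the caller's side the routine above is reached through
   MatShift(), which adds alpha to every globally owned diagonal entry. A is an assumption,
   taken to be an already assembled MATMPIDENSE matrix.

     Mat         A;
     PetscScalar alpha = 2.0;
     PetscCall(MatShift(A, alpha)); // A <- A + alpha*I, applied to the locally owned rows
*/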
67: PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
68: {
69: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
70: PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
73: lrow = row - rstart;
74: MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v);
75: return 0;
76: }
78: PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
79: {
80: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
81: PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
84: lrow = row - rstart;
85: MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v);
86: return 0;
87: }
89: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
90: {
91: Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
92: PetscInt m = A->rmap->n, rstart = A->rmap->rstart;
93: PetscScalar *array;
94: MPI_Comm comm;
95: PetscBool flg;
96: Mat B;
98: MatHasCongruentLayouts(A, &flg);
100: PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B);
101: if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
103: PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg);
105: PetscObjectGetComm((PetscObject)(mdn->A), &comm);
106: MatCreate(comm, &B);
107: MatSetSizes(B, m, m, m, m);
108: MatSetType(B, ((PetscObject)mdn->A)->type_name);
109: MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array);
110: MatSeqDenseSetPreallocation(B, array + m * rstart);
111: MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array);
112: PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B);
113: *a = B;
114: MatDestroy(&B);
115: } else *a = B;
116: return 0;
117: }
119: PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
120: {
121: Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
122: PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
123: PetscBool roworiented = A->roworiented;
125: for (i = 0; i < m; i++) {
126: if (idxm[i] < 0) continue;
128: if (idxm[i] >= rstart && idxm[i] < rend) {
129: row = idxm[i] - rstart;
130: if (roworiented) {
131: MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv);
132: } else {
133: for (j = 0; j < n; j++) {
134: if (idxn[j] < 0) continue;
136: MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv);
137: }
138: }
139: } else if (!A->donotstash) {
140: mat->assembled = PETSC_FALSE;
141: if (roworiented) {
142: MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE);
143: } else {
144: MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE);
145: }
146: }
147: }
148: return 0;
149: }
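/*
   Usage sketch (illustrative): values destined for rows owned by another rank are stashed by
   the routine above and communicated during assembly, so the caller may set any global entry
   provided assembly follows. A, row and col are assumptions for the example.

     Mat         A;                 // assumed MATMPIDENSE with sizes set and preallocated
     PetscInt    row = 0, col = 0;  // global indices; the row may be owned by another rank
     PetscScalar val = 1.0;
     PetscCall(MatSetValues(A, 1, &row, 1, &col, &val, ADD_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/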
151: PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
152: {
153: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
154: PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
156: for (i = 0; i < m; i++) {
157: if (idxm[i] < 0) continue; /* negative row */
159: if (idxm[i] >= rstart && idxm[i] < rend) {
160: row = idxm[i] - rstart;
161: for (j = 0; j < n; j++) {
162: if (idxn[j] < 0) continue; /* negative column */
164: MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j);
165: }
166: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
167: }
168: return 0;
169: }
171: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
172: {
173: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
175: MatDenseGetLDA(a->A, lda);
176: return 0;
177: }
179: static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
180: {
181: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
182: PetscBool iscuda;
184: if (!a->A) {
186: PetscLayoutSetUp(A->rmap);
187: PetscLayoutSetUp(A->cmap);
188: MatCreate(PETSC_COMM_SELF, &a->A);
189: MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N);
190: PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda);
191: MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE);
192: }
193: MatDenseSetLDA(a->A, lda);
194: return 0;
195: }
197: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
198: {
199: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
202: MatDenseGetArray(a->A, array);
203: return 0;
204: }
206: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
207: {
208: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
211: MatDenseGetArrayRead(a->A, array);
212: return 0;
213: }
215: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
216: {
217: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
220: MatDenseGetArrayWrite(a->A, array);
221: return 0;
222: }
224: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
225: {
226: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
230: MatDensePlaceArray(a->A, array);
231: return 0;
232: }
234: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
235: {
236: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
240: MatDenseResetArray(a->A);
241: return 0;
242: }
244: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
245: {
246: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
250: MatDenseReplaceArray(a->A, array);
251: return 0;
252: }
254: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
255: {
256: Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd;
257: PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
258: const PetscInt *irow, *icol;
259: const PetscScalar *v;
260: PetscScalar *bv;
261: Mat newmat;
262: IS iscol_local;
263: MPI_Comm comm_is, comm_mat;
265: PetscObjectGetComm((PetscObject)A, &comm_mat);
266: PetscObjectGetComm((PetscObject)iscol, &comm_is);
269: ISAllGather(iscol, &iscol_local);
270: ISGetIndices(isrow, &irow);
271: ISGetIndices(iscol_local, &icol);
272: ISGetLocalSize(isrow, &nrows);
273: ISGetLocalSize(iscol, &ncols);
274: ISGetSize(iscol, &Ncols); /* global number of columns, size of iscol_local */
276: /* No parallel redistribution is currently supported! Should really check each index set
277: to confirm that it is OK. ... Currently supports only submatrices with the same partitioning as the
278: original matrix! */
280: MatGetLocalSize(A, &nlrows, &nlcols);
281: MatGetOwnershipRange(A, &rstart, &rend);
283: /* Check submatrix call */
284: if (scall == MAT_REUSE_MATRIX) {
285: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
286: /* Really need to check the row and column sizes! */
287: newmat = *B;
288: } else {
289: /* Create and fill new matrix */
290: MatCreate(PetscObjectComm((PetscObject)A), &newmat);
291: MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols);
292: MatSetType(newmat, ((PetscObject)A)->type_name);
293: MatMPIDenseSetPreallocation(newmat, NULL);
294: }
296: /* Now extract the data pointers and do the copy, column at a time */
297: newmatd = (Mat_MPIDense *)newmat->data;
298: MatDenseGetArray(newmatd->A, &bv);
299: MatDenseGetArrayRead(mat->A, &v);
300: MatDenseGetLDA(mat->A, &lda);
301: for (i = 0; i < Ncols; i++) {
302: const PetscScalar *av = v + lda * icol[i];
303: for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
304: }
305: MatDenseRestoreArrayRead(mat->A, &v);
306: MatDenseRestoreArray(newmatd->A, &bv);
308: /* Assemble the matrices so that the correct flags are set */
309: MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY);
310: MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY);
312: /* Free work space */
313: ISRestoreIndices(isrow, &irow);
314: ISRestoreIndices(iscol_local, &icol);
315: ISDestroy(&iscol_local);
316: *B = newmat;
317: return 0;
318: }
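/*
   Usage sketch (illustrative): extracting a submatrix whose row partitioning matches the
   original matrix, as the routine above requires. The index sets below simply select all
   locally owned rows and columns and are assumptions chosen for the example.

     Mat      A, B;  // A assumed to be an assembled MATMPIDENSE matrix
     IS       isrow, iscol;
     PetscInt rstart, rend, cstart, cend;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     PetscCall(MatGetOwnershipRangeColumn(A, &cstart, &cend));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &isrow));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, cend - cstart, cstart, 1, &iscol));
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &B));
     PetscCall(ISDestroy(&isrow));
     PetscCall(ISDestroy(&iscol));
     PetscCall(MatDestroy(&B));
*/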
320: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
321: {
322: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
324: MatDenseRestoreArray(a->A, array);
325: return 0;
326: }
328: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
329: {
330: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
332: MatDenseRestoreArrayRead(a->A, array);
333: return 0;
334: }
336: PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
337: {
338: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
340: MatDenseRestoreArrayWrite(a->A, array);
341: return 0;
342: }
344: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
345: {
346: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
347: PetscInt nstash, reallocs;
349: if (mdn->donotstash || mat->nooffprocentries) return 0;
351: MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range);
352: MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs);
353: PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
354: return 0;
355: }
357: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
358: {
359: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
360: PetscInt i, *row, *col, flg, j, rstart, ncols;
361: PetscMPIInt n;
362: PetscScalar *val;
364: if (!mdn->donotstash && !mat->nooffprocentries) {
365: /* wait on receives */
366: while (1) {
367: MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg);
368: if (!flg) break;
370: for (i = 0; i < n;) {
371: /* Now identify the consecutive vals belonging to the same row */
372: for (j = i, rstart = row[j]; j < n; j++) {
373: if (row[j] != rstart) break;
374: }
375: if (j < n) ncols = j - i;
376: else ncols = n - i;
377: /* Now assemble all these values with a single function call */
378: MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode);
379: i = j;
380: }
381: }
382: MatStashScatterEnd_Private(&mat->stash);
383: }
385: MatAssemblyBegin(mdn->A, mode);
386: MatAssemblyEnd(mdn->A, mode);
387: return 0;
388: }
390: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
391: {
392: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
394: MatZeroEntries(l->A);
395: return 0;
396: }
398: PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
399: {
400: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
401: PetscInt i, len, *lrows;
403: /* get locally owned rows */
404: PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL);
405: /* fix right hand side if needed */
406: if (x && b) {
407: const PetscScalar *xx;
408: PetscScalar *bb;
410: VecGetArrayRead(x, &xx);
411: VecGetArrayWrite(b, &bb);
412: for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
413: VecRestoreArrayRead(x, &xx);
414: VecRestoreArrayWrite(b, &bb);
415: }
416: MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL);
417: if (diag != 0.0) {
418: Vec d;
420: MatCreateVecs(A, NULL, &d);
421: VecSet(d, diag);
422: MatDiagonalSet(A, d, INSERT_VALUES);
423: VecDestroy(&d);
424: }
425: PetscFree(lrows);
426: return 0;
427: }
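/*
   Usage sketch (illustrative): zeroing a set of global rows, placing diag on the diagonal and
   adjusting the right-hand side from a known solution, which is what the routine above
   implements. A, x, b and the row list are assumptions for the example.

     Mat      A;               // assumed assembled MATMPIDENSE
     Vec      x, b;            // assumed compatible with A
     PetscInt rows[] = {0, 1}; // global row indices
     PetscCall(MatZeroRows(A, 2, rows, 1.0, x, b));
*/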
429: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
430: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
431: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
432: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
434: PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
435: {
436: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
437: const PetscScalar *ax;
438: PetscScalar *ay;
439: PetscMemType axmtype, aymtype;
441: if (!mdn->Mvctx) MatSetUpMultiply_MPIDense(mat);
442: VecGetArrayReadAndMemType(xx, &ax, &axmtype);
443: VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype);
444: PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE);
445: PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE);
446: VecRestoreArrayAndMemType(mdn->lvec, &ay);
447: VecRestoreArrayReadAndMemType(xx, &ax);
448: (*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy);
449: return 0;
450: }
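/*
   Usage sketch (illustrative): the routine above broadcasts the input vector to every rank
   through a PetscSF and then applies the local dense block; from the caller's side this is
   simply MatMult(). A is assumed to be an assembled MATMPIDENSE matrix.

     Mat A;
     Vec x, y;
     PetscCall(MatCreateVecs(A, &x, &y)); // x conforms to the columns, y to the rows
     PetscCall(VecSet(x, 1.0));
     PetscCall(MatMult(A, x, y));         // y = A x
     PetscCall(VecDestroy(&x));
     PetscCall(VecDestroy(&y));
*/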
452: PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
453: {
454: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
455: const PetscScalar *ax;
456: PetscScalar *ay;
457: PetscMemType axmtype, aymtype;
459: if (!mdn->Mvctx) MatSetUpMultiply_MPIDense(mat);
460: VecGetArrayReadAndMemType(xx, &ax, &axmtype);
461: VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype);
462: PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE);
463: PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE);
464: VecRestoreArrayAndMemType(mdn->lvec, &ay);
465: VecRestoreArrayReadAndMemType(xx, &ax);
466: (*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz);
467: return 0;
468: }
470: PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
471: {
472: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
473: const PetscScalar *ax;
474: PetscScalar *ay;
475: PetscMemType axmtype, aymtype;
477: if (!a->Mvctx) MatSetUpMultiply_MPIDense(A);
478: VecSet(yy, 0.0);
479: (*a->A->ops->multtranspose)(a->A, xx, a->lvec);
480: VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype);
481: VecGetArrayAndMemType(yy, &ay, &aymtype);
482: PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM);
483: PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM);
484: VecRestoreArrayReadAndMemType(a->lvec, &ax);
485: VecRestoreArrayAndMemType(yy, &ay);
486: return 0;
487: }
489: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
490: {
491: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
492: const PetscScalar *ax;
493: PetscScalar *ay;
494: PetscMemType axmtype, aymtype;
496: if (!a->Mvctx) MatSetUpMultiply_MPIDense(A);
497: VecCopy(yy, zz);
498: (*a->A->ops->multtranspose)(a->A, xx, a->lvec);
499: VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype);
500: VecGetArrayAndMemType(zz, &ay, &aymtype);
501: PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM);
502: PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM);
503: VecRestoreArrayReadAndMemType(a->lvec, &ax);
504: VecRestoreArrayAndMemType(zz, &ay);
505: return 0;
506: }
508: PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
509: {
510: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
511: PetscInt lda, len, i, n, m = A->rmap->n, radd;
512: PetscScalar *x, zero = 0.0;
513: const PetscScalar *av;
515: VecSet(v, zero);
516: VecGetArray(v, &x);
517: VecGetSize(v, &n);
519: len = PetscMin(a->A->rmap->n, a->A->cmap->n);
520: radd = A->rmap->rstart * m;
521: MatDenseGetArrayRead(a->A, &av);
522: MatDenseGetLDA(a->A, &lda);
523: for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
524: MatDenseRestoreArrayRead(a->A, &av);
525: VecRestoreArray(v, &x);
526: return 0;
527: }
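/*
   Usage sketch (illustrative): extracting the diagonal into a vector that conforms to the rows
   of A. A is assumed to be a square, assembled MATMPIDENSE matrix.

     Mat A;
     Vec d;
     PetscCall(MatCreateVecs(A, NULL, &d));
     PetscCall(MatGetDiagonal(A, d)); // d[i] = A[i][i] for the locally owned rows
     PetscCall(VecDestroy(&d));
*/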
529: PetscErrorCode MatDestroy_MPIDense(Mat mat)
530: {
531: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
533: #if defined(PETSC_USE_LOG)
534: PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
535: #endif
536: MatStashDestroy_Private(&mat->stash);
539: MatDestroy(&mdn->A);
540: VecDestroy(&mdn->lvec);
541: PetscSFDestroy(&mdn->Mvctx);
542: VecDestroy(&mdn->cvec);
543: MatDestroy(&mdn->cmat);
545: PetscFree(mat->data);
546: PetscObjectChangeTypeName((PetscObject)mat, NULL);
548: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL);
549: PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL);
550: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL);
551: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL);
552: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL);
553: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL);
554: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL);
555: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL);
556: PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL);
557: PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL);
558: PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL);
559: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL);
560: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL);
561: #if defined(PETSC_HAVE_ELEMENTAL)
562: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL);
563: #endif
564: #if defined(PETSC_HAVE_SCALAPACK)
565: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL);
566: #endif
567: PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL);
568: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL);
569: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL);
570: #if defined(PETSC_HAVE_CUDA)
571: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL);
572: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL);
573: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL);
574: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL);
575: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL);
576: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL);
577: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL);
578: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL);
579: PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL);
580: PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL);
581: PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL);
582: PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL);
583: PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL);
584: PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL);
585: PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL);
586: PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL);
587: PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL);
588: #endif
589: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL);
590: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL);
591: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL);
592: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL);
593: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL);
594: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL);
595: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL);
596: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL);
597: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL);
598: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL);
600: PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL);
601: return 0;
602: }
604: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
606: #include <petscdraw.h>
607: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
608: {
609: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
610: PetscMPIInt rank;
611: PetscViewerType vtype;
612: PetscBool iascii, isdraw;
613: PetscViewer sviewer;
614: PetscViewerFormat format;
616: MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank);
617: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
618: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
619: if (iascii) {
620: PetscViewerGetType(viewer, &vtype);
621: PetscViewerGetFormat(viewer, &format);
622: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
623: MatInfo info;
624: MatGetInfo(mat, MAT_LOCAL, &info);
625: PetscViewerASCIIPushSynchronized(viewer);
626: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
627: (PetscInt)info.memory));
628: PetscViewerFlush(viewer);
629: PetscViewerASCIIPopSynchronized(viewer);
630: if (mdn->Mvctx) PetscSFView(mdn->Mvctx, viewer);
631: return 0;
632: } else if (format == PETSC_VIEWER_ASCII_INFO) {
633: return 0;
634: }
635: } else if (isdraw) {
636: PetscDraw draw;
637: PetscBool isnull;
639: PetscViewerDrawGetDraw(viewer, 0, &draw);
640: PetscDrawIsNull(draw, &isnull);
641: if (isnull) return 0;
642: }
644: {
645: /* assemble the entire matrix onto the first processor. */
646: Mat A;
647: PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
648: PetscInt *cols;
649: PetscScalar *vals;
651: MatCreate(PetscObjectComm((PetscObject)mat), &A);
652: if (rank == 0) {
653: MatSetSizes(A, M, N, M, N);
654: } else {
655: MatSetSizes(A, 0, 0, M, N);
656: }
657: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
658: MatSetType(A, MATMPIDENSE);
659: MatMPIDenseSetPreallocation(A, NULL);
661: /* Copy the matrix ... This isn't the most efficient means,
662: but it's quick for now */
663: A->insertmode = INSERT_VALUES;
665: row = mat->rmap->rstart;
666: m = mdn->A->rmap->n;
667: for (i = 0; i < m; i++) {
668: MatGetRow_MPIDense(mat, row, &nz, &cols, &vals);
669: MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES);
670: MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals);
671: row++;
672: }
674: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
675: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
676: PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
677: if (rank == 0) {
678: PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name);
679: MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer);
680: }
681: PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
682: PetscViewerFlush(viewer);
683: MatDestroy(&A);
684: }
685: return 0;
686: }
688: PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
689: {
690: PetscBool iascii, isbinary, isdraw, issocket;
692: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
693: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
694: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket);
695: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
697: if (iascii || issocket || isdraw) {
698: MatView_MPIDense_ASCIIorDraworSocket(mat, viewer);
699: } else if (isbinary) MatView_Dense_Binary(mat, viewer);
700: return 0;
701: }
703: PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
704: {
705: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
706: Mat mdn = mat->A;
707: PetscLogDouble isend[5], irecv[5];
709: info->block_size = 1.0;
711: MatGetInfo(mdn, MAT_LOCAL, info);
713: isend[0] = info->nz_used;
714: isend[1] = info->nz_allocated;
715: isend[2] = info->nz_unneeded;
716: isend[3] = info->memory;
717: isend[4] = info->mallocs;
718: if (flag == MAT_LOCAL) {
719: info->nz_used = isend[0];
720: info->nz_allocated = isend[1];
721: info->nz_unneeded = isend[2];
722: info->memory = isend[3];
723: info->mallocs = isend[4];
724: } else if (flag == MAT_GLOBAL_MAX) {
725: MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A));
727: info->nz_used = irecv[0];
728: info->nz_allocated = irecv[1];
729: info->nz_unneeded = irecv[2];
730: info->memory = irecv[3];
731: info->mallocs = irecv[4];
732: } else if (flag == MAT_GLOBAL_SUM) {
733: MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A));
735: info->nz_used = irecv[0];
736: info->nz_allocated = irecv[1];
737: info->nz_unneeded = irecv[2];
738: info->memory = irecv[3];
739: info->mallocs = irecv[4];
740: }
741: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
742: info->fill_ratio_needed = 0;
743: info->factor_mallocs = 0;
744: return 0;
745: }
747: PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
748: {
749: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
751: switch (op) {
752: case MAT_NEW_NONZERO_LOCATIONS:
753: case MAT_NEW_NONZERO_LOCATION_ERR:
754: case MAT_NEW_NONZERO_ALLOCATION_ERR:
755: MatCheckPreallocated(A, 1);
756: MatSetOption(a->A, op, flg);
757: break;
758: case MAT_ROW_ORIENTED:
759: MatCheckPreallocated(A, 1);
760: a->roworiented = flg;
761: MatSetOption(a->A, op, flg);
762: break;
763: case MAT_FORCE_DIAGONAL_ENTRIES:
764: case MAT_KEEP_NONZERO_PATTERN:
765: case MAT_USE_HASH_TABLE:
766: case MAT_SORTED_FULL:
767: PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
768: break;
769: case MAT_IGNORE_OFF_PROC_ENTRIES:
770: a->donotstash = flg;
771: break;
772: case MAT_SYMMETRIC:
773: case MAT_STRUCTURALLY_SYMMETRIC:
774: case MAT_HERMITIAN:
775: case MAT_SYMMETRY_ETERNAL:
776: case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
777: case MAT_SPD:
778: case MAT_IGNORE_LOWER_TRIANGULAR:
779: case MAT_IGNORE_ZERO_ENTRIES:
780: case MAT_SPD_ETERNAL:
781: /* if the diagonal matrix is square it inherits some of the properties above */
782: PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
783: break;
784: default:
785: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
786: }
787: return 0;
788: }
790: PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
791: {
792: Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
793: const PetscScalar *l;
794: PetscScalar x, *v, *vv, *r;
795: PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
797: MatDenseGetArray(mdn->A, &vv);
798: MatDenseGetLDA(mdn->A, &lda);
799: MatGetLocalSize(A, &s2, &s3);
800: if (ll) {
801: VecGetLocalSize(ll, &s2a);
803: VecGetArrayRead(ll, &l);
804: for (i = 0; i < m; i++) {
805: x = l[i];
806: v = vv + i;
807: for (j = 0; j < n; j++) {
808: (*v) *= x;
809: v += lda;
810: }
811: }
812: VecRestoreArrayRead(ll, &l);
813: PetscLogFlops(1.0 * n * m);
814: }
815: if (rr) {
816: const PetscScalar *ar;
818: VecGetLocalSize(rr, &s3a);
820: VecGetArrayRead(rr, &ar);
821: if (!mdn->Mvctx) MatSetUpMultiply_MPIDense(A);
822: VecGetArray(mdn->lvec, &r);
823: PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE);
824: PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE);
825: VecRestoreArrayRead(rr, &ar);
826: for (i = 0; i < n; i++) {
827: x = r[i];
828: v = vv + i * lda;
829: for (j = 0; j < m; j++) (*v++) *= x;
830: }
831: VecRestoreArray(mdn->lvec, &r);
832: PetscLogFlops(1.0 * n * m);
833: }
834: MatDenseRestoreArray(mdn->A, &vv);
835: return 0;
836: }
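/*
   Usage sketch (illustrative): scaling rows by ll and columns by rr, i.e.
   A <- diag(ll) * A * diag(rr), which the routine above performs on the local block after
   broadcasting rr. A is assumed to be an assembled MATMPIDENSE matrix.

     Mat A;
     Vec ll, rr;
     PetscCall(MatCreateVecs(A, &rr, &ll)); // rr conforms to the columns, ll to the rows
     PetscCall(VecSet(ll, 2.0));
     PetscCall(VecSet(rr, 0.5));
     PetscCall(MatDiagonalScale(A, ll, rr));
     PetscCall(VecDestroy(&ll));
     PetscCall(VecDestroy(&rr));
*/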
838: PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
839: {
840: Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
841: PetscInt i, j;
842: PetscMPIInt size;
843: PetscReal sum = 0.0;
844: const PetscScalar *av, *v;
846: MatDenseGetArrayRead(mdn->A, &av);
847: v = av;
848: MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
849: if (size == 1) {
850: MatNorm(mdn->A, type, nrm);
851: } else {
852: if (type == NORM_FROBENIUS) {
853: for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
854: sum += PetscRealPart(PetscConj(*v) * (*v));
855: v++;
856: }
857: MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A));
858: *nrm = PetscSqrtReal(*nrm);
859: PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n);
860: } else if (type == NORM_1) {
861: PetscReal *tmp, *tmp2;
862: PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2);
863: *nrm = 0.0;
864: v = av;
865: for (j = 0; j < mdn->A->cmap->n; j++) {
866: for (i = 0; i < mdn->A->rmap->n; i++) {
867: tmp[j] += PetscAbsScalar(*v);
868: v++;
869: }
870: }
871: MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A));
872: for (j = 0; j < A->cmap->N; j++) {
873: if (tmp2[j] > *nrm) *nrm = tmp2[j];
874: }
875: PetscFree2(tmp, tmp2);
876: PetscLogFlops(A->cmap->n * A->rmap->n);
877: } else if (type == NORM_INFINITY) { /* max row norm */
878: PetscReal ntemp;
879: MatNorm(mdn->A, type, &ntemp);
880: MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A));
881: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
882: }
883: MatDenseRestoreArrayRead(mdn->A, &av);
884: return 0;
885: }
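/*
   Usage sketch (illustrative): the norms supported above are NORM_FROBENIUS, NORM_1 and
   NORM_INFINITY. A is assumed to be an assembled MATMPIDENSE matrix.

     Mat       A;
     PetscReal nrm;
     PetscCall(MatNorm(A, NORM_FROBENIUS, &nrm));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "||A||_F = %g\n", (double)nrm));
*/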
887: PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
888: {
889: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
890: Mat B;
891: PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
892: PetscInt j, i, lda;
893: PetscScalar *v;
895: if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *matout);
896: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
897: MatCreate(PetscObjectComm((PetscObject)A), &B);
898: MatSetSizes(B, A->cmap->n, A->rmap->n, N, M);
899: MatSetType(B, ((PetscObject)A)->type_name);
900: MatMPIDenseSetPreallocation(B, NULL);
901: } else B = *matout;
903: m = a->A->rmap->n;
904: n = a->A->cmap->n;
905: MatDenseGetArrayRead(a->A, (const PetscScalar **)&v);
906: MatDenseGetLDA(a->A, &lda);
907: PetscMalloc1(m, &rwork);
908: for (i = 0; i < m; i++) rwork[i] = rstart + i;
909: for (j = 0; j < n; j++) {
910: MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES);
911: v += lda;
912: }
913: MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v);
914: PetscFree(rwork);
915: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
916: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
917: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
918: *matout = B;
919: } else {
920: MatHeaderMerge(A, &B);
921: }
922: return 0;
923: }
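/*
   Usage sketch (illustrative): creating a parallel dense transpose and reusing it later, as the
   reuse logic above allows. A is assumed to be an assembled MATMPIDENSE matrix.

     Mat A, At;
     PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); // new parallel dense transpose
     // ... change entries of A, then refresh the existing At ...
     PetscCall(MatTranspose(A, MAT_REUSE_MATRIX, &At));
     PetscCall(MatDestroy(&At));
*/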
925: static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
926: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
928: PetscErrorCode MatSetUp_MPIDense(Mat A)
929: {
930: PetscLayoutSetUp(A->rmap);
931: PetscLayoutSetUp(A->cmap);
932: if (!A->preallocated) MatMPIDenseSetPreallocation(A, NULL);
933: return 0;
934: }
936: PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
937: {
938: Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
940: MatAXPY(A->A, alpha, B->A, str);
941: return 0;
942: }
944: PetscErrorCode MatConjugate_MPIDense(Mat mat)
945: {
946: Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
948: MatConjugate(a->A);
949: return 0;
950: }
952: PetscErrorCode MatRealPart_MPIDense(Mat A)
953: {
954: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
956: MatRealPart(a->A);
957: return 0;
958: }
960: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
961: {
962: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
964: MatImaginaryPart(a->A);
965: return 0;
966: }
968: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
969: {
970: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
974: (*a->A->ops->getcolumnvector)(a->A, v, col);
975: return 0;
976: }
978: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
980: PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
981: {
982: PetscInt i, m, n;
983: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
984: PetscReal *work;
986: MatGetSize(A, &m, &n);
987: PetscMalloc1(n, &work);
988: if (type == REDUCTION_MEAN_REALPART) {
989: MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work);
990: } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
991: MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work);
992: } else {
993: MatGetColumnReductions_SeqDense(a->A, type, work);
994: }
995: if (type == NORM_2) {
996: for (i = 0; i < n; i++) work[i] *= work[i];
997: }
998: if (type == NORM_INFINITY) {
999: MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm);
1000: } else {
1001: MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm);
1002: }
1003: PetscFree(work);
1004: if (type == NORM_2) {
1005: for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1006: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1007: for (i = 0; i < n; i++) reductions[i] /= m;
1008: }
1009: return 0;
1010: }
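/*
   Usage sketch (illustrative): the routine above is reached through the column reduction
   queries such as MatGetColumnNorms(); the caller supplies an array of global column length.
   A is assumed to be an assembled MATMPIDENSE matrix.

     Mat        A;
     PetscInt   N;
     PetscReal *norms;
     PetscCall(MatGetSize(A, NULL, &N));
     PetscCall(PetscMalloc1(N, &norms));
     PetscCall(MatGetColumnNorms(A, NORM_2, norms));
     PetscCall(PetscFree(norms));
*/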
1012: #if defined(PETSC_HAVE_CUDA)
1013: PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
1014: {
1015: PetscScalar *da;
1016: PetscInt lda;
1018: MatDenseCUDAGetArray(A, &da);
1019: MatDenseGetLDA(A, &lda);
1020: PetscInfo(A, "Performing Shift on backend\n");
1021: MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N);
1022: MatDenseCUDARestoreArray(A, &da);
1023: return 0;
1024: }
1026: static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1027: {
1028: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1029: PetscInt lda;
1033: if (!a->cvec) { VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec); }
1034: a->vecinuse = col + 1;
1035: MatDenseGetLDA(a->A, &lda);
1036: MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse);
1037: VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1038: *v = a->cvec;
1039: return 0;
1040: }
1042: static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1043: {
1044: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1048: a->vecinuse = 0;
1049: MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse);
1050: VecCUDAResetArray(a->cvec);
1051: if (v) *v = NULL;
1052: return 0;
1053: }
1055: static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1056: {
1057: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1058: PetscInt lda;
1062: if (!a->cvec) { VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec); }
1063: a->vecinuse = col + 1;
1064: MatDenseGetLDA(a->A, &lda);
1065: MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse);
1066: VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1067: VecLockReadPush(a->cvec);
1068: *v = a->cvec;
1069: return 0;
1070: }
1072: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1073: {
1074: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1078: a->vecinuse = 0;
1079: MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse);
1080: VecLockReadPop(a->cvec);
1081: VecCUDAResetArray(a->cvec);
1082: if (v) *v = NULL;
1083: return 0;
1084: }
1086: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1087: {
1088: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1089: PetscInt lda;
1093: if (!a->cvec) { VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec); }
1094: a->vecinuse = col + 1;
1095: MatDenseGetLDA(a->A, &lda);
1096: MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
1097: VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1098: *v = a->cvec;
1099: return 0;
1100: }
1102: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
1103: {
1104: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1108: a->vecinuse = 0;
1109: MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
1110: VecCUDAResetArray(a->cvec);
1111: if (v) *v = NULL;
1112: return 0;
1113: }
1115: static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1116: {
1117: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1121: MatDenseCUDAPlaceArray(l->A, a);
1122: return 0;
1123: }
1125: static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1126: {
1127: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1131: MatDenseCUDAResetArray(l->A);
1132: return 0;
1133: }
1135: static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1136: {
1137: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1141: MatDenseCUDAReplaceArray(l->A, a);
1142: return 0;
1143: }
1145: static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1146: {
1147: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1149: MatDenseCUDAGetArrayWrite(l->A, a);
1150: return 0;
1151: }
1153: static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1154: {
1155: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1157: MatDenseCUDARestoreArrayWrite(l->A, a);
1158: return 0;
1159: }
1161: static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1162: {
1163: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1165: MatDenseCUDAGetArrayRead(l->A, a);
1166: return 0;
1167: }
1169: static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1170: {
1171: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1173: MatDenseCUDARestoreArrayRead(l->A, a);
1174: return 0;
1175: }
1177: static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1178: {
1179: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1181: MatDenseCUDAGetArray(l->A, a);
1182: return 0;
1183: }
1185: static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1186: {
1187: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
1189: MatDenseCUDARestoreArray(l->A, a);
1190: return 0;
1191: }
1193: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1194: static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1195: static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1196: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1197: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1198: static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1199: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);
1201: static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
1202: {
1203: Mat_MPIDense *d = (Mat_MPIDense *)mat->data;
1207: if (d->A) MatBindToCPU(d->A, bind);
1208: mat->boundtocpu = bind;
1209: if (!bind) {
1210: PetscBool iscuda;
1212: PetscFree(mat->defaultrandtype);
1213: PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype);
1214: PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda);
1215: if (!iscuda) VecDestroy(&d->cvec);
1216: PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda);
1217: if (!iscuda) MatDestroy(&d->cmat);
1218: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA);
1219: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA);
1220: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA);
1221: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA);
1222: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA);
1223: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA);
1224: mat->ops->shift = MatShift_MPIDenseCUDA;
1225: } else {
1226: PetscFree(mat->defaultrandtype);
1227: PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype);
1228: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense);
1229: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense);
1230: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense);
1231: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense);
1232: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense);
1233: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense);
1234: mat->ops->shift = MatShift_MPIDense;
1235: }
1236: if (d->cmat) MatBindToCPU(d->cmat, bind);
1237: return 0;
1238: }
1240: PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1241: {
1242: Mat_MPIDense *d = (Mat_MPIDense *)A->data;
1243: PetscBool iscuda;
1246: PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda);
1247: if (!iscuda) return 0;
1248: PetscLayoutSetUp(A->rmap);
1249: PetscLayoutSetUp(A->cmap);
1250: if (!d->A) {
1251: MatCreate(PETSC_COMM_SELF, &d->A);
1252: MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N);
1253: }
1254: MatSetType(d->A, MATSEQDENSECUDA);
1255: MatSeqDenseCUDASetPreallocation(d->A, d_data);
1256: A->preallocated = PETSC_TRUE;
1257: A->assembled = PETSC_TRUE;
1258: return 0;
1259: }
1260: #endif
1262: static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1263: {
1264: Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1266: MatSetRandom(d->A, rctx);
1267: #if defined(PETSC_HAVE_DEVICE)
1268: x->offloadmask = d->A->offloadmask;
1269: #endif
1270: return 0;
1271: }
1273: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1274: {
1275: *missing = PETSC_FALSE;
1276: return 0;
1277: }
1279: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1280: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1281: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1282: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1283: static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1284: static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1286: /* -------------------------------------------------------------------*/
1287: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1288: MatGetRow_MPIDense,
1289: MatRestoreRow_MPIDense,
1290: MatMult_MPIDense,
1291: /* 4*/ MatMultAdd_MPIDense,
1292: MatMultTranspose_MPIDense,
1293: MatMultTransposeAdd_MPIDense,
1294: NULL,
1295: NULL,
1296: NULL,
1297: /* 10*/ NULL,
1298: NULL,
1299: NULL,
1300: NULL,
1301: MatTranspose_MPIDense,
1302: /* 15*/ MatGetInfo_MPIDense,
1303: MatEqual_MPIDense,
1304: MatGetDiagonal_MPIDense,
1305: MatDiagonalScale_MPIDense,
1306: MatNorm_MPIDense,
1307: /* 20*/ MatAssemblyBegin_MPIDense,
1308: MatAssemblyEnd_MPIDense,
1309: MatSetOption_MPIDense,
1310: MatZeroEntries_MPIDense,
1311: /* 24*/ MatZeroRows_MPIDense,
1312: NULL,
1313: NULL,
1314: NULL,
1315: NULL,
1316: /* 29*/ MatSetUp_MPIDense,
1317: NULL,
1318: NULL,
1319: MatGetDiagonalBlock_MPIDense,
1320: NULL,
1321: /* 34*/ MatDuplicate_MPIDense,
1322: NULL,
1323: NULL,
1324: NULL,
1325: NULL,
1326: /* 39*/ MatAXPY_MPIDense,
1327: MatCreateSubMatrices_MPIDense,
1328: NULL,
1329: MatGetValues_MPIDense,
1330: MatCopy_MPIDense,
1331: /* 44*/ NULL,
1332: MatScale_MPIDense,
1333: MatShift_MPIDense,
1334: NULL,
1335: NULL,
1336: /* 49*/ MatSetRandom_MPIDense,
1337: NULL,
1338: NULL,
1339: NULL,
1340: NULL,
1341: /* 54*/ NULL,
1342: NULL,
1343: NULL,
1344: NULL,
1345: NULL,
1346: /* 59*/ MatCreateSubMatrix_MPIDense,
1347: MatDestroy_MPIDense,
1348: MatView_MPIDense,
1349: NULL,
1350: NULL,
1351: /* 64*/ NULL,
1352: NULL,
1353: NULL,
1354: NULL,
1355: NULL,
1356: /* 69*/ NULL,
1357: NULL,
1358: NULL,
1359: NULL,
1360: NULL,
1361: /* 74*/ NULL,
1362: NULL,
1363: NULL,
1364: NULL,
1365: NULL,
1366: /* 79*/ NULL,
1367: NULL,
1368: NULL,
1369: NULL,
1370: /* 83*/ MatLoad_MPIDense,
1371: NULL,
1372: NULL,
1373: NULL,
1374: NULL,
1375: NULL,
1376: /* 89*/ NULL,
1377: NULL,
1378: NULL,
1379: NULL,
1380: NULL,
1381: /* 94*/ NULL,
1382: NULL,
1383: MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1384: MatMatTransposeMultNumeric_MPIDense_MPIDense,
1385: NULL,
1386: /* 99*/ MatProductSetFromOptions_MPIDense,
1387: NULL,
1388: NULL,
1389: MatConjugate_MPIDense,
1390: NULL,
1391: /*104*/ NULL,
1392: MatRealPart_MPIDense,
1393: MatImaginaryPart_MPIDense,
1394: NULL,
1395: NULL,
1396: /*109*/ NULL,
1397: NULL,
1398: NULL,
1399: MatGetColumnVector_MPIDense,
1400: MatMissingDiagonal_MPIDense,
1401: /*114*/ NULL,
1402: NULL,
1403: NULL,
1404: NULL,
1405: NULL,
1406: /*119*/ NULL,
1407: NULL,
1408: NULL,
1409: NULL,
1410: NULL,
1411: /*124*/ NULL,
1412: MatGetColumnReductions_MPIDense,
1413: NULL,
1414: NULL,
1415: NULL,
1416: /*129*/ NULL,
1417: NULL,
1418: MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1419: MatTransposeMatMultNumeric_MPIDense_MPIDense,
1420: NULL,
1421: /*134*/ NULL,
1422: NULL,
1423: NULL,
1424: NULL,
1425: NULL,
1426: /*139*/ NULL,
1427: NULL,
1428: NULL,
1429: NULL,
1430: NULL,
1431: MatCreateMPIMatConcatenateSeqMat_MPIDense,
1432: /*145*/ NULL,
1433: NULL,
1434: NULL,
1435: NULL,
1436: NULL,
1437: /*150*/ NULL};
1439: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1440: {
1441: Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1442: PetscBool iscuda;
1445: PetscLayoutSetUp(mat->rmap);
1446: PetscLayoutSetUp(mat->cmap);
1447: if (!a->A) {
1448: MatCreate(PETSC_COMM_SELF, &a->A);
1449: MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N);
1450: }
1451: PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda);
1452: MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE);
1453: MatSeqDenseSetPreallocation(a->A, data);
1454: #if defined(PETSC_HAVE_DEVICE)
1455: mat->offloadmask = a->A->offloadmask;
1456: #endif
1457: mat->preallocated = PETSC_TRUE;
1458: mat->assembled = PETSC_TRUE;
1459: return 0;
1460: }
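/*
   Usage sketch (illustrative): creating a parallel dense matrix and letting PETSc allocate the
   local storage by passing NULL, which ends up in the routine above. The sizes are assumptions
   for the example.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 20));
     PetscCall(MatSetType(A, MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(A, NULL)); // NULL: PETSc allocates the local array
     // ... MatSetValues() and assembly ...
     PetscCall(MatDestroy(&A));
*/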
1462: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1463: {
1464: Mat B, C;
1466: MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C);
1467: MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B);
1468: MatDestroy(&C);
1469: if (reuse == MAT_REUSE_MATRIX) {
1470: C = *newmat;
1471: } else C = NULL;
1472: MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C);
1473: MatDestroy(&B);
1474: if (reuse == MAT_INPLACE_MATRIX) {
1475: MatHeaderReplace(A, &C);
1476: } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1477: return 0;
1478: }
1480: PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1481: {
1482: Mat B, C;
1484: MatDenseGetLocalMatrix(A, &C);
1485: MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B);
1486: if (reuse == MAT_REUSE_MATRIX) {
1487: C = *newmat;
1488: } else C = NULL;
1489: MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C);
1490: MatDestroy(&B);
1491: if (reuse == MAT_INPLACE_MATRIX) {
1492: MatHeaderReplace(A, &C);
1493: } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1494: return 0;
1495: }
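/*
   Usage sketch (illustrative): converting between dense and AIJ storage through MatConvert().
   A is assumed to be an assembled MATMPIDENSE matrix.

     Mat A, B;
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); // sparse copy of A
     PetscCall(MatDestroy(&B));
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INPLACE_MATRIX, &A)); // convert A itself
*/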
1497: #if defined(PETSC_HAVE_ELEMENTAL)
1498: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1499: {
1500: Mat mat_elemental;
1501: PetscScalar *v;
1502: PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;
1504: if (reuse == MAT_REUSE_MATRIX) {
1505: mat_elemental = *newmat;
1506: MatZeroEntries(*newmat);
1507: } else {
1508: MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1509: MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N);
1510: MatSetType(mat_elemental, MATELEMENTAL);
1511: MatSetUp(mat_elemental);
1512: MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE);
1513: }
1515: PetscMalloc2(m, &rows, N, &cols);
1516: for (i = 0; i < N; i++) cols[i] = i;
1517: for (i = 0; i < m; i++) rows[i] = rstart + i;
1519: /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1520: MatDenseGetArray(A, &v);
1521: MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES);
1522: MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1523: MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1524: MatDenseRestoreArray(A, &v);
1525: PetscFree2(rows, cols);
1527: if (reuse == MAT_INPLACE_MATRIX) {
1528: MatHeaderReplace(A, &mat_elemental);
1529: } else {
1530: *newmat = mat_elemental;
1531: }
1532: return 0;
1533: }
1534: #endif
1536: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1537: {
1538: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1540: MatDenseGetColumn(mat->A, col, vals);
1541: return 0;
1542: }
1544: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1545: {
1546: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1548: MatDenseRestoreColumn(mat->A, vals);
1549: return 0;
1550: }
1552: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1553: {
1554: Mat_MPIDense *mat;
1555: PetscInt m, nloc, N;
1557: MatGetSize(inmat, &m, &N);
1558: MatGetLocalSize(inmat, NULL, &nloc);
1559: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1560: PetscInt sum;
1562: if (n == PETSC_DECIDE) PetscSplitOwnership(comm, &n, &N);
1563: /* Check sum(n) = N */
1564: MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm);
1567: MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat);
1568: MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
1569: }
1571: /* numeric phase */
1572: mat = (Mat_MPIDense *)(*outmat)->data;
1573: MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN);
1574: return 0;
1575: }
1577: #if defined(PETSC_HAVE_CUDA)
1578: PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1579: {
1580: Mat B;
1581: Mat_MPIDense *m;
1583: if (reuse == MAT_INITIAL_MATRIX) {
1584: MatDuplicate(M, MAT_COPY_VALUES, newmat);
1585: } else if (reuse == MAT_REUSE_MATRIX) {
1586: MatCopy(M, *newmat, SAME_NONZERO_PATTERN);
1587: }
1589: B = *newmat;
1590: MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE);
1591: PetscFree(B->defaultvectype);
1592: PetscStrallocpy(VECSTANDARD, &B->defaultvectype);
1593: PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE);
1594: PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL);
1595: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL);
1596: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL);
1597: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL);
1598: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL);
1599: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL);
1600: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL);
1601: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL);
1602: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL);
1603: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL);
1604: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL);
1605: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL);
1606: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL);
1607: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL);
1608: m = (Mat_MPIDense *)(B)->data;
1609: if (m->A) MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A);
1610: B->ops->bindtocpu = NULL;
1611: B->offloadmask = PETSC_OFFLOAD_CPU;
1612: return 0;
1613: }
1615: PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
1616: {
1617: Mat B;
1618: Mat_MPIDense *m;
1620: if (reuse == MAT_INITIAL_MATRIX) {
1621: MatDuplicate(M, MAT_COPY_VALUES, newmat);
1622: } else if (reuse == MAT_REUSE_MATRIX) {
1623: MatCopy(M, *newmat, SAME_NONZERO_PATTERN);
1624: }
1626: B = *newmat;
1627: PetscFree(B->defaultvectype);
1628: PetscStrallocpy(VECCUDA, &B->defaultvectype);
1629: PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA);
1630: PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);
1631: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
1632: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
1633: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
1634: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
1635: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);
1636: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);
1637: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);
1638: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);
1639: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);
1640: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);
1641: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);
1642: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);
1643: PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);
1644: m = (Mat_MPIDense *)(B->data);
1645: if (m->A) {
1646: MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A);
1647: B->offloadmask = PETSC_OFFLOAD_BOTH;
1648: } else {
1649: B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1650: }
1651: MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE);
1653: B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1654: return 0;
1655: }
1656: #endif
1658: PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1659: {
1660: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1661: PetscInt lda;
1665: if (!a->cvec) VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
1666: a->vecinuse = col + 1;
1667: MatDenseGetLDA(a->A, &lda);
1668: MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse);
1669: VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1670: *v = a->cvec;
1671: return 0;
1672: }
1674: PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1675: {
1676: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1680: a->vecinuse = 0;
1681: MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse);
1682: VecResetArray(a->cvec);
1683: if (v) *v = NULL;
1684: return 0;
1685: }
1687: PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1688: {
1689: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1690: PetscInt lda;
1694: if (!a->cvec) VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
1695: a->vecinuse = col + 1;
1696: MatDenseGetLDA(a->A, &lda);
1697: MatDenseGetArrayRead(a->A, &a->ptrinuse);
1698: VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1699: VecLockReadPush(a->cvec);
1700: *v = a->cvec;
1701: return 0;
1702: }
1704: PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1705: {
1706: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1710: a->vecinuse = 0;
1711: MatDenseRestoreArrayRead(a->A, &a->ptrinuse);
1712: VecLockReadPop(a->cvec);
1713: VecResetArray(a->cvec);
1714: if (v) *v = NULL;
1715: return 0;
1716: }
1718: PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1719: {
1720: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1721: PetscInt lda;
1725: if (!a->cvec) VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
1726: a->vecinuse = col + 1;
1727: MatDenseGetLDA(a->A, &lda);
1728: MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
1729: VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
1730: *v = a->cvec;
1731: return 0;
1732: }
1734: PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1735: {
1736: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1740: a->vecinuse = 0;
1741: MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
1742: VecResetArray(a->cvec);
1743: if (v) *v = NULL;
1744: return 0;
1745: }
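/* Editorial note: MatDenseGetSubMatrix_MPIDense below reuses one cached parallel view (a->cmat).
   Its row/column layouts are (re)built to match the requested [rbegin,rend) x [cbegin,cend)
   window, and its local block is obtained from the inner sequential matrix via
   MatDenseGetSubMatrix(). */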
1747: PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1748: {
1749: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1750: Mat_MPIDense *c;
1751: MPI_Comm comm;
1752: PetscInt pbegin, pend;
1754: PetscObjectGetComm((PetscObject)A, &comm);
1757: pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1758: pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1759: if (!a->cmat) {
1760: MatCreate(comm, &a->cmat);
1761: MatSetType(a->cmat, ((PetscObject)A)->type_name);
1762: if (rend - rbegin == A->rmap->N) PetscLayoutReference(A->rmap, &a->cmat->rmap);
1763: else {
1764: PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin);
1765: PetscLayoutSetSize(a->cmat->rmap, rend - rbegin);
1766: PetscLayoutSetUp(a->cmat->rmap);
1767: }
1768: PetscLayoutSetSize(a->cmat->cmap, cend - cbegin);
1769: PetscLayoutSetUp(a->cmat->cmap);
1770: } else {
1771: PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1772: if (same && a->cmat->rmap->N != A->rmap->N) {
1773: same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1774: MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A));
1775: }
1776: if (!same) {
1777: PetscLayoutDestroy(&a->cmat->rmap);
1778: PetscLayoutCreate(comm, &a->cmat->rmap);
1779: PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin);
1780: PetscLayoutSetSize(a->cmat->rmap, rend - rbegin);
1781: PetscLayoutSetUp(a->cmat->rmap);
1782: }
1783: if (cend - cbegin != a->cmat->cmap->N) {
1784: PetscLayoutDestroy(&a->cmat->cmap);
1785: PetscLayoutCreate(comm, &a->cmat->cmap);
1786: PetscLayoutSetSize(a->cmat->cmap, cend - cbegin);
1787: PetscLayoutSetUp(a->cmat->cmap);
1788: }
1789: }
1790: c = (Mat_MPIDense *)a->cmat->data;
1792: MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A);
1794: a->cmat->preallocated = PETSC_TRUE;
1795: a->cmat->assembled = PETSC_TRUE;
1796: #if defined(PETSC_HAVE_DEVICE)
1797: a->cmat->offloadmask = c->A->offloadmask;
1798: #endif
1799: a->matinuse = cbegin + 1;
1800: *v = a->cmat;
1801: return 0;
1802: }
1804: PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1805: {
1806: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1807: Mat_MPIDense *c;
1812: a->matinuse = 0;
1813: c = (Mat_MPIDense *)a->cmat->data;
1814: MatDenseRestoreSubMatrix(a->A, &c->A);
1815: if (v) *v = NULL;
1816: #if defined(PETSC_HAVE_DEVICE)
1817: A->offloadmask = a->A->offloadmask;
1818: #endif
1819: return 0;
1820: }
1822: /*MC
1823: MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1825: Options Database Keys:
1826: . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1828: Level: beginner
1830: .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1831: M*/
1832: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1833: {
1834: Mat_MPIDense *a;
1836: PetscNew(&a);
1837: mat->data = (void *)a;
1838: PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps));
1840: mat->insertmode = NOT_SET_VALUES;
1842: /* build cache for off-process entries generated during assembly */
1843: a->donotstash = PETSC_FALSE;
1845: MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash);
1847: /* stuff used for matrix vector multiply */
1848: a->lvec = NULL;
1849: a->Mvctx = NULL;
1850: a->roworiented = PETSC_TRUE;
1852: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense);
1853: PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense);
1854: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense);
1855: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense);
1856: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense);
1857: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense);
1858: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense);
1859: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense);
1860: PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense);
1861: PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense);
1862: PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense);
1863: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense);
1864: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense);
1865: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense);
1866: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense);
1867: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense);
1868: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense);
1869: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense);
1870: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense);
1871: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense);
1872: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ);
1873: #if defined(PETSC_HAVE_ELEMENTAL)
1874: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental);
1875: #endif
1876: #if defined(PETSC_HAVE_SCALAPACK)
1877: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK);
1878: #endif
1879: #if defined(PETSC_HAVE_CUDA)
1880: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA);
1881: #endif
1882: PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense);
1883: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
1884: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
1885: #if defined(PETSC_HAVE_CUDA)
1886: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
1887: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
1888: #endif
1890: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense);
1891: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense);
1892: PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE);
1893: return 0;
1894: }
1896: /*MC
1897: MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1899: Options Database Keys:
1900: . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`
1902: Level: beginner
1904: .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
1905: M*/
1906: #if defined(PETSC_HAVE_CUDA)
1907: #include <petsc/private/deviceimpl.h>
1908: PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1909: {
1910: PetscDeviceInitialize(PETSC_DEVICE_CUDA);
1911: MatCreate_MPIDense(B);
1912: MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B);
1913: return 0;
1914: }
1915: #endif
1917: /*MC
1918: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1920: This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1921: and `MATMPIDENSE` otherwise.
1923: Options Database Keys:
1924: . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1926: Level: beginner
1928: .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
1929: M*/
1931: /*MC
1932: MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1934: This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
1935: and `MATMPIDENSECUDA` otherwise.
1937: Options Database Keys:
1938: . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`
1940: Level: beginner
1942: .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1943: M*/
1945: /*@C
1946: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1948: Collective
1950: Input Parameters:
1951: + B - the matrix
1952: - data - optional location of matrix data. Set data=NULL for PETSc
1953: to control all matrix memory allocation.
1955: Notes:
1956: The dense format is fully compatible with standard Fortran 77
1957: storage by columns.
1959: The data input variable is intended primarily for Fortran programmers
1960: who wish to allocate their own matrix memory space. Most users should
1961: set data=NULL.
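   Example usage (an editorial sketch, not part of the original source; `m` and `n` are the caller's local sizes and are assumed to be defined elsewhere):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
   MatSetType(A, MATMPIDENSE);
   MatMPIDenseSetPreallocation(A, NULL); /* let PETSc allocate the local column-major storage */
.ve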
1963: Level: intermediate
1965: .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1966: @*/
1967: PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1968: {
1970: PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1971: return 0;
1972: }
1974: /*@
1975: MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1976: array provided by the user. This is useful to avoid copying an array
1977: into a matrix
1979: Not Collective
1981: Input Parameters:
1982: + mat - the matrix
1983: - array - the array in column major order
1985: Note:
1986: You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1987: freed when the matrix is destroyed.
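   Example usage (an editorial sketch; `myarray` is a user-managed buffer of at least lda*n entries, not defined in this file):
.vb
   MatDensePlaceArray(mat, myarray);  /* matrix now uses the user's buffer */
   /* ... use mat ... */
   MatDenseResetArray(mat);           /* restore the original storage; myarray is still owned by the user */
.ve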
1989: Level: developer
1991: .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1992: `MatDenseReplaceArray()`
1993: @*/
1994: PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1995: {
1997: PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1998: PetscObjectStateIncrease((PetscObject)mat);
1999: #if defined(PETSC_HAVE_DEVICE)
2000: mat->offloadmask = PETSC_OFFLOAD_CPU;
2001: #endif
2002: return 0;
2003: }
2005: /*@
2006: MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`
2008: Not Collective
2010: Input Parameter:
2011: . mat - the matrix
2013: Note:
2014: You can only call this after a call to `MatDensePlaceArray()`
2016: Level: developer
2018: .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2019: @*/
2020: PetscErrorCode MatDenseResetArray(Mat mat)
2021: {
2023: PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
2024: PetscObjectStateIncrease((PetscObject)mat);
2025: return 0;
2026: }
2028: /*@
2029: MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2030: array provided by the user. This is useful to avoid copying an array
2031: into a matrix
2033: Not Collective
2035: Input Parameters:
2036: + mat - the matrix
2037: - array - the array in column major order
2039: Note:
2040: The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2041: freed by the user. It will be freed when the matrix is destroyed.
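   Example usage (an editorial sketch; the sizes are illustrative and `lda`/`n` are assumed known to the caller):
.vb
   PetscScalar *newarray;
   PetscMalloc1(lda * n, &newarray);
   /* ... fill newarray in column-major order ... */
   MatDenseReplaceArray(mat, newarray); /* mat takes ownership; do not PetscFree(newarray) */
.ve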
2043: Level: developer
2045: .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2046: @*/
2047: PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
2048: {
2050: PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2051: PetscObjectStateIncrease((PetscObject)mat);
2052: #if defined(PETSC_HAVE_DEVICE)
2053: mat->offloadmask = PETSC_OFFLOAD_CPU;
2054: #endif
2055: return 0;
2056: }
2058: #if defined(PETSC_HAVE_CUDA)
2059: /*@C
2060: MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2061: array provided by the user. This is useful to avoid copying an array
2062: into a matrix
2064: Not Collective
2066: Input Parameters:
2067: + mat - the matrix
2068: - array - the array in column major order
2070: Note:
2071: You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2072: freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
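   Example usage (an editorial sketch; error checking is omitted and the local sizes `lda`/`n` are assumed known):
.vb
   PetscScalar *d_array;
   cudaMalloc((void **)&d_array, lda * n * sizeof(PetscScalar));
   MatDenseCUDAPlaceArray(mat, d_array); /* matrix now uses the user's device buffer */
   /* ... use mat ... */
   MatDenseCUDAResetArray(mat);          /* restore the original device storage */
   cudaFree(d_array);                    /* the user still owns and frees d_array */
.ve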
2074: Level: developer
2076: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2077: @*/
2078: PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
2079: {
2081: PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
2082: PetscObjectStateIncrease((PetscObject)mat);
2083: mat->offloadmask = PETSC_OFFLOAD_GPU;
2084: return 0;
2085: }
2087: /*@C
2088: MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseCUDAPlaceArray()`
2090: Not Collective
2092: Input Parameter:
2093: . mat - the matrix
2095: Note:
2096: You can only call this after a call to `MatDenseCUDAPlaceArray()`
2098: Level: developer
2100: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2101: @*/
2102: PetscErrorCode MatDenseCUDAResetArray(Mat mat)
2103: {
2105: PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
2106: PetscObjectStateIncrease((PetscObject)mat);
2107: return 0;
2108: }
2110: /*@C
2111: MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2112: array provided by the user. This is useful to avoid copying an array
2113: into a matrix
2115: Not Collective
2117: Input Parameters:
2118: + mat - the matrix
2119: - array - the array in column major order
2121: Note:
2122: This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2123: The memory passed in CANNOT be freed by the user. It will be freed
2124: when the matrix is destroyed. The array should respect the matrix leading dimension.
2126: Level: developer
2128: .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2129: @*/
2130: PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
2131: {
2133: PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
2134: PetscObjectStateIncrease((PetscObject)mat);
2135: mat->offloadmask = PETSC_OFFLOAD_GPU;
2136: return 0;
2137: }
2139: /*@C
2140: MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.
2142: Not Collective
2144: Input Parameter:
2145: . A - the matrix
2147: Output Parameter:
2148: . array - the GPU array in column major order
2150: Notes:
2151: The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.
2153: The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
2155: Level: developer
2157: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2158: @*/
2159: PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2160: {
2162: PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
2163: PetscObjectStateIncrease((PetscObject)A);
2164: return 0;
2165: }
2167: /*@C
2168: MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
2170: Not Collective
2172: Input Parameters:
2173: + A - the matrix
2174: - array - the GPU array in column major order
2176: Level: developer
2178: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2179: @*/
2180: PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2181: {
2183: PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
2184: PetscObjectStateIncrease((PetscObject)A);
2185: A->offloadmask = PETSC_OFFLOAD_GPU;
2186: return 0;
2187: }
2189: /*@C
2190: MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.
2192: Not Collective
2194: Input Parameter:
2195: . A - the matrix
2197: Output Parameter:
2198: . array - the GPU array in column major order
2200: Note:
2201: Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`.
2203: Level: developer
2205: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2206: @*/
2207: PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2208: {
2210: PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
2211: return 0;
2212: }
2214: /*@C
2215: MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
2217: Not Collective
2219: Input Parameters:
2220: + A - the matrix
2221: - array - the GPU array in column major order
2223: Note:
2224: Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`.
2226: Level: developer
2228: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2229: @*/
2230: PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2231: {
2232: PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
2233: return 0;
2234: }
2236: /*@C
2237: MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
2239: Not Collective
2241: Input Parameter:
2242: . A - the matrix
2244: Output Parameter:
2245: . array - the GPU array in column major order
2247: Note:
2248: Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.
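   Example usage (an editorial sketch; `my_device_kernel` is a placeholder for user device code and is not part of PETSc):
.vb
   PetscScalar *d_a;
   MatDenseCUDAGetArray(A, &d_a);
   /* ... launch my_device_kernel on d_a ... */
   MatDenseCUDARestoreArray(A, &d_a);
.ve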
2250: Level: developer
2252: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2253: @*/
2254: PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2255: {
2257: PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
2258: PetscObjectStateIncrease((PetscObject)A);
2259: return 0;
2260: }
2262: /*@C
2263: MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.
2265: Not Collective
2267: Input Parameters:
2268: + A - the matrix
2269: - array - the GPU array in column major order
2271: Level: developer
2273: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2274: @*/
2275: PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2276: {
2278: PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
2279: PetscObjectStateIncrease((PetscObject)A);
2280: A->offloadmask = PETSC_OFFLOAD_GPU;
2281: return 0;
2282: }
2283: #endif
2285: /*@C
2286: MatCreateDense - Creates a matrix in `MATDENSE` format.
2288: Collective
2290: Input Parameters:
2291: + comm - MPI communicator
2292: . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2293: . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2294: . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2295: . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2296: - data - optional location of matrix data. Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
2297: to control all matrix memory allocation.
2299: Output Parameter:
2300: . A - the matrix
2302: Notes:
2303: The dense format is fully compatible with standard Fortran 77
2304: storage by columns.
2306: Although local portions of the matrix are stored in column-major
2307: order, the matrix is partitioned across MPI ranks by row.
2309: The data input variable is intended primarily for Fortran programmers
2310: who wish to allocate their own matrix memory space. Most users should
2311: set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).
2313: The user MUST specify either the local or global matrix dimensions
2314: (possibly both).
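   Example usage (an editorial sketch creating a 100 x 50 matrix with PETSc-managed storage and PETSc-chosen local sizes):
.vb
   Mat A;
   MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A);
   /* ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd(), then use A ... */
   MatDestroy(&A);
.ve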
2316: Level: intermediate
2318: .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2319: @*/
2320: PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
2321: {
2322: MatCreate(comm, A);
2323: MatSetSizes(*A, m, n, M, N);
2324: MatSetType(*A, MATDENSE);
2325: MatSeqDenseSetPreallocation(*A, data);
2326: MatMPIDenseSetPreallocation(*A, data);
2327: return 0;
2328: }
2330: #if defined(PETSC_HAVE_CUDA)
2331: /*@C
2332: MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
2334: Collective
2336: Input Parameters:
2337: + comm - MPI communicator
2338: . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2339: . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2340: . M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2341: . N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2342: - data - optional location of GPU matrix data. Set data=NULL for PETSc
2343: to control matrix memory allocation.
2345: Output Parameter:
2346: . A - the matrix
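   Example usage (an editorial sketch mirroring `MatCreateDense()`, with GPU-resident storage managed by PETSc):
.vb
   Mat A;
   MatCreateDenseCUDA(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 50, NULL, &A);
.ve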
2348: Level: intermediate
2350: .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
2351: @*/
2352: PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
2353: {
2354: MatCreate(comm, A);
2356: MatSetSizes(*A, m, n, M, N);
2357: MatSetType(*A, MATDENSECUDA);
2358: MatSeqDenseCUDASetPreallocation(*A, data);
2359: MatMPIDenseCUDASetPreallocation(*A, data);
2360: return 0;
2361: }
2362: #endif
2364: static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
2365: {
2366: Mat mat;
2367: Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
2369: *newmat = NULL;
2370: MatCreate(PetscObjectComm((PetscObject)A), &mat);
2371: MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
2372: MatSetType(mat, ((PetscObject)A)->type_name);
2373: a = (Mat_MPIDense *)mat->data;
2375: mat->factortype = A->factortype;
2376: mat->assembled = PETSC_TRUE;
2377: mat->preallocated = PETSC_TRUE;
2379: mat->insertmode = NOT_SET_VALUES;
2380: a->donotstash = oldmat->donotstash;
2382: PetscLayoutReference(A->rmap, &mat->rmap);
2383: PetscLayoutReference(A->cmap, &mat->cmap);
2385: MatDuplicate(oldmat->A, cpvalues, &a->A);
2387: *newmat = mat;
2388: return 0;
2389: }
2391: PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2392: {
2393: PetscBool isbinary;
2394: #if defined(PETSC_HAVE_HDF5)
2395: PetscBool ishdf5;
2396: #endif
2400: /* force binary viewer to load .info file if it has not yet done so */
2401: PetscViewerSetUp(viewer);
2402: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
2403: #if defined(PETSC_HAVE_HDF5)
2404: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5);
2405: #endif
2406: if (isbinary) {
2407: MatLoad_Dense_Binary(newMat, viewer);
2408: #if defined(PETSC_HAVE_HDF5)
2409: } else if (ishdf5) {
2410: MatLoad_Dense_HDF5(newMat, viewer);
2411: #endif
2412: } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2413: return 0;
2414: }
2416: static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
2417: {
2418: Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
2419: Mat a, b;
2421: a = matA->A;
2422: b = matB->A;
2423: MatEqual(a, b, flag);
2424: MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A));
2425: return 0;
2426: }
2428: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2429: {
2430: Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2432: PetscFree2(atb->sendbuf, atb->recvcounts);
2433: MatDestroy(&atb->atb);
2434: PetscFree(atb);
2435: return 0;
2436: }
2438: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2439: {
2440: Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2442: PetscFree2(abt->buf[0], abt->buf[1]);
2443: PetscFree2(abt->recvcounts, abt->recvdispls);
2444: PetscFree(abt);
2445: return 0;
2446: }
2448: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2449: {
2450: Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2451: Mat_TransMatMultDense *atb;
2452: MPI_Comm comm;
2453: PetscMPIInt size, *recvcounts;
2454: PetscScalar *carray, *sendbuf;
2455: const PetscScalar *atbarray;
2456: PetscInt i, cN = C->cmap->N, proc, k, j, lda;
2457: const PetscInt *ranges;
2459: MatCheckProduct(C, 3);
2461: atb = (Mat_TransMatMultDense *)C->product->data;
2462: recvcounts = atb->recvcounts;
2463: sendbuf = atb->sendbuf;
2465: PetscObjectGetComm((PetscObject)A, &comm);
2466: MPI_Comm_size(comm, &size);
2468: /* compute atbarray = aseq^T * bseq */
2469: MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb);
2471: MatGetOwnershipRanges(C, &ranges);
2473: /* arrange atbarray into sendbuf */
2474: MatDenseGetArrayRead(atb->atb, &atbarray);
2475: MatDenseGetLDA(atb->atb, &lda);
2476: for (proc = 0, k = 0; proc < size; proc++) {
2477: for (j = 0; j < cN; j++) {
2478: for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
2479: }
2480: }
2481: MatDenseRestoreArrayRead(atb->atb, &atbarray);
2483: /* reduce-scatter the sendbuf contributions into the local rows of C */
2484: MatDenseGetArrayWrite(c->A, &carray);
2485: MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm);
2486: MatDenseRestoreArrayWrite(c->A, &carray);
2487: MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
2488: MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
2489: return 0;
2490: }
2492: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2493: {
2494: MPI_Comm comm;
2495: PetscMPIInt size;
2496: PetscInt cm = A->cmap->n, cM, cN = B->cmap->N;
2497: Mat_TransMatMultDense *atb;
2498: PetscBool cisdense;
2499: PetscInt i;
2500: const PetscInt *ranges;
2502: MatCheckProduct(C, 4);
2504: PetscObjectGetComm((PetscObject)A, &comm);
2505: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2506: SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2507: }
2509: /* create matrix product C */
2510: MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N);
2511: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "");
2512: if (!cisdense) MatSetType(C, ((PetscObject)A)->type_name);
2513: MatSetUp(C);
2515: /* create data structure for reuse C */
2516: MPI_Comm_size(comm, &size);
2517: PetscNew(&atb);
2518: cM = C->rmap->N;
2519: PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts);
2520: MatGetOwnershipRanges(C, &ranges);
2521: for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2523: C->product->data = atb;
2524: C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2525: return 0;
2526: }
2528: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2529: {
2530: MPI_Comm comm;
2531: PetscMPIInt i, size;
2532: PetscInt maxRows, bufsiz;
2533: PetscMPIInt tag;
2534: PetscInt alg;
2535: Mat_MatTransMultDense *abt;
2536: Mat_Product *product = C->product;
2537: PetscBool flg;
2539: MatCheckProduct(C, 4);
2541: /* check local size of A and B */
2544: PetscStrcmp(product->alg, "allgatherv", &flg);
2545: alg = flg ? 0 : 1;
2547: /* setup matrix product C */
2548: MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N);
2549: MatSetType(C, MATMPIDENSE);
2550: MatSetUp(C);
2551: PetscObjectGetNewTag((PetscObject)C, &tag);
2553: /* create data structure for reuse C */
2554: PetscObjectGetComm((PetscObject)C, &comm);
2555: MPI_Comm_size(comm, &size);
2556: PetscNew(&abt);
2557: abt->tag = tag;
2558: abt->alg = alg;
2559: switch (alg) {
2560: case 1: /* alg: "cyclic" */
2561: for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2562: bufsiz = A->cmap->N * maxRows;
2563: PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]));
2564: break;
2565: default: /* alg: "allgatherv" */
2566: PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));
2567: PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls));
2568: for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2569: for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2570: break;
2571: }
2573: C->product->data = abt;
2574: C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2575: C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2576: return 0;
2577: }
2579: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2580: {
2581: Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2582: Mat_MatTransMultDense *abt;
2583: MPI_Comm comm;
2584: PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2585: PetscScalar *sendbuf, *recvbuf = NULL, *cv;
2586: PetscInt i, cK = A->cmap->N, k, j, bn;
2587: PetscScalar _DOne = 1.0, _DZero = 0.0;
2588: const PetscScalar *av, *bv;
2589: PetscBLASInt cm, cn, ck, alda, blda = 0, clda;
2590: MPI_Request reqs[2];
2591: const PetscInt *ranges;
2593: MatCheckProduct(C, 3);
2595: abt = (Mat_MatTransMultDense *)C->product->data;
2596: PetscObjectGetComm((PetscObject)C, &comm);
2597: MPI_Comm_rank(comm, &rank);
2598: MPI_Comm_size(comm, &size);
2599: MatDenseGetArrayRead(a->A, &av);
2600: MatDenseGetArrayRead(b->A, &bv);
2601: MatDenseGetArrayWrite(c->A, &cv);
2602: MatDenseGetLDA(a->A, &i);
2603: PetscBLASIntCast(i, &alda);
2604: MatDenseGetLDA(b->A, &i);
2605: PetscBLASIntCast(i, &blda);
2606: MatDenseGetLDA(c->A, &i);
2607: PetscBLASIntCast(i, &clda);
2608: MatGetOwnershipRanges(B, &ranges);
2609: bn = B->rmap->n;
2610: if (blda == bn) {
2611: sendbuf = (PetscScalar *)bv;
2612: } else {
2613: sendbuf = abt->buf[0];
2614: for (k = 0, i = 0; i < cK; i++) {
2615: for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2616: }
2617: }
2618: if (size > 1) {
2619: sendto = (rank + size - 1) % size;
2620: recvfrom = (rank + size + 1) % size;
2621: } else {
2622: sendto = recvfrom = 0;
2623: }
2624: PetscBLASIntCast(cK, &ck);
2625: PetscBLASIntCast(c->A->rmap->n, &cm);
2626: recvisfrom = rank;
2627: for (i = 0; i < size; i++) {
2628: /* the previous iteration's sends and receives have completed, so the buffers can be read/modified */
2629: PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2630: PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2632: if (nextrecvisfrom != rank) {
2633: /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2634: sendsiz = cK * bn;
2635: recvsiz = cK * nextbn;
2636: recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2637: MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);
2638: MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);
2639: }
2641: /* local aseq * sendbuf^T */
2642: PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);
2643: if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2645: if (nextrecvisfrom != rank) {
2646: /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2647: MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
2648: }
2649: bn = nextbn;
2650: recvisfrom = nextrecvisfrom;
2651: sendbuf = recvbuf;
2652: }
2653: MatDenseRestoreArrayRead(a->A, &av);
2654: MatDenseRestoreArrayRead(b->A, &bv);
2655: MatDenseRestoreArrayWrite(c->A, &cv);
2656: MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
2657: MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
2658: return 0;
2659: }
2661: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2662: {
2663: Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2664: Mat_MatTransMultDense *abt;
2665: MPI_Comm comm;
2666: PetscMPIInt size;
2667: PetscScalar *cv, *sendbuf, *recvbuf;
2668: const PetscScalar *av, *bv;
2669: PetscInt blda, i, cK = A->cmap->N, k, j, bn;
2670: PetscScalar _DOne = 1.0, _DZero = 0.0;
2671: PetscBLASInt cm, cn, ck, alda, clda;
2673: MatCheckProduct(C, 3);
2675: abt = (Mat_MatTransMultDense *)C->product->data;
2676: PetscObjectGetComm((PetscObject)A, &comm);
2677: MPI_Comm_size(comm, &size);
2678: MatDenseGetArrayRead(a->A, &av);
2679: MatDenseGetArrayRead(b->A, &bv);
2680: MatDenseGetArrayWrite(c->A, &cv);
2681: MatDenseGetLDA(a->A, &i);
2682: PetscBLASIntCast(i, &alda);
2683: MatDenseGetLDA(b->A, &blda);
2684: MatDenseGetLDA(c->A, &i);
2685: PetscBLASIntCast(i, &clda);
2686: /* copy transpose of B into buf[0] */
2687: bn = B->rmap->n;
2688: sendbuf = abt->buf[0];
2689: recvbuf = abt->buf[1];
2690: for (k = 0, j = 0; j < bn; j++) {
2691: for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2692: }
2693: MatDenseRestoreArrayRead(b->A, &bv);
2694: MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);
2695: PetscBLASIntCast(cK, &ck);
2696: PetscBLASIntCast(c->A->rmap->n, &cm);
2697: PetscBLASIntCast(c->A->cmap->n, &cn);
2698: if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2699: MatDenseRestoreArrayRead(a->A, &av);
2701: MatDenseRestoreArrayWrite(c->A, &cv);
2702: MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
2703: MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
2704: return 0;
2705: }
2707: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2708: {
2709: Mat_MatTransMultDense *abt;
2711: MatCheckProduct(C, 3);
2713: abt = (Mat_MatTransMultDense *)C->product->data;
2714: switch (abt->alg) {
2715: case 1:
2716: MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);
2717: break;
2718: default:
2719: MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);
2720: break;
2721: }
2722: return 0;
2723: }
2725: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2726: {
2727: Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2729: MatDestroy(&ab->Ce);
2730: MatDestroy(&ab->Ae);
2731: MatDestroy(&ab->Be);
2732: PetscFree(ab);
2733: return 0;
2734: }
2736: #if defined(PETSC_HAVE_ELEMENTAL)
2737: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2738: {
2739: Mat_MatMultDense *ab;
2741: MatCheckProduct(C, 3);
2743: ab = (Mat_MatMultDense *)C->product->data;
2744: MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae);
2745: MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be);
2746: MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce);
2747: MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C);
2748: return 0;
2749: }
2751: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2752: {
2753: Mat Ae, Be, Ce;
2754: Mat_MatMultDense *ab;
2756: MatCheckProduct(C, 4);
2758: /* check local size of A and B */
2759: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2760: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
2761: }
2763: /* create elemental matrices Ae and Be */
2764: MatCreate(PetscObjectComm((PetscObject)A), &Ae);
2765: MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N);
2766: MatSetType(Ae, MATELEMENTAL);
2767: MatSetUp(Ae);
2768: MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE);
2770: MatCreate(PetscObjectComm((PetscObject)B), &Be);
2771: MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N);
2772: MatSetType(Be, MATELEMENTAL);
2773: MatSetUp(Be);
2774: MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE);
2776: /* compute symbolic Ce = Ae*Be */
2777: MatCreate(PetscObjectComm((PetscObject)C), &Ce);
2778: MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce);
2780: /* setup C */
2781: MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
2782: MatSetType(C, MATDENSE);
2783: MatSetUp(C);
2785: /* create data structure for reuse Cdense */
2786: PetscNew(&ab);
2787: ab->Ae = Ae;
2788: ab->Be = Be;
2789: ab->Ce = Ce;
2791: C->product->data = ab;
2792: C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2793: C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2794: return 0;
2795: }
2796: #endif
2797: /* ----------------------------------------------- */
2798: #if defined(PETSC_HAVE_ELEMENTAL)
2799: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2800: {
2801: C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2802: C->ops->productsymbolic = MatProductSymbolic_AB;
2803: return 0;
2804: }
2805: #endif
2807: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2808: {
2809: Mat_Product *product = C->product;
2810: Mat A = product->A, B = product->B;
2812: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2813: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2814: C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2815: C->ops->productsymbolic = MatProductSymbolic_AtB;
2816: return 0;
2817: }
2819: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2820: {
2821: Mat_Product *product = C->product;
2822: const char *algTypes[2] = {"allgatherv", "cyclic"};
2823: PetscInt alg, nalg = 2;
2824: PetscBool flg = PETSC_FALSE;
2826: /* Set default algorithm */
2827: alg = 0; /* default is allgatherv */
2828: PetscStrcmp(product->alg, "default", &flg);
2829: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2831: /* Get runtime option */
2832: if (product->api_user) {
2833: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2834: PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg);
2835: PetscOptionsEnd();
2836: } else {
2837: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2838: PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg);
2839: PetscOptionsEnd();
2840: }
2841: if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);
2843: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2844: C->ops->productsymbolic = MatProductSymbolic_ABt;
2845: return 0;
2846: }
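/* Editorial note on runtime selection (option names taken from the code above): either
     -mat_product_algorithm cyclic                      (MatProduct interface)
   or
     -matmattransmult_mpidense_mpidense_via cyclic      (MatMatTransposeMult interface)
   switches the C = A*B^T product from the default allgatherv algorithm to the cyclic one. */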
2848: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2849: {
2850: Mat_Product *product = C->product;
2852: switch (product->type) {
2853: #if defined(PETSC_HAVE_ELEMENTAL)
2854: case MATPRODUCT_AB:
2855: MatProductSetFromOptions_MPIDense_AB(C);
2856: break;
2857: #endif
2858: case MATPRODUCT_AtB:
2859: MatProductSetFromOptions_MPIDense_AtB(C);
2860: break;
2861: case MATPRODUCT_ABt:
2862: MatProductSetFromOptions_MPIDense_ABt(C);
2863: break;
2864: default:
2865: break;
2866: }
2867: return 0;
2868: }