/* mpidense.c */


  2: /*
  3:    Basic functions for basic parallel dense matrices.
  4: */

  6: #include <../src/mat/impls/dense/mpi/mpidense.h>
  7: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  8: #include <petscblaslapack.h>

 10: /*@
 11:       MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix returns the sequential
 12:               matrix that represents the operator. For sequential matrices it returns itself.

 14:     Input Parameter:
 15: .      A - the sequential or MPI dense matrix

 17:     Output Parameter:
 18: .      B - the inner matrix

 20:     Level: intermediate

 22: .seealso: `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
 23: @*/
 24: PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
 25: {
 26:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 27:   PetscBool     flg;

 31:   PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg);
 32:   if (flg) *B = mat->A;
 33:   else {
 34:     PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg);
 36:     *B = A;
 37:   }
 38:   return 0;
 39: }

 41: PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
 42: {
 43:   Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
 44:   Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;

 46:   MatCopy(Amat->A, Bmat->A, s);
 47:   return 0;
 48: }

 50: PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
 51: {
 52:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 53:   PetscInt      j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
 54:   PetscScalar  *v;

 56:   MatDenseGetArray(mat->A, &v);
 57:   MatDenseGetLDA(mat->A, &lda);
 58:   rend2 = PetscMin(rend, A->cmap->N);
 59:   if (rend2 > rstart) {
 60:     for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
 61:     PetscLogFlops(rend2 - rstart);
 62:   }
 63:   MatDenseRestoreArray(mat->A, &v);
 64:   return 0;
 65: }

 67: PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 68: {
 69:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 70:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 73:   lrow = row - rstart;
 74:   MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v);
 75:   return 0;
 76: }

 78: PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
 79: {
 80:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
 81:   PetscInt      lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;

 84:   lrow = row - rstart;
 85:   MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v);
 86:   return 0;
 87: }

 89: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
 90: {
 91:   Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
 92:   PetscInt      m = A->rmap->n, rstart = A->rmap->rstart;
 93:   PetscScalar  *array;
 94:   MPI_Comm      comm;
 95:   PetscBool     flg;
 96:   Mat           B;

 98:   MatHasCongruentLayouts(A, &flg);
100:   PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B);
101:   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

103:     PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg);
105:     PetscObjectGetComm((PetscObject)(mdn->A), &comm);
106:     MatCreate(comm, &B);
107:     MatSetSizes(B, m, m, m, m);
108:     MatSetType(B, ((PetscObject)mdn->A)->type_name);
109:     MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array);
110:     MatSeqDenseSetPreallocation(B, array + m * rstart);
111:     MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array);
112:     PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B);
113:     *a = B;
114:     MatDestroy(&B);
115:   } else *a = B;
116:   return 0;
117: }

/* Insert or add a logically dense sub-block of values. Locally owned rows go
   straight into the inner sequential matrix; off-process rows are stashed for
   communication during assembly (unless stashing is disabled). */
PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
  PetscBool     roworiented = A->roworiented;

  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row index: skip */
    if (idxm[i] >= rstart && idxm[i] < rend) { /* row owned by this process */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i occupies v[i*n .. i*n + n - 1] */
        MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv);
      } else {
        /* v is column-major: entry (i,j) is v[i + j*m] */
        for (j = 0; j < n; j++) {
          if (idxn[j] < 0) continue; /* negative column index: skip */
          MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash for MatAssemblyBegin/End to communicate */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE);
      } else {
        MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE);
      }
    }
  }
  return 0;
}

/* Fetch values (row-major into v). Only locally owned rows are supported;
   requesting an off-process row is an error. */
PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;

  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* convert to local row index */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        /* output entry (i,j) lands at v[i*n + j] (row-major) */
        MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j);
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  return 0;
}

171: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
172: {
173:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

175:   MatDenseGetLDA(a->A, lda);
176:   return 0;
177: }

179: static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
180: {
181:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;
182:   PetscBool     iscuda;

184:   if (!a->A) {
186:     PetscLayoutSetUp(A->rmap);
187:     PetscLayoutSetUp(A->cmap);
188:     MatCreate(PETSC_COMM_SELF, &a->A);
189:     MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N);
190:     PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda);
191:     MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE);
192:   }
193:   MatDenseSetLDA(a->A, lda);
194:   return 0;
195: }

197: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
198: {
199:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

202:   MatDenseGetArray(a->A, array);
203:   return 0;
204: }

206: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
207: {
208:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

211:   MatDenseGetArrayRead(a->A, array);
212:   return 0;
213: }

215: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
216: {
217:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

220:   MatDenseGetArrayWrite(a->A, array);
221:   return 0;
222: }

224: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
225: {
226:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

230:   MatDensePlaceArray(a->A, array);
231:   return 0;
232: }

234: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
235: {
236:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

240:   MatDenseResetArray(a->A);
241:   return 0;
242: }

244: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
245: {
246:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

250:   MatDenseReplaceArray(a->A, array);
251:   return 0;
252: }

/* Extract the submatrix A[isrow, iscol]. The row partitioning of the result
   must match that of A (no parallel redistribution); the full column index
   set is gathered onto every process and columns are copied one at a time. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense *)A->data, *newmatd;
  PetscInt           lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
  const PetscInt    *irow, *icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat                newmat;
  IS                 iscol_local;
  MPI_Comm           comm_is, comm_mat;

  PetscObjectGetComm((PetscObject)A, &comm_mat);
  PetscObjectGetComm((PetscObject)iscol, &comm_is);

  /* every process needs the complete list of requested columns */
  ISAllGather(iscol, &iscol_local);
  ISGetIndices(isrow, &irow);
  ISGetIndices(iscol_local, &icol);
  ISGetLocalSize(isrow, &nrows);
  ISGetLocalSize(iscol, &ncols);
  ISGetSize(iscol, &Ncols); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  MatGetLocalSize(A, &nlrows, &nlcols);
  MatGetOwnershipRange(A, &rstart, &rend);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    MatCreate(PetscObjectComm((PetscObject)A), &newmat);
    MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols);
    MatSetType(newmat, ((PetscObject)A)->type_name);
    MatMPIDenseSetPreallocation(newmat, NULL);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense *)newmat->data;
  MatDenseGetArray(newmatd->A, &bv);
  MatDenseGetArrayRead(mat->A, &v);
  MatDenseGetLDA(mat->A, &lda);
  for (i = 0; i < Ncols; i++) {
    /* source column icol[i] starts at v + lda*icol[i]; destination bv is
       filled contiguously, column by column */
    const PetscScalar *av = v + lda * icol[i];
    for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
  }
  MatDenseRestoreArrayRead(mat->A, &v);
  MatDenseRestoreArray(newmatd->A, &bv);

  /* Assemble the matrices so that the correct flags are set */
  MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY);

  /* Free work space */
  ISRestoreIndices(isrow, &irow);
  ISRestoreIndices(iscol_local, &icol);
  ISDestroy(&iscol_local);
  *B = newmat;
  return 0;
}

320: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
321: {
322:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

324:   MatDenseRestoreArray(a->A, array);
325:   return 0;
326: }

328: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
329: {
330:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

332:   MatDenseRestoreArrayRead(a->A, array);
333:   return 0;
334: }

336: PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
337: {
338:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

340:   MatDenseRestoreArrayWrite(a->A, array);
341:   return 0;
342: }

344: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
345: {
346:   Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
347:   PetscInt      nstash, reallocs;

349:   if (mdn->donotstash || mat->nooffprocentries) return 0;

351:   MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range);
352:   MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs);
353:   PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs);
354:   return 0;
355: }

/* Complete assembly: drain the stash of values received from other ranks,
   insert them into the local rows, then assemble the inner sequential block. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
  PetscInt      i, *row, *col, flg, j, rstart, ncols;
  PetscMPIInt   n;
  PetscScalar  *val;

  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg);
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode);
        i = j; /* continue with the next run of rows */
      }
    }
    MatStashScatterEnd_Private(&mat->stash);
  }

  MatAssemblyBegin(mdn->A, mode);
  MatAssemblyEnd(mdn->A, mode);
  return 0;
}

390: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
391: {
392:   Mat_MPIDense *l = (Mat_MPIDense *)A->data;

394:   MatZeroEntries(l->A);
395:   return 0;
396: }

/* Zero the listed rows (global indices; each process passes any subset and
   only its owned rows take effect), optionally fixing b = diag*x for those
   rows and placing diag on the diagonal afterwards. */
PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;
  PetscInt      i, len, *lrows;

  /* get locally owned rows */
  PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    VecGetArrayRead(x, &xx);
    VecGetArrayWrite(b, &bb);
    for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
    VecRestoreArrayRead(x, &xx);
    VecRestoreArrayWrite(b, &bb);
  }
  /* zero the rows locally; the diagonal value is installed separately below */
  MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL);
  if (diag != 0.0) {
    Vec d;

    MatCreateVecs(A, NULL, &d);
    VecSet(d, diag);
    MatDiagonalSet(A, d, INSERT_VALUES);
    VecDestroy(&d);
  }
  PetscFree(lrows);
  return 0;
}

429: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
430: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
431: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
432: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);

/* y = A*x: broadcast the entire input vector into the local work vector via
   the PetscSF, then multiply with the local (sequential) block. */
PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  if (!mdn->Mvctx) MatSetUpMultiply_MPIDense(mat); /* lazily build the communication pattern */
  VecGetArrayReadAndMemType(xx, &ax, &axmtype);
  VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype);
  /* gather x into the local work vector lvec (memtype-aware, works for device arrays) */
  PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE);
  PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE);
  VecRestoreArrayAndMemType(mdn->lvec, &ay);
  VecRestoreArrayReadAndMemType(xx, &ax);
  (*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy);
  return 0;
}

/* z = y + A*x: same communication as MatMult, then a local multadd. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  if (!mdn->Mvctx) MatSetUpMultiply_MPIDense(mat); /* lazily build the communication pattern */
  VecGetArrayReadAndMemType(xx, &ax, &axmtype);
  VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype);
  /* gather x into the local work vector lvec */
  PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE);
  PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE);
  VecRestoreArrayAndMemType(mdn->lvec, &ay);
  VecRestoreArrayReadAndMemType(xx, &ax);
  (*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz);
  return 0;
}

/* y = A^T*x: apply the local transpose into the work vector, then sum the
   partial results from all ranks into y with an SF reduction. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  if (!a->Mvctx) MatSetUpMultiply_MPIDense(A); /* lazily build the communication pattern */
  VecSet(yy, 0.0);                             /* the reduction below adds into yy */
  (*a->A->ops->multtranspose)(a->A, xx, a->lvec);
  VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype);
  VecGetArrayAndMemType(yy, &ay, &aymtype);
  /* sum each rank's partial product into the distributed result */
  PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM);
  PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM);
  VecRestoreArrayReadAndMemType(a->lvec, &ax);
  VecRestoreArrayAndMemType(yy, &ay);
  return 0;
}

/* z = y + A^T*x: copy y into z, then reduce each rank's local transpose
   product into z. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType       axmtype, aymtype;

  if (!a->Mvctx) MatSetUpMultiply_MPIDense(A); /* lazily build the communication pattern */
  VecCopy(yy, zz);                             /* the reduction below adds into zz */
  (*a->A->ops->multtranspose)(a->A, xx, a->lvec);
  VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype);
  VecGetArrayAndMemType(zz, &ay, &aymtype);
  /* sum each rank's partial product into the distributed result */
  PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM);
  PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM);
  VecRestoreArrayReadAndMemType(a->lvec, &ax);
  VecRestoreArrayAndMemType(zz, &ay);
  return 0;
}

508: PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
509: {
510:   Mat_MPIDense      *a = (Mat_MPIDense *)A->data;
511:   PetscInt           lda, len, i, n, m = A->rmap->n, radd;
512:   PetscScalar       *x, zero = 0.0;
513:   const PetscScalar *av;

515:   VecSet(v, zero);
516:   VecGetArray(v, &x);
517:   VecGetSize(v, &n);
519:   len  = PetscMin(a->A->rmap->n, a->A->cmap->n);
520:   radd = A->rmap->rstart * m;
521:   MatDenseGetArrayRead(a->A, &av);
522:   MatDenseGetLDA(a->A, &lda);
523:   for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
524:   MatDenseRestoreArrayRead(a->A, &av);
525:   VecRestoreArray(v, &x);
526:   return 0;
527: }

/* Destroy the parallel dense matrix: free the inner objects, the stash, and
   detach every composed method implementation so stale pointers cannot be
   reached after the type is changed or the object is reused. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  MatStashDestroy_Private(&mat->stash);
  /* release the inner sequential matrix and the communication work objects */
  MatDestroy(&mdn->A);
  VecDestroy(&mdn->lvec);
  PetscSFDestroy(&mdn->Mvctx);
  VecDestroy(&mdn->cvec);
  MatDestroy(&mdn->cmat);

  PetscFree(mat->data);
  PetscObjectChangeTypeName((PetscObject)mat, NULL);

  /* detach all composed function implementations registered for this type */
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL);
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL);
#endif
  PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL);
#if defined(PETSC_HAVE_CUDA)
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL);
#endif
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL);

  /* drop the cached diagonal block, if any (see MatGetDiagonalBlock_MPIDense) */
  PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL);
  return 0;
}

604: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);

606: #include <petscdraw.h>
/* View helper for ASCII, draw, and socket viewers. INFO formats print
   per-process statistics; all other formats assemble the whole matrix on
   rank 0 and view it there as a sequential dense matrix. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIDense     *mdn = (Mat_MPIDense *)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype; /* NOTE(review): fetched below but not otherwise used here */
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
  if (iascii) {
    PetscViewerGetType(viewer, &vtype);
    PetscViewerGetFormat(viewer, &format);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-process local sizes and storage statistics, then stop */
      MatInfo info;
      MatGetInfo(mat, MAT_LOCAL, &info);
      PetscViewerASCIIPushSynchronized(viewer);
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   (PetscInt)info.memory));
      PetscViewerFlush(viewer);
      PetscViewerASCIIPopSynchronized(viewer);
      if (mdn->Mvctx) PetscSFView(mdn->Mvctx, viewer);
      return 0;
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      return 0; /* nothing extra to print for plain INFO */
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscViewerDrawGetDraw(viewer, 0, &draw);
    PetscDrawIsNull(draw, &isnull);
    if (isnull) return 0; /* null draw device: nothing to show */
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
    PetscInt    *cols;
    PetscScalar *vals;

    MatCreate(PetscObjectComm((PetscObject)mat), &A);
    if (rank == 0) {
      MatSetSizes(A, M, N, M, N); /* rank 0 holds everything */
    } else {
      MatSetSizes(A, 0, 0, M, N);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    MatSetType(A, MATMPIDENSE);
    MatMPIDenseSetPreallocation(A, NULL);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i = 0; i < m; i++) {
      MatGetRow_MPIDense(mat, row, &nz, &cols, &vals);
      MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES);
      MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals);
      row++;
    }

    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
    PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
    if (rank == 0) {
      /* view the gathered sequential block under the original matrix's name */
      PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name);
      MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer);
    }
    PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer);
    PetscViewerFlush(viewer);
    MatDestroy(&A);
  }
  return 0;
}

688: PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
689: {
690:   PetscBool iascii, isbinary, isdraw, issocket;

692:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
693:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
694:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket);
695:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);

697:   if (iascii || issocket || isdraw) {
698:     MatView_MPIDense_ASCIIorDraworSocket(mat, viewer);
699:   } else if (isbinary) MatView_Dense_Binary(mat, viewer);
700:   return 0;
701: }

703: PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
704: {
705:   Mat_MPIDense  *mat = (Mat_MPIDense *)A->data;
706:   Mat            mdn = mat->A;
707:   PetscLogDouble isend[5], irecv[5];

709:   info->block_size = 1.0;

711:   MatGetInfo(mdn, MAT_LOCAL, info);

713:   isend[0] = info->nz_used;
714:   isend[1] = info->nz_allocated;
715:   isend[2] = info->nz_unneeded;
716:   isend[3] = info->memory;
717:   isend[4] = info->mallocs;
718:   if (flag == MAT_LOCAL) {
719:     info->nz_used      = isend[0];
720:     info->nz_allocated = isend[1];
721:     info->nz_unneeded  = isend[2];
722:     info->memory       = isend[3];
723:     info->mallocs      = isend[4];
724:   } else if (flag == MAT_GLOBAL_MAX) {
725:     MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A));

727:     info->nz_used      = irecv[0];
728:     info->nz_allocated = irecv[1];
729:     info->nz_unneeded  = irecv[2];
730:     info->memory       = irecv[3];
731:     info->mallocs      = irecv[4];
732:   } else if (flag == MAT_GLOBAL_SUM) {
733:     MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A));

735:     info->nz_used      = irecv[0];
736:     info->nz_allocated = irecv[1];
737:     info->nz_unneeded  = irecv[2];
738:     info->memory       = irecv[3];
739:     info->mallocs      = irecv[4];
740:   }
741:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
742:   info->fill_ratio_needed = 0;
743:   info->factor_mallocs    = 0;
744:   return 0;
745: }

/* Handle MatSetOption: forward storage-related options to the local block,
   record the row-orientation and stashing flags, and ignore options that do
   not apply to dense storage. */
PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the local block (requires preallocation) */
    MatCheckPreallocated(A, 1);
    MatSetOption(a->A, op, flg);
    break;
  case MAT_ROW_ORIENTED:
    /* affects how MatSetValues interprets the value array */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;
    MatSetOption(a->A, op, flg);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, MatSetValues silently drops rows owned by other processes */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
  }
  return 0;
}

790: PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
791: {
792:   Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
793:   const PetscScalar *l;
794:   PetscScalar        x, *v, *vv, *r;
795:   PetscInt           i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;

797:   MatDenseGetArray(mdn->A, &vv);
798:   MatDenseGetLDA(mdn->A, &lda);
799:   MatGetLocalSize(A, &s2, &s3);
800:   if (ll) {
801:     VecGetLocalSize(ll, &s2a);
803:     VecGetArrayRead(ll, &l);
804:     for (i = 0; i < m; i++) {
805:       x = l[i];
806:       v = vv + i;
807:       for (j = 0; j < n; j++) {
808:         (*v) *= x;
809:         v += lda;
810:       }
811:     }
812:     VecRestoreArrayRead(ll, &l);
813:     PetscLogFlops(1.0 * n * m);
814:   }
815:   if (rr) {
816:     const PetscScalar *ar;

818:     VecGetLocalSize(rr, &s3a);
820:     VecGetArrayRead(rr, &ar);
821:     if (!mdn->Mvctx) MatSetUpMultiply_MPIDense(A);
822:     VecGetArray(mdn->lvec, &r);
823:     PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE);
824:     PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE);
825:     VecRestoreArrayRead(rr, &ar);
826:     for (i = 0; i < n; i++) {
827:       x = r[i];
828:       v = vv + i * lda;
829:       for (j = 0; j < m; j++) (*v++) *= x;
830:     }
831:     VecRestoreArray(mdn->lvec, &r);
832:     PetscLogFlops(1.0 * n * m);
833:   }
834:   MatDenseRestoreArray(mdn->A, &vv);
835:   return 0;
836: }

/* Compute a matrix norm. Uniprocessor runs delegate to the local block; in
   parallel, Frobenius sums squared moduli, NORM_1 takes the max over global
   column sums, and NORM_INFINITY reduces the local row-norm maxima. */
PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense *)A->data;
  PetscInt           i, j;
  PetscMPIInt        size;
  PetscReal          sum = 0.0;
  const PetscScalar *av, *v;

  MatDenseGetArrayRead(mdn->A, &av);
  v = av;
  MPI_Comm_size(PetscObjectComm((PetscObject)A), &size);
  if (size == 1) {
    MatNorm(mdn->A, type, nrm);
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): iterates the local array contiguously, which assumes
         lda equals the local row count — confirm */
      for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A));
      *nrm = PetscSqrtReal(*nrm);
      PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n);
    } else if (type == NORM_1) {
      PetscReal *tmp, *tmp2;
      PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2);
      *nrm = 0.0;
      v    = av;
      /* accumulate per-column absolute sums over the local rows */
      for (j = 0; j < mdn->A->cmap->n; j++) {
        for (i = 0; i < mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);
          v++;
        }
      }
      MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A));
      /* 1-norm is the largest global column sum */
      for (j = 0; j < A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscFree2(tmp, tmp2);
      PetscLogFlops(A->cmap->n * A->rmap->n);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      MatNorm(mdn->A, type, &ntemp);
      MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A));
    } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
  }
  MatDenseRestoreArrayRead(mdn->A, &av);
  return 0;
}

/* Transpose the parallel dense matrix. Each process scatters the columns of
   its local block into global rows of the result via MatSetValues; for
   MAT_INPLACE_MATRIX the temporary result replaces A's contents. */
PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat           B;
  PetscInt      M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
  PetscInt      j, i, lda;
  PetscScalar  *v;

  if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *matout);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* Create the transposed layout: local sizes swapped, global N x M. */
    MatCreate(PetscObjectComm((PetscObject)A), &B);
    MatSetSizes(B, A->cmap->n, A->rmap->n, N, M);
    MatSetType(B, ((PetscObject)A)->type_name);
    MatMPIDenseSetPreallocation(B, NULL);
  } else B = *matout;

  m = a->A->rmap->n;
  n = a->A->cmap->n;
  MatDenseGetArrayRead(a->A, (const PetscScalar **)&v);
  MatDenseGetLDA(a->A, &lda);
  PetscMalloc1(m, &rwork);
  /* rwork[] holds the global row indices owned by this process. */
  for (i = 0; i < m; i++) rwork[i] = rstart + i;
  /* Column j of the local block (stride lda) becomes part of row j of B. */
  for (j = 0; j < n; j++) {
    MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES);
    v += lda;
  }
  MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v);
  PetscFree(rwork);
  MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: merge B's data into A, destroying B. */
    MatHeaderMerge(A, &B);
  }
  return 0;
}

925: static PetscErrorCode       MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
926: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);

/* Finalize the row/column layouts and, if the user never called a
   preallocation routine, allocate the local block with default storage. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscLayoutSetUp(A->rmap);
  PetscLayoutSetUp(A->cmap);
  if (!A->preallocated) MatMPIDenseSetPreallocation(A, NULL);
  return 0;
}

936: PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
937: {
938:   Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;

940:   MatAXPY(A->A, alpha, B->A, str);
941:   return 0;
942: }

944: PetscErrorCode MatConjugate_MPIDense(Mat mat)
945: {
946:   Mat_MPIDense *a = (Mat_MPIDense *)mat->data;

948:   MatConjugate(a->A);
949:   return 0;
950: }

952: PetscErrorCode MatRealPart_MPIDense(Mat A)
953: {
954:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

956:   MatRealPart(a->A);
957:   return 0;
958: }

960: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
961: {
962:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

964:   MatImaginaryPart(a->A);
965:   return 0;
966: }

968: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
969: {
970:   Mat_MPIDense *a = (Mat_MPIDense *)A->data;

974:   (*a->A->ops->getcolumnvector)(a->A, v, col);
975:   return 0;
976: }

978: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);

/* Compute a per-column reduction (norm, sum, or mean) across all processes.
   `type` carries either a NormType or a ReductionType value (both integer
   enums). Means are computed as local sums, Allreduced, then divided by the
   global row count; NORM_2 partial results are squared before summation and
   square-rooted afterwards; NORM_INFINITY reduces with MAX instead of SUM. */
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
{
  PetscInt      i, m, n;
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscReal    *work;

  MatGetSize(A, &m, &n);
  PetscMalloc1(n, &work);
  if (type == REDUCTION_MEAN_REALPART) {
    /* Mean = global sum / m; gather the local sum first. */
    MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work);
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work);
  } else {
    MatGetColumnReductions_SeqDense(a->A, type, work);
  }
  if (type == NORM_2) {
    /* Local 2-norms must be squared so the SUM reduction is meaningful. */
    for (i = 0; i < n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm);
  } else {
    MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm);
  }
  PetscFree(work);
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  return 0;
}

1012: #if defined(PETSC_HAVE_CUDA)
/* A += alpha*I, performed on the device: delegates to the CUDA kernel over
   the locally owned diagonal entries [rstart, rend). */
PetscErrorCode MatShift_MPIDenseCUDA(Mat A, PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt     lda;

  MatDenseCUDAGetArray(A, &da);
  MatDenseGetLDA(A, &lda);
  PetscInfo(A, "Performing Shift on backend\n");
  MatShift_DenseCUDA_Private(da, alpha, lda, A->rmap->rstart, A->rmap->rend, A->cmap->N);
  MatDenseCUDARestoreArray(A, &da);
  return 0;
}

/* Expose column `col` as a CUDA vector without copying: place the device
   pointer for that column into the cached vector a->cvec. Must be paired
   with MatDenseRestoreColumnVec_MPIDenseCUDA. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  /* Lazily create the wrapper vector (no backing array of its own). */
  if (!a->cvec) { VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec); }
  a->vecinuse = col + 1; /* nonzero marks the vector as checked out */
  MatDenseGetLDA(a->A, &lda);
  MatDenseCUDAGetArray(a->A, (PetscScalar **)&a->ptrinuse);
  VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
  *v = a->cvec;
  return 0;
}

/* Return the column vector obtained with MatDenseGetColumnVec_MPIDenseCUDA:
   release the device array and detach it from the cached vector. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  a->vecinuse = 0;
  MatDenseCUDARestoreArray(a->A, (PetscScalar **)&a->ptrinuse);
  VecCUDAResetArray(a->cvec);
  if (v) *v = NULL; /* invalidate the caller's handle */
  return 0;
}

/* Read-only variant of MatDenseGetColumnVec_MPIDenseCUDA: same zero-copy
   column view, but the matrix array is taken read-only and the returned
   vector is read-locked. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  if (!a->cvec) { VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec); }
  a->vecinuse = col + 1;
  MatDenseGetLDA(a->A, &lda);
  MatDenseCUDAGetArrayRead(a->A, &a->ptrinuse);
  VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
  VecLockReadPush(a->cvec); /* forbid writes through the view */
  *v = a->cvec;
  return 0;
}

/* Release a read-only column view: restore the array, pop the read lock,
   and detach the device pointer from the cached vector. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  a->vecinuse = 0;
  MatDenseCUDARestoreArrayRead(a->A, &a->ptrinuse);
  VecLockReadPop(a->cvec);
  VecCUDAResetArray(a->cvec);
  if (v) *v = NULL;
  return 0;
}

/* Write-only variant of MatDenseGetColumnVec_MPIDenseCUDA: the matrix array
   is taken write-only (previous contents need not be valid). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  if (!a->cvec) { VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec); }
  a->vecinuse = col + 1;
  MatDenseGetLDA(a->A, &lda);
  MatDenseCUDAGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
  VecCUDAPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
  *v = a->cvec;
  return 0;
}

/* Release a write-only column view obtained with
   MatDenseGetColumnVecWrite_MPIDenseCUDA. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  a->vecinuse = 0;
  MatDenseCUDARestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
  VecCUDAResetArray(a->cvec);
  if (v) *v = NULL;
  return 0;
}

/* Substitute a caller-provided device array for the local block's storage. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  MatDenseCUDAPlaceArray(l->A, a);
  return 0;
}

/* Undo a previous MatDenseCUDAPlaceArray on the local block. */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  MatDenseCUDAResetArray(l->A);
  return 0;
}

/* Permanently replace the local block's device array with a new one. */
static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  MatDenseCUDAReplaceArray(l->A, a);
  return 0;
}

/* Write-only device-array access, forwarded to the local block. */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  MatDenseCUDAGetArrayWrite(l->A, a);
  return 0;
}

/* Counterpart of MatDenseCUDAGetArrayWrite_MPIDenseCUDA. */
static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  MatDenseCUDARestoreArrayWrite(l->A, a);
  return 0;
}

/* Read-only device-array access, forwarded to the local block. */
static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  MatDenseCUDAGetArrayRead(l->A, a);
  return 0;
}

/* Counterpart of MatDenseCUDAGetArrayRead_MPIDenseCUDA. */
static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  MatDenseCUDARestoreArrayRead(l->A, a);
  return 0;
}

/* Read-write device-array access, forwarded to the local block. */
static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  MatDenseCUDAGetArray(l->A, a);
  return 0;
}

/* Counterpart of MatDenseCUDAGetArray_MPIDenseCUDA. */
static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense *)A->data;

  MatDenseCUDARestoreArray(l->A, a);
  return 0;
}

1193: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1194: static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1195: static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat, PetscInt, Vec *);
1196: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat, PetscInt, Vec *);
1197: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat, PetscInt, Vec *);
1198: static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat, PetscInt, Vec *);
1199: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat, Mat *);

/* Bind the matrix to CPU or release it to the GPU: rewires the column-vector
   accessors and the shift operation to the CUDA or host implementations, and
   propagates the binding to the local block and cached submatrix. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat, PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense *)mat->data;

  if (d->A) MatBindToCPU(d->A, bind);
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* Switch default RNG to the device generator. */
    PetscFree(mat->defaultrandtype);
    PetscStrallocpy(PETSCCURAND, &mat->defaultrandtype);
    /* Cached helper objects of the wrong (host) type must be rebuilt lazily. */
    PetscObjectTypeCompare((PetscObject)d->cvec, VECMPICUDA, &iscuda);
    if (!iscuda) VecDestroy(&d->cvec);
    PetscObjectTypeCompare((PetscObject)d->cmat, MATMPIDENSECUDA, &iscuda);
    if (!iscuda) MatDestroy(&d->cmat);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDenseCUDA);
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    /* Host path: restore the default RNG and the CPU accessors. */
    PetscFree(mat->defaultrandtype);
    PetscStrallocpy(PETSCRANDER48, &mat->defaultrandtype);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense);
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) MatBindToCPU(d->cmat, bind);
  return 0;
}

/* Allocate (or adopt d_data as) the device storage for the local block of an
   MPIDENSECUDA matrix. A no-op for any other matrix type. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense *d = (Mat_MPIDense *)A->data;
  PetscBool     iscuda;

  PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda);
  if (!iscuda) return 0;
  PetscLayoutSetUp(A->rmap);
  PetscLayoutSetUp(A->cmap);
  if (!d->A) {
    /* Local block: all locally owned rows by ALL global columns. */
    MatCreate(PETSC_COMM_SELF, &d->A);
    MatSetSizes(d->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N);
  }
  MatSetType(d->A, MATSEQDENSECUDA);
  MatSeqDenseCUDASetPreallocation(d->A, d_data);
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  return 0;
}
1260: #endif

1262: static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1263: {
1264:   Mat_MPIDense *d = (Mat_MPIDense *)x->data;

1266:   MatSetRandom(d->A, rctx);
1267: #if defined(PETSC_HAVE_DEVICE)
1268:   x->offloadmask = d->A->offloadmask;
1269: #endif
1270:   return 0;
1271: }

/* A dense matrix stores every entry, so no diagonal entry is ever missing;
   `d` is never written. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
{
  *missing = PETSC_FALSE;
  return 0;
}

1279: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1280: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1281: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1282: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1283: static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1284: static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);

1286: /* -------------------------------------------------------------------*/
/* Operations table for MATMPIDENSE. Slot indices follow struct _MatOps in
   petsc/private/matimpl.h (marked every few entries by the inline comments);
   NULL slots fall back to MatXXX defaults or are unsupported. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
                                       MatGetRow_MPIDense,
                                       MatRestoreRow_MPIDense,
                                       MatMult_MPIDense,
                                       /*  4*/ MatMultAdd_MPIDense,
                                       MatMultTranspose_MPIDense,
                                       MatMultTransposeAdd_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatTranspose_MPIDense,
                                       /* 15*/ MatGetInfo_MPIDense,
                                       MatEqual_MPIDense,
                                       MatGetDiagonal_MPIDense,
                                       MatDiagonalScale_MPIDense,
                                       MatNorm_MPIDense,
                                       /* 20*/ MatAssemblyBegin_MPIDense,
                                       MatAssemblyEnd_MPIDense,
                                       MatSetOption_MPIDense,
                                       MatZeroEntries_MPIDense,
                                       /* 24*/ MatZeroRows_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPIDense,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIDense,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPIDense,
                                       MatCreateSubMatrices_MPIDense,
                                       NULL,
                                       MatGetValues_MPIDense,
                                       MatCopy_MPIDense,
                                       /* 44*/ NULL,
                                       MatScale_MPIDense,
                                       MatShift_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 49*/ MatSetRandom_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ MatCreateSubMatrix_MPIDense,
                                       MatDestroy_MPIDense,
                                       MatView_MPIDense,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 83*/ MatLoad_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                       MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /* 99*/ MatProductSetFromOptions_MPIDense,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIDense,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIDense,
                                       MatImaginaryPart_MPIDense,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetColumnVector_MPIDense,
                                       MatMissingDiagonal_MPIDense,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIDense,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                       MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL};

/* Allocate (or adopt `data` as) the storage for the local sequential block.
   The local block spans all locally owned rows by ALL global columns; its
   type follows the parallel type (CUDA or host). */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
{
  Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
  PetscBool     iscuda;

  PetscLayoutSetUp(mat->rmap);
  PetscLayoutSetUp(mat->cmap);
  if (!a->A) {
    MatCreate(PETSC_COMM_SELF, &a->A);
    MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N);
  }
  PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda);
  MatSetType(a->A, iscuda ? MATSEQDENSECUDA : MATSEQDENSE);
  MatSeqDenseSetPreallocation(a->A, data);
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = a->A->offloadmask;
#endif
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  return 0;
}

/* Convert a parallel AIJ matrix to parallel dense: merge the local AIJ parts
   into one sequential matrix, densify it, then concatenate the per-process
   dense blocks into the parallel result. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C);
  MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B);
  MatDestroy(&C);
  if (reuse == MAT_REUSE_MATRIX) {
    /* Reuse the caller's output matrix as the concatenation target. */
    C = *newmat;
  } else C = NULL;
  MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C);
  MatDestroy(&B);
  if (reuse == MAT_INPLACE_MATRIX) {
    MatHeaderReplace(A, &C);
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  return 0;
}

/* Convert a parallel dense matrix to parallel AIJ: sparsify the local block,
   then concatenate the per-process sequential AIJ pieces. */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat B, C;

  MatDenseGetLocalMatrix(A, &C); /* borrowed reference; not destroyed here */
  MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B);
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C);
  MatDestroy(&B);
  if (reuse == MAT_INPLACE_MATRIX) {
    MatHeaderReplace(A, &C);
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  return 0;
}

1497: #if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a parallel dense matrix to Elemental format by inserting every
   locally owned row into an Elemental matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat          mat_elemental;
  PetscScalar *v;
  PetscInt     m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols;

  if (reuse == MAT_REUSE_MATRIX) {
    /* Reuse path: ADD_VALUES below requires starting from zero. */
    mat_elemental = *newmat;
    MatZeroEntries(*newmat);
  } else {
    MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
    MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N);
    MatSetType(mat_elemental, MATELEMENTAL);
    MatSetUp(mat_elemental);
    /* Dense storage is column-oriented; tell Elemental to match. */
    MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE);
  }

  /* Global indices of the locally owned rows and of all columns. */
  PetscMalloc2(m, &rows, N, &cols);
  for (i = 0; i < N; i++) cols[i] = i;
  for (i = 0; i < m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  MatDenseGetArray(A, &v);
  MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES);
  MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
  MatDenseRestoreArray(A, &v);
  PetscFree2(rows, cols);

  if (reuse == MAT_INPLACE_MATRIX) {
    MatHeaderReplace(A, &mat_elemental);
  } else {
    *newmat = mat_elemental;
  }
  return 0;
}
1534: #endif

1536: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1537: {
1538:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1540:   MatDenseGetColumn(mat->A, col, vals);
1541:   return 0;
1542: }

1544: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1545: {
1546:   Mat_MPIDense *mat = (Mat_MPIDense *)A->data;

1548:   MatDenseRestoreColumn(mat->A, vals);
1549:   return 0;
1550: }

1552: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1553: {
1554:   Mat_MPIDense *mat;
1555:   PetscInt      m, nloc, N;

1557:   MatGetSize(inmat, &m, &N);
1558:   MatGetLocalSize(inmat, NULL, &nloc);
1559:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1560:     PetscInt sum;

1562:     if (n == PETSC_DECIDE) PetscSplitOwnership(comm, &n, &N);
1563:     /* Check sum(n) = N */
1564:     MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm);

1567:     MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat);
1568:     MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
1569:   }

1571:   /* numeric phase */
1572:   mat = (Mat_MPIDense *)(*outmat)->data;
1573:   MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN);
1574:   return 0;
1575: }

1577: #if defined(PETSC_HAVE_CUDA)
/* Convert an MPIDENSECUDA matrix back to host MPIDENSE: bind to CPU, reset
   the default vector type, unregister all CUDA-specific composed functions,
   and convert the local block to host storage. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat           B;
  Mat_MPIDense *m;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(M, MAT_COPY_VALUES, newmat);
  } else if (reuse == MAT_REUSE_MATRIX) {
    MatCopy(M, *newmat, SAME_NONZERO_PATTERN);
  }
  /* MAT_INPLACE_MATRIX: *newmat is M itself and is converted in place. */

  B = *newmat;
  MatBindToCPU_MPIDenseCUDA(B, PETSC_TRUE);
  PetscFree(B->defaultvectype);
  PetscStrallocpy(VECSTANDARD, &B->defaultvectype);
  PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSE);
  /* Composing NULL removes each CUDA-only method from the object. */
  PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", NULL);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", NULL);
  m = (Mat_MPIDense *)(B)->data;
  if (m->A) MatConvert(m->A, MATSEQDENSE, MAT_INPLACE_MATRIX, &m->A);
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  return 0;
}

/* Convert a MATMPIDENSE matrix to MATMPIDENSECUDA.

   Creates or reuses the target matrix per `reuse`, switches the default vector
   type to VECCUDA, swaps in the CUDA implementations of the composed methods,
   converts the inner sequential block to MATSEQDENSECUDA, and installs the
   CUDA bind-to-cpu hook.

   Input Parameters:
.  M      - the MATMPIDENSE matrix to convert
.  type   - target type (unused here; conversion target is fixed)
-  reuse  - MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, or MAT_INPLACE_MATRIX

   Output Parameter:
.  newmat - the converted matrix */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat           B;
  Mat_MPIDense *m;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(M, MAT_COPY_VALUES, newmat);
  } else if (reuse == MAT_REUSE_MATRIX) {
    MatCopy(M, *newmat, SAME_NONZERO_PATTERN);
  }
  /* For MAT_INPLACE_MATRIX neither branch runs, so B below aliases M itself */

  B = *newmat;
  /* newly created vectors associated with this matrix should live on the GPU */
  PetscFree(B->defaultvectype);
  PetscStrallocpy(VECCUDA, &B->defaultvectype);
  PetscObjectChangeTypeName((PetscObject)B, MATMPIDENSECUDA);
  /* Re-register composed methods with their CUDA-aware implementations;
     the reverse conversion undoes these by composing NULL */
  PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);
  PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
  PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
  PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
  PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B, "MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);
  m = (Mat_MPIDense *)(B->data);
  if (m->A) {
    /* inner block already allocated: convert it; CPU copy remains valid too */
    MatConvert(m->A, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &m->A);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  MatBindToCPU_MPIDenseCUDA(B, PETSC_FALSE);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  return 0;
}
1656: #endif

/* Give read/write Vec access to column `col` of the distributed dense matrix.

   The cached column vector a->cvec is created lazily (with a NULL array) on
   first use; each call points it at the requested column of the local block.
   Must be paired with MatDenseRestoreColumnVec_MPIDense(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  if (!a->cvec) VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
  /* store col+1 so that 0 can mean "no column vector outstanding" */
  a->vecinuse = col + 1;
  MatDenseGetLDA(a->A, &lda);
  MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse);
  /* column col starts at offset col*lda in the column-major local storage */
  VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
  *v = a->cvec;
  return 0;
}

/* Release the column vector obtained with MatDenseGetColumnVec_MPIDense():
   clears the in-use marker, returns the array to the inner matrix, and
   detaches the placed array from the cached vector. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  a->vecinuse = 0;
  MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse);
  VecResetArray(a->cvec);
  if (v) *v = NULL; /* invalidate the caller's handle */
  return 0;
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(): same column-aliasing
   scheme, but uses the read-only array accessor and additionally locks the
   vector against writes. Pair with MatDenseRestoreColumnVecRead_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  if (!a->cvec) VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
  /* col+1 so 0 means "not in use" */
  a->vecinuse = col + 1;
  MatDenseGetLDA(a->A, &lda);
  MatDenseGetArrayRead(a->A, &a->ptrinuse);
  VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
  /* forbid writes through the returned vector */
  VecLockReadPush(a->cvec);
  *v = a->cvec;
  return 0;
}

/* Release a column vector obtained with MatDenseGetColumnVecRead_MPIDense():
   restores the read-only array, pops the read lock, and detaches the array. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  a->vecinuse = 0;
  MatDenseRestoreArrayRead(a->A, &a->ptrinuse);
  VecLockReadPop(a->cvec);
  VecResetArray(a->cvec);
  if (v) *v = NULL;
  return 0;
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense(): uses the write-only
   array accessor (existing values need not be valid). Pair with
   MatDenseRestoreColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  PetscInt      lda;

  if (!a->cvec) VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, A->rmap->N, NULL, &a->cvec);
  /* col+1 so 0 means "not in use" */
  a->vecinuse = col + 1;
  MatDenseGetLDA(a->A, &lda);
  MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
  VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda);
  *v = a->cvec;
  return 0;
}

/* Release a column vector obtained with MatDenseGetColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;

  a->vecinuse = 0;
  MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse);
  VecResetArray(a->cvec);
  if (v) *v = NULL;
  return 0;
}

/* Return a view a->cmat on the submatrix with global rows [rbegin,rend) and
   columns [cbegin,cend). The cached view matrix is created on first use and
   its layouts are rebuilt only when the requested sizes differ from the
   cached ones. Pair with MatDenseRestoreSubMatrix_MPIDense(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;
  MPI_Comm      comm;
  PetscInt      pbegin, pend; /* requested global row range clamped to this rank's ownership, in local indices */

  PetscObjectGetComm((PetscObject)A, &comm);
  pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
  pend   = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
  if (!a->cmat) {
    MatCreate(comm, &a->cmat);
    MatSetType(a->cmat, ((PetscObject)A)->type_name);
    /* full row range: share A's row layout instead of building a new one */
    if (rend - rbegin == A->rmap->N) PetscLayoutReference(A->rmap, &a->cmat->rmap);
    else {
      PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin);
      PetscLayoutSetSize(a->cmat->rmap, rend - rbegin);
      PetscLayoutSetUp(a->cmat->rmap);
    }
    PetscLayoutSetSize(a->cmat->cmap, cend - cbegin);
    PetscLayoutSetUp(a->cmat->cmap);
  } else {
    /* cached view exists: rebuild its layouts only if the sizes changed */
    PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* global size matches but layout is not A's: local sizes must agree on every rank */
      same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
      MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A));
    }
    if (!same) {
      PetscLayoutDestroy(&a->cmat->rmap);
      PetscLayoutCreate(comm, &a->cmat->rmap);
      PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin);
      PetscLayoutSetSize(a->cmat->rmap, rend - rbegin);
      PetscLayoutSetUp(a->cmat->rmap);
    }
    if (cend - cbegin != a->cmat->cmap->N) {
      PetscLayoutDestroy(&a->cmat->cmap);
      PetscLayoutCreate(comm, &a->cmat->cmap);
      PetscLayoutSetSize(a->cmat->cmap, cend - cbegin);
      PetscLayoutSetUp(a->cmat->cmap);
    }
  }
  c = (Mat_MPIDense *)a->cmat->data;
  /* the view's inner sequential block aliases the corresponding piece of a->A */
  MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A);

  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
#if defined(PETSC_HAVE_DEVICE)
  a->cmat->offloadmask = c->A->offloadmask;
#endif
  /* cbegin+1 so 0 means "no submatrix outstanding" */
  a->matinuse = cbegin + 1;
  *v          = a->cmat;
  return 0;
}

/* Release the submatrix view obtained with MatDenseGetSubMatrix_MPIDense():
   clears the in-use marker, restores the inner sequential view, and
   resynchronizes A's offload state from its local block. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense *)A->data;
  Mat_MPIDense *c;

  a->matinuse = 0;
  c           = (Mat_MPIDense *)a->cmat->data;
  MatDenseRestoreSubMatrix(a->A, &c->A);
  if (v) *v = NULL; /* invalidate the caller's handle */
#if defined(PETSC_HAVE_DEVICE)
  A->offloadmask = a->A->offloadmask;
#endif
  return 0;
}

1822: /*MC
1823:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

1825:    Options Database Keys:
1826: . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`

1828:   Level: beginner

1830: .seealso: `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1831: M*/
/* Type constructor for MATMPIDENSE: allocates the implementation data,
   installs the function table, sets up the value stash for off-rank entries,
   and registers all composed (queryable) method implementations. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscNew(&a);
  mat->data = (void *)a;
  PetscMemcpy(mat->ops, &MatOps_Values, sizeof(struct _MatOps));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* array and layout accessors */
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense);
  /* column-vector and submatrix views */
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense);
  /* conversions to/from other matrix types (optional ones gated on configure) */
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ);
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK);
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA);
#endif
  PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense);
  /* matrix-matrix product dispatch */
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
#if defined(PETSC_HAVE_CUDA)
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
#endif

  PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense);
  PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense);
  PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE);
  return 0;
}

1896: /*MC
1897:    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

1899:    Options Database Keys:
1900: . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`

1902:   Level: beginner

1904: .seealso: `MATMPIDENSE`, `MATSEQDENSE`, `MATSEQDENSECUDA`
1905: M*/
1906: #if defined(PETSC_HAVE_CUDA)
1907: #include <petsc/private/deviceimpl.h>
/* Type constructor for MATMPIDENSECUDA: ensures the CUDA device is
   initialized, then builds a MATMPIDENSE and converts it in place. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscDeviceInitialize(PETSC_DEVICE_CUDA);
  MatCreate_MPIDense(B);
  MatConvert_MPIDense_MPIDenseCUDA(B, MATMPIDENSECUDA, MAT_INPLACE_MATRIX, &B);
  return 0;
}
1915: #endif

1917: /*MC
1918:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1920:    This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1921:    and `MATMPIDENSE` otherwise.

1923:    Options Database Keys:
1924: . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`

1926:   Level: beginner

1928: .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
1929: M*/

1931: /*MC
1932:    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

1934:    This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process communicator,
1935:    and `MATMPIDENSECUDA` otherwise.

1937:    Options Database Keys:
1938: . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`

1940:   Level: beginner

1942: .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1943: M*/

1945: /*@C
1946:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1948:    Collective

1950:    Input Parameters:
1951: .  B - the matrix
1952: -  data - optional location of matrix data.  Set data=NULL for PETSc
1953:    to control all matrix memory allocation.

1955:    Notes:
1956:    The dense format is fully compatible with standard Fortran 77
1957:    storage by columns.

1959:    The data input variable is intended primarily for Fortran programmers
1960:    who wish to allocate their own matrix memory space.  Most users should
1961:    set data=NULL.

1963:    Level: intermediate

1965: .seealso: `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1966: @*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
{
  /* PetscTryMethod: silently a no-op when B's type did not compose this method,
     so it is safe to call on any matrix type (e.g. from MatCreateDense()) */
  PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
  return 0;
}

1974: /*@
1975:    MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1976:    array provided by the user. This is useful to avoid copying an array
1977:    into a matrix

1979:    Not Collective

1981:    Input Parameters:
1982: +  mat - the matrix
1983: -  array - the array in column major order

1985:    Note:
1986:    You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1987:    freed when the matrix is destroyed.

1989:    Level: developer

1991: .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1992:           `MatDenseReplaceArray()`
1993: @*/
PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
{
  /* PetscUseMethod errors if the type did not compose this method */
  PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  /* values may change through the placed array: bump the state counter */
  PetscObjectStateIncrease((PetscObject)mat);
#if defined(PETSC_HAVE_DEVICE)
  /* the user-supplied array is host memory, so the CPU copy is now current */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  return 0;
}

2005: /*@
2006:    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to `MatDensePlaceArray()`

2008:    Not Collective

2010:    Input Parameters:
2011: .  mat - the matrix

2013:    Note:
2014:    You can only call this after a call to `MatDensePlaceArray()`

2016:    Level: developer

2018: .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2019: @*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  /* dispatch to the type-specific implementation (errors if not composed) */
  PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
  PetscObjectStateIncrease((PetscObject)mat);
  return 0;
}

2028: /*@
2029:    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2030:    array provided by the user. This is useful to avoid copying an array
2031:    into a matrix

2033:    Not Collective

2035:    Input Parameters:
2036: +  mat - the matrix
2037: -  array - the array in column major order

2039:    Note:
2040:    The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
2041:    freed by the user. It will be freed when the matrix is destroyed.

2043:    Level: developer

2045: .seealso: `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
2046: @*/
PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
{
  /* dispatch to the type-specific implementation (errors if not composed) */
  PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscObjectStateIncrease((PetscObject)mat);
#if defined(PETSC_HAVE_DEVICE)
  /* replacement array is host memory, so the CPU copy is now current */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  return 0;
}

2058: #if defined(PETSC_HAVE_CUDA)
2059: /*@C
2060:    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2061:    array provided by the user. This is useful to avoid copying an array
2062:    into a matrix

2064:    Not Collective

2066:    Input Parameters:
2067: +  mat - the matrix
2068: -  array - the array in column major order

2070:    Note:
2071:    You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is responsible for freeing this array; it will not be
2072:    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

2074:    Level: developer

2076: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, `MatDenseCUDAReplaceArray()`
2077: @*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
{
  /* dispatch to the type-specific implementation (errors if not composed) */
  PetscUseMethod(mat, "MatDenseCUDAPlaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscObjectStateIncrease((PetscObject)mat);
  /* the placed array is device memory, so the GPU copy is now current */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  return 0;
}

2087: /*@C
2088:    MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to `MatDenseCUDAPlaceArray()`

2090:    Not Collective

2092:    Input Parameters:
2093: .  mat - the matrix

2095:    Note:
2096:    You can only call this after a call to `MatDenseCUDAPlaceArray()`

2098:    Level: developer

2100: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2101: @*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  /* dispatch to the type-specific implementation (errors if not composed) */
  PetscUseMethod(mat, "MatDenseCUDAResetArray_C", (Mat), (mat));
  PetscObjectStateIncrease((PetscObject)mat);
  return 0;
}

2110: /*@C
2111:    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
2112:    array provided by the user. This is useful to avoid copying an array
2113:    into a matrix

2115:    Not Collective

2117:    Input Parameters:
2118: +  mat - the matrix
2119: -  array - the array in column major order

2121:    Note:
2122:    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2123:    The memory passed in CANNOT be freed by the user. It will be freed
2124:    when the matrix is destroyed. The array should respect the matrix leading dimension.

2126:    Level: developer

2128: .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2129: @*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
{
  /* dispatch to the type-specific implementation (errors if not composed) */
  PetscUseMethod(mat, "MatDenseCUDAReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
  PetscObjectStateIncrease((PetscObject)mat);
  /* replacement array is device memory, so the GPU copy is now current */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  return 0;
}

2139: /*@C
2140:    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` matrix.

2142:    Not Collective

2144:    Input Parameters:
2145: .  A - the matrix

2147:    Output Parameter:
2148: .  array - the GPU array in column major order

2150:    Notes:
2151:    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use `MatDenseCUDAGetArray()`.

2153:    The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

2155:    Level: developer

2157: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2158: @*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  /* dispatch to the type-specific implementation (errors if not composed) */
  PetscUseMethod(A, "MatDenseCUDAGetArrayWrite_C", (Mat, PetscScalar **), (A, a));
  /* write access: assume values will change */
  PetscObjectStateIncrease((PetscObject)A);
  return 0;
}

2167: /*@C
2168:    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

2170:    Not Collective

2172:    Input Parameters:
2173: +  A - the matrix
2174: -  array - the GPU array in column major order

2176:    Level: developer

2178: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2179: @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  /* dispatch to the type-specific implementation (errors if not composed) */
  PetscUseMethod(A, "MatDenseCUDARestoreArrayWrite_C", (Mat, PetscScalar **), (A, a));
  PetscObjectStateIncrease((PetscObject)A);
  /* values were written on the device, so only the GPU copy is current */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  return 0;
}

2189: /*@C
2190:    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when no longer needed.

2192:    Not Collective

2194:    Input Parameters:
2195: .  A - the matrix

2197:    Output Parameter:
2198: .  array - the GPU array in column major order

2200:    Note:
2201:    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.

2203:    Level: developer

2205: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2206: @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  /* read-only access: no state increase needed */
  PetscUseMethod(A, "MatDenseCUDAGetArrayRead_C", (Mat, const PetscScalar **), (A, a));
  return 0;
}

2214: /*@C
2215:    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

2217:    Not Collective

2219:    Input Parameters:
2220: +  A - the matrix
2221: -  array - the GPU array in column major order

2223:    Note:
2224:    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`.

2226:    Level: developer

2228: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2229: @*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  /* read-only access: no state increase or offloadmask change needed */
  PetscUseMethod(A, "MatDenseCUDARestoreArrayRead_C", (Mat, const PetscScalar **), (A, a));
  return 0;
}

2236: /*@C
2237:    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

2239:    Not Collective

2241:    Input Parameters:
2242: .  A - the matrix

2244:    Output Parameter:
2245: .  array - the GPU array in column major order

2247:    Note:
2248:    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use `MatDenseCUDAGetArrayRead()`.

2250:    Level: developer

2252: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2253: @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  /* dispatch to the type-specific implementation (errors if not composed) */
  PetscUseMethod(A, "MatDenseCUDAGetArray_C", (Mat, PetscScalar **), (A, a));
  /* read-write access: assume values may change */
  PetscObjectStateIncrease((PetscObject)A);
  return 0;
}

2262: /*@C
2263:    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArray()`.

2265:    Not Collective

2267:    Input Parameters:
2268: +  A - the matrix
2269: -  array - the GPU array in column major order

2271:    Level: developer

2273: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2274: @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  /* dispatch to the type-specific implementation (errors if not composed) */
  PetscUseMethod(A, "MatDenseCUDARestoreArray_C", (Mat, PetscScalar **), (A, a));
  PetscObjectStateIncrease((PetscObject)A);
  /* values may have been written on the device, so the GPU copy is current */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  return 0;
}
2283: #endif

2285: /*@C
2286:    MatCreateDense - Creates a matrix in `MATDENSE` format.

2288:    Collective

2290:    Input Parameters:
2291: +  comm - MPI communicator
2292: .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2293: .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2294: .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2295: .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2296: -  data - optional location of matrix data.  Set data to NULL (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
2297:    to control all matrix memory allocation.

2299:    Output Parameter:
2300: .  A - the matrix

2302:    Notes:
2303:    The dense format is fully compatible with standard Fortran 77
2304:    storage by columns.

2306:    Although local portions of the matrix are stored in column-major
2307:    order, the matrix is partitioned across MPI ranks by row.

2309:    The data input variable is intended primarily for Fortran programmers
2310:    who wish to allocate their own matrix memory space.  Most users should
2311:    set data=NULL (`PETSC_NULL_SCALAR` for Fortran users).

2313:    The user MUST specify either the local or global matrix dimensions
2314:    (possibly both).

2316:    Level: intermediate

2318: .seealso: `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2319: @*/
PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  MatCreate(comm, A);
  MatSetSizes(*A, m, n, M, N);
  MatSetType(*A, MATDENSE);
  /* both preallocation calls are try-methods: only the one matching the
     resolved type (MATSEQDENSE on one rank, MATMPIDENSE otherwise) acts */
  MatSeqDenseSetPreallocation(*A, data);
  MatMPIDenseSetPreallocation(*A, data);
  return 0;
}

2330: #if defined(PETSC_HAVE_CUDA)
2331: /*@C
2332:    MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

2334:    Collective

2336:    Input Parameters:
2337: +  comm - MPI communicator
2338: .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2339: .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2340: .  M - number of global rows (or `PETSC_DECIDE` to have calculated if m is given)
2341: .  N - number of global columns (or `PETSC_DECIDE` to have calculated if n is given)
2342: -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2343:    to control matrix memory allocation.

2345:    Output Parameter:
2346: .  A - the matrix

2348:    Level: intermediate

2350: .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
2351: @*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  MatCreate(comm, A);
  MatSetSizes(*A, m, n, M, N);
  MatSetType(*A, MATDENSECUDA);
  /* both preallocation calls are try-methods: only the one matching the
     resolved type (seq vs mpi) takes effect */
  MatSeqDenseCUDASetPreallocation(*A, data);
  MatMPIDenseCUDASetPreallocation(*A, data);
  return 0;
}
2362: #endif

/* Duplicate a MATMPIDENSE matrix: creates a new matrix of the same type and
   sizes sharing A's layouts, copies the relevant flags, and duplicates the
   local sequential block (with or without values per `cpvalues`). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;

  *newmat = NULL;
  MatCreate(PetscObjectComm((PetscObject)A), &mat);
  MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
  /* preserve the dynamic type (e.g. the CUDA subtype), not just MATMPIDENSE */
  MatSetType(mat, ((PetscObject)A)->type_name);
  a = (Mat_MPIDense *)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference-count) A's layouts rather than rebuilding them */
  PetscLayoutReference(A->rmap, &mat->rmap);
  PetscLayoutReference(A->cmap, &mat->cmap);

  MatDuplicate(oldmat->A, cpvalues, &a->A);

  *newmat = mat;
  return 0;
}

/* Load a MATMPIDENSE matrix from a viewer: dispatches on the viewer type
   (binary always; HDF5 when configured) and errors for anything else. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  /* force binary viewer to load .info file if it has not yet done so */
  PetscViewerSetUp(viewer);
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
#if defined(PETSC_HAVE_HDF5)
  PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5);
#endif
  if (isbinary) {
    MatLoad_Dense_Binary(newMat, viewer);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    MatLoad_Dense_HDF5(newMat, viewer);
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  return 0;
}

/* Parallel equality test: compare the local sequential blocks, then combine
   the per-rank results with a logical-AND reduction so every rank returns
   the same answer. */
static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
  Mat           a, b;

  a = matA->A;
  b = matB->A;
  MatEqual(a, b, flag);
  MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A));
  return 0;
}

/* Product-data destructor for C = A^T * B: frees the communication buffers,
   the cached intermediate product, and the container itself. */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFree2(atb->sendbuf, atb->recvcounts);
  MatDestroy(&atb->atb);
  PetscFree(atb);
  return 0;
}

/* Product-data destructor for C = A * B^T: frees the double buffers and the
   receive bookkeeping arrays, then the container itself. */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFree2(abt->buf[0], abt->buf[1]);
  PetscFree2(abt->recvcounts, abt->recvdispls);
  PetscFree(abt);
  return 0;
}

/*
   Numeric phase of C = A^T * B for MPI dense matrices.

   Each rank forms the full-size local product a->A^T * b->A (global rows of
   C by global columns of C, all partial sums), packs it by destination rank,
   and a single MPI_Reduce_scatter both sums the contributions and delivers
   each rank its owned row block of C.
*/
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm               comm;
  PetscMPIInt            size, *recvcounts;
  PetscScalar           *carray, *sendbuf;
  const PetscScalar     *atbarray;
  PetscInt               i, cN = C->cmap->N, proc, k, j, lda;
  const PetscInt        *ranges;

  MatCheckProduct(C, 3);
  /* sendbuf and recvcounts were allocated by the symbolic phase and are reused here */
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscObjectGetComm((PetscObject)A, &comm);
  MPI_Comm_size(comm, &size);

  /* compute atbarray = aseq^T * bseq */
  MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb);

  MatGetOwnershipRanges(C, &ranges);

  /* arrange atbarray into sendbuf */
  MatDenseGetArrayRead(atb->atb, &atbarray);
  MatDenseGetLDA(atb->atb, &lda);
  /* pack contiguously per destination rank: for each rank, all cN columns of the rows it owns */
  for (proc = 0, k = 0; proc < size; proc++) {
    for (j = 0; j < cN; j++) {
      for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
    }
  }
  MatDenseRestoreArrayRead(atb->atb, &atbarray);

  /* sum all atbarray to local values of C */
  MatDenseGetArrayWrite(c->A, &carray);
  MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm);
  MatDenseRestoreArrayWrite(c->A, &carray);
  MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
  return 0;
}

/*
   Symbolic phase of C = A^T * B for MPI dense matrices: validates layouts,
   sizes and types C, and allocates the pack buffer and reduce-scatter counts
   reused by the numeric phase.
*/
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            size;
  PetscInt               cm = A->cmap->n, cM, cN = B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool              cisdense;
  PetscInt               i;
  const PetscInt        *ranges;

  MatCheckProduct(C, 4);
  PetscObjectGetComm((PetscObject)A, &comm);
  /* the product contracts over the rows of A and B, so their row layouts must match */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
  }

  /* create matrix product C */
  MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N);
  PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "");
  if (!cisdense) MatSetType(C, ((PetscObject)A)->type_name);
  MatSetUp(C);

  /* create data structure for reuse C */
  MPI_Comm_size(comm, &size);
  PetscNew(&atb);
  cM = C->rmap->N;
  /* sendbuf holds the full global-size partial product; recvcounts gives each rank's owned-row chunk */
  PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts);
  MatGetOwnershipRanges(C, &ranges);
  for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  return 0;
}

/*
   Symbolic phase of C = A * B^T for MPI dense matrices: sizes C, picks the
   communication algorithm ("allgatherv" or "cyclic"), and allocates the
   exchange buffers reused by the numeric phase.
*/
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm               comm;
  PetscMPIInt            i, size;
  PetscInt               maxRows, bufsiz;
  PetscMPIInt            tag;
  PetscInt               alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool              flg;

  MatCheckProduct(C, 4);
  /* check local size of A and B */

  /* alg 0: "allgatherv" (default), alg 1: "cyclic" */
  PetscStrcmp(product->alg, "allgatherv", &flg);
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N);
  MatSetType(C, MATMPIDENSE);
  MatSetUp(C);
  /* a fresh tag keeps the cyclic point-to-point traffic from colliding with other messages */
  PetscObjectGetNewTag((PetscObject)C, &tag);

  /* create data structure for reuse C */
  PetscObjectGetComm((PetscObject)C, &comm);
  MPI_Comm_size(comm, &size);
  PetscNew(&abt);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers, each large enough for the biggest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1]));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0]: local transposed block of B; buf[1]: gathered global transpose */
    PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));
    PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  return 0;
}

/*
   Numeric phase of C = A * B^T using a ring (cyclic) exchange.

   Each rank's (compacted) transposed block of B travels around the ring of
   ranks; on every step the block currently held is multiplied against the
   local rows of A to fill the matching column block of C, while the next
   block is already in flight (Isend/Irecv overlap the GEMM).
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm               comm;
  PetscMPIInt            rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf = NULL, *cv;
  PetscInt               i, cK             = A->cmap->N, k, j, bn;
  PetscScalar            _DOne = 1.0, _DZero = 0.0;
  const PetscScalar     *av, *bv;
  PetscBLASInt           cm, cn, ck, alda, blda = 0, clda;
  MPI_Request            reqs[2];
  const PetscInt        *ranges;

  MatCheckProduct(C, 3);
  abt = (Mat_MatTransMultDense *)C->product->data;
  PetscObjectGetComm((PetscObject)C, &comm);
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);
  MatDenseGetArrayRead(a->A, &av);
  MatDenseGetArrayRead(b->A, &bv);
  MatDenseGetArrayWrite(c->A, &cv);
  MatDenseGetLDA(a->A, &i);
  PetscBLASIntCast(i, &alda);
  MatDenseGetLDA(b->A, &i);
  PetscBLASIntCast(i, &blda);
  MatDenseGetLDA(c->A, &i);
  PetscBLASIntCast(i, &clda);
  MatGetOwnershipRanges(B, &ranges);
  bn = B->rmap->n;
  /* if the local block of B is contiguous (lda equals local row count) it can be
     sent in place; otherwise compact it into buf[0] first */
  if (blda == bn) {
    sendbuf = (PetscScalar *)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscBLASIntCast(cK, &ck);
  PetscBLASIntCast(c->A->rmap->n, &cm);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate ping-pong buffers each step */
      MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);
      MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);
    }

    /* local aseq * sendbuf^T */
    PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);
    if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  MatDenseRestoreArrayRead(a->A, &av);
  MatDenseRestoreArrayRead(b->A, &bv);
  MatDenseRestoreArrayWrite(c->A, &cv);
  MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
  return 0;
}

2661: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2662: {
2663:   Mat_MPIDense          *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2664:   Mat_MatTransMultDense *abt;
2665:   MPI_Comm               comm;
2666:   PetscMPIInt            size;
2667:   PetscScalar           *cv, *sendbuf, *recvbuf;
2668:   const PetscScalar     *av, *bv;
2669:   PetscInt               blda, i, cK = A->cmap->N, k, j, bn;
2670:   PetscScalar            _DOne = 1.0, _DZero = 0.0;
2671:   PetscBLASInt           cm, cn, ck, alda, clda;

2673:   MatCheckProduct(C, 3);
2675:   abt = (Mat_MatTransMultDense *)C->product->data;
2676:   PetscObjectGetComm((PetscObject)A, &comm);
2677:   MPI_Comm_size(comm, &size);
2678:   MatDenseGetArrayRead(a->A, &av);
2679:   MatDenseGetArrayRead(b->A, &bv);
2680:   MatDenseGetArrayWrite(c->A, &cv);
2681:   MatDenseGetLDA(a->A, &i);
2682:   PetscBLASIntCast(i, &alda);
2683:   MatDenseGetLDA(b->A, &blda);
2684:   MatDenseGetLDA(c->A, &i);
2685:   PetscBLASIntCast(i, &clda);
2686:   /* copy transpose of B into buf[0] */
2687:   bn      = B->rmap->n;
2688:   sendbuf = abt->buf[0];
2689:   recvbuf = abt->buf[1];
2690:   for (k = 0, j = 0; j < bn; j++) {
2691:     for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2692:   }
2693:   MatDenseRestoreArrayRead(b->A, &bv);
2694:   MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);
2695:   PetscBLASIntCast(cK, &ck);
2696:   PetscBLASIntCast(c->A->rmap->n, &cm);
2697:   PetscBLASIntCast(c->A->cmap->n, &cn);
2698:   if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2699:   MatDenseRestoreArrayRead(a->A, &av);
2700:   MatDenseRestoreArrayRead(b->A, &bv);
2701:   MatDenseRestoreArrayWrite(c->A, &cv);
2702:   MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
2703:   MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
2704:   return 0;
2705: }

2707: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2708: {
2709:   Mat_MatTransMultDense *abt;

2711:   MatCheckProduct(C, 3);
2713:   abt = (Mat_MatTransMultDense *)C->product->data;
2714:   switch (abt->alg) {
2715:   case 1:
2716:     MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);
2717:     break;
2718:   default:
2719:     MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);
2720:     break;
2721:   }
2722:   return 0;
2723: }

2725: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2726: {
2727:   Mat_MatMultDense *ab = (Mat_MatMultDense *)data;

2729:   MatDestroy(&ab->Ce);
2730:   MatDestroy(&ab->Ae);
2731:   MatDestroy(&ab->Be);
2732:   PetscFree(ab);
2733:   return 0;
2734: }

2736: #if defined(PETSC_HAVE_ELEMENTAL)
/*
   Numeric phase of C = A * B for MPI dense matrices, performed by converting
   the operands into the cached Elemental mirrors, multiplying in Elemental,
   and converting the Elemental product back into C.
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatMultDense *ab;

  MatCheckProduct(C, 3);
  ab = (Mat_MatMultDense *)C->product->data;
  /* Ae/Be/Ce were created by the symbolic phase and are reused here */
  MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae);
  MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be);
  MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce);
  MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C);
  return 0;
}

2751: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2752: {
2753:   Mat               Ae, Be, Ce;
2754:   Mat_MatMultDense *ab;

2756:   MatCheckProduct(C, 4);
2758:   /* check local size of A and B */
2759:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2760:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2761:   }

2763:   /* create elemental matrices Ae and Be */
2764:   MatCreate(PetscObjectComm((PetscObject)A), &Ae);
2765:   MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N);
2766:   MatSetType(Ae, MATELEMENTAL);
2767:   MatSetUp(Ae);
2768:   MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE);

2770:   MatCreate(PetscObjectComm((PetscObject)B), &Be);
2771:   MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N);
2772:   MatSetType(Be, MATELEMENTAL);
2773:   MatSetUp(Be);
2774:   MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE);

2776:   /* compute symbolic Ce = Ae*Be */
2777:   MatCreate(PetscObjectComm((PetscObject)C), &Ce);
2778:   MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce);

2780:   /* setup C */
2781:   MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
2782:   MatSetType(C, MATDENSE);
2783:   MatSetUp(C);

2785:   /* create data structure for reuse Cdense */
2786:   PetscNew(&ab);
2787:   ab->Ae = Ae;
2788:   ab->Be = Be;
2789:   ab->Ce = Ce;

2791:   C->product->data       = ab;
2792:   C->product->destroy    = MatDestroy_MatMatMult_MPIDense_MPIDense;
2793:   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2794:   return 0;
2795: }
2796: #endif
2797: /* ----------------------------------------------- */
2798: #if defined(PETSC_HAVE_ELEMENTAL)
2799: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2800: {
2801:   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2802:   C->ops->productsymbolic = MatProductSymbolic_AB;
2803:   return 0;
2804: }
2805: #endif

2807: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2808: {
2809:   Mat_Product *product = C->product;
2810:   Mat          A = product->A, B = product->B;

2812:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2813:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2814:   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2815:   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2816:   return 0;
2817: }

/*
   Configure an A*B^T product of MPI dense matrices: selects between the
   "allgatherv" (default) and "cyclic" communication algorithms, honoring the
   relevant runtime option, then installs the symbolic routines.
*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product     = C->product;
  const char  *algTypes[2] = {"allgatherv", "cyclic"};
  PetscInt     alg, nalg = 2;
  PetscBool    flg = PETSC_FALSE;

  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscStrcmp(product->alg, "default", &flg);
  if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);

  /* Get runtime option: the option name depends on which API the user called */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
    PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg);
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
    PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg);
    PetscOptionsEnd();
  }
  if (flg) MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]);

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  return 0;
}

/*
   Dispatch MatProduct configuration for MPI dense matrices based on the
   requested product type.  AB is only available when PETSc was configured
   with Elemental; unsupported types are left untouched for other handlers.
*/
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    MatProductSetFromOptions_MPIDense_AB(C);
    break;
#endif
  case MATPRODUCT_AtB:
    MatProductSetFromOptions_MPIDense_AtB(C);
    break;
  case MATPRODUCT_ABt:
    MatProductSetFromOptions_MPIDense_ABt(C);
    break;
  default:
    break;
  }
  return 0;
}