Actual source code: dense.c

  1: /*
  2:      Defines the basic matrix operations for sequential dense.
  3: */

  5: #include <../src/mat/impls/dense/seq/dense.h>
  6: #include <petscblaslapack.h>
  7: #include <../src/mat/impls/aij/seq/aij.h>

  9: PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian)
 10: {
 11:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
 12:   PetscInt      j, k, n = A->rmap->n;
 13:   PetscScalar  *v;

 16:   MatDenseGetArray(A, &v);
 17:   if (!hermitian) {
 18:     for (k = 0; k < n; k++) {
 19:       for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j];
 20:     }
 21:   } else {
 22:     for (k = 0; k < n; k++) {
 23:       for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]);
 24:     }
 25:   }
 26:   MatDenseRestoreArray(A, &v);
 27:   return 0;
 28: }

 30: PETSC_EXTERN PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A)
 31: {
 32:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
 33:   PetscBLASInt  info, n;

 35:   if (!A->rmap->n || !A->cmap->n) return 0;
 36:   PetscBLASIntCast(A->cmap->n, &n);
 37:   if (A->factortype == MAT_FACTOR_LU) {
 39:     if (!mat->fwork) {
 40:       mat->lfwork = n;
 41:       PetscMalloc1(mat->lfwork, &mat->fwork);
 42:     }
 43:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 44:     PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
 45:     PetscFPTrapPop();
 46:     PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
 47:   } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
 48:     if (A->spd == PETSC_BOOL3_TRUE) {
 49:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 50:       PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info));
 51:       PetscFPTrapPop();
 52:       MatSeqDenseSymmetrize_Private(A, PETSC_TRUE);
 53: #if defined(PETSC_USE_COMPLEX)
 54:     } else if (A->hermitian == PETSC_BOOL3_TRUE) {
 57:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 58:       PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
 59:       PetscFPTrapPop();
 60:       MatSeqDenseSymmetrize_Private(A, PETSC_TRUE);
 61: #endif
 62:     } else { /* symmetric case */
 65:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 66:       PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
 67:       PetscFPTrapPop();
 68:       MatSeqDenseSymmetrize_Private(A, PETSC_FALSE);
 69:     }
 71:     PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
 72:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve");

 74:   A->ops->solve             = NULL;
 75:   A->ops->matsolve          = NULL;
 76:   A->ops->solvetranspose    = NULL;
 77:   A->ops->matsolvetranspose = NULL;
 78:   A->ops->solveadd          = NULL;
 79:   A->ops->solvetransposeadd = NULL;
 80:   A->factortype             = MAT_FACTOR_NONE;
 81:   PetscFree(A->solvertype);
 82:   return 0;
 83: }

 85: PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
 86: {
 87:   Mat_SeqDense      *l = (Mat_SeqDense *)A->data;
 88:   PetscInt           m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j;
 89:   PetscScalar       *slot, *bb, *v;
 90:   const PetscScalar *xx;

 92:   if (PetscDefined(USE_DEBUG)) {
 93:     for (i = 0; i < N; i++) {
 97:     }
 98:   }
 99:   if (!N) return 0;

101:   /* fix right hand side if needed */
102:   if (x && b) {
103:     Vec xt;

106:     VecDuplicate(x, &xt);
107:     VecCopy(x, xt);
108:     VecScale(xt, -1.0);
109:     MatMultAdd(A, xt, b, b);
110:     VecDestroy(&xt);
111:     VecGetArrayRead(x, &xx);
112:     VecGetArray(b, &bb);
113:     for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
114:     VecRestoreArrayRead(x, &xx);
115:     VecRestoreArray(b, &bb);
116:   }

118:   MatDenseGetArray(A, &v);
119:   for (i = 0; i < N; i++) {
120:     slot = v + rows[i] * m;
121:     PetscArrayzero(slot, r);
122:   }
123:   for (i = 0; i < N; i++) {
124:     slot = v + rows[i];
125:     for (j = 0; j < n; j++) {
126:       *slot = 0.0;
127:       slot += m;
128:     }
129:   }
130:   if (diag != 0.0) {
132:     for (i = 0; i < N; i++) {
133:       slot  = v + (m + 1) * rows[i];
134:       *slot = diag;
135:     }
136:   }
137:   MatDenseRestoreArray(A, &v);
138:   return 0;
139: }

141: PetscErrorCode MatPtAPNumeric_SeqDense_SeqDense(Mat A, Mat P, Mat C)
142: {
143:   Mat_SeqDense *c = (Mat_SeqDense *)(C->data);

145:   if (c->ptapwork) {
146:     (*C->ops->matmultnumeric)(A, P, c->ptapwork);
147:     (*C->ops->transposematmultnumeric)(P, c->ptapwork, C);
148:   } else SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Must call MatPtAPSymbolic_SeqDense_SeqDense() first");
149:   return 0;
150: }

152: PetscErrorCode MatPtAPSymbolic_SeqDense_SeqDense(Mat A, Mat P, PetscReal fill, Mat C)
153: {
154:   Mat_SeqDense *c;
155:   PetscBool     cisdense;

157:   MatSetSizes(C, P->cmap->n, P->cmap->n, P->cmap->N, P->cmap->N);
158:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
159:   if (!cisdense) {
160:     PetscBool flg;

162:     PetscObjectTypeCompare((PetscObject)P, ((PetscObject)A)->type_name, &flg);
163:     MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
164:   }
165:   MatSetUp(C);
166:   c = (Mat_SeqDense *)C->data;
167:   MatCreate(PetscObjectComm((PetscObject)A), &c->ptapwork);
168:   MatSetSizes(c->ptapwork, A->rmap->n, P->cmap->n, A->rmap->N, P->cmap->N);
169:   MatSetType(c->ptapwork, ((PetscObject)C)->type_name);
170:   MatSetUp(c->ptapwork);
171:   return 0;
172: }

174: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
175: {
176:   Mat              B = NULL;
177:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data;
178:   Mat_SeqDense    *b;
179:   PetscInt        *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i;
180:   const MatScalar *av;
181:   PetscBool        isseqdense;

183:   if (reuse == MAT_REUSE_MATRIX) {
184:     PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense);
186:   }
187:   if (reuse != MAT_REUSE_MATRIX) {
188:     MatCreate(PetscObjectComm((PetscObject)A), &B);
189:     MatSetSizes(B, m, n, m, n);
190:     MatSetType(B, MATSEQDENSE);
191:     MatSeqDenseSetPreallocation(B, NULL);
192:     b = (Mat_SeqDense *)(B->data);
193:   } else {
194:     b = (Mat_SeqDense *)((*newmat)->data);
195:     PetscArrayzero(b->v, m * n);
196:   }
197:   MatSeqAIJGetArrayRead(A, &av);
198:   for (i = 0; i < m; i++) {
199:     PetscInt j;
200:     for (j = 0; j < ai[1] - ai[0]; j++) {
201:       b->v[*aj * m + i] = *av;
202:       aj++;
203:       av++;
204:     }
205:     ai++;
206:   }
207:   MatSeqAIJRestoreArrayRead(A, &av);

209:   if (reuse == MAT_INPLACE_MATRIX) {
210:     MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
211:     MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
212:     MatHeaderReplace(A, &B);
213:   } else {
214:     if (B) *newmat = B;
215:     MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY);
216:     MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY);
217:   }
218:   return 0;
219: }

221: PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
222: {
223:   Mat           B = NULL;
224:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
225:   PetscInt      i, j;
226:   PetscInt     *rows, *nnz;
227:   MatScalar    *aa = a->v, *vals;

229:   PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals);
230:   if (reuse != MAT_REUSE_MATRIX) {
231:     MatCreate(PetscObjectComm((PetscObject)A), &B);
232:     MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
233:     MatSetType(B, MATSEQAIJ);
234:     for (j = 0; j < A->cmap->n; j++) {
235:       for (i = 0; i < A->rmap->n; i++)
236:         if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i];
237:       aa += a->lda;
238:     }
239:     MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz);
240:   } else B = *newmat;
241:   aa = a->v;
242:   for (j = 0; j < A->cmap->n; j++) {
243:     PetscInt numRows = 0;
244:     for (i = 0; i < A->rmap->n; i++)
245:       if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) {
246:         rows[numRows]   = i;
247:         vals[numRows++] = aa[i];
248:       }
249:     MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES);
250:     aa += a->lda;
251:   }
252:   PetscFree3(rows, nnz, vals);
253:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
254:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);

256:   if (reuse == MAT_INPLACE_MATRIX) {
257:     MatHeaderReplace(A, &B);
258:   } else if (reuse != MAT_REUSE_MATRIX) *newmat = B;
259:   return 0;
260: }

262: PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
263: {
264:   Mat_SeqDense      *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data;
265:   const PetscScalar *xv;
266:   PetscScalar       *yv;
267:   PetscBLASInt       N, m, ldax = 0, lday = 0, one = 1;

269:   MatDenseGetArrayRead(X, &xv);
270:   MatDenseGetArray(Y, &yv);
271:   PetscBLASIntCast(X->rmap->n * X->cmap->n, &N);
272:   PetscBLASIntCast(X->rmap->n, &m);
273:   PetscBLASIntCast(x->lda, &ldax);
274:   PetscBLASIntCast(y->lda, &lday);
275:   if (ldax > m || lday > m) {
276:     PetscInt j;

278:     for (j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, xv + j * ldax, &one, yv + j * lday, &one));
279:   } else {
280:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one));
281:   }
282:   MatDenseRestoreArrayRead(X, &xv);
283:   MatDenseRestoreArray(Y, &yv);
284:   PetscLogFlops(PetscMax(2.0 * N - 1, 0));
285:   return 0;
286: }

288: static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info)
289: {
290:   PetscLogDouble N = A->rmap->n * A->cmap->n;

292:   info->block_size        = 1.0;
293:   info->nz_allocated      = N;
294:   info->nz_used           = N;
295:   info->nz_unneeded       = 0;
296:   info->assemblies        = A->num_ass;
297:   info->mallocs           = 0;
298:   info->memory            = 0; /* REVIEW ME */
299:   info->fill_ratio_given  = 0;
300:   info->fill_ratio_needed = 0;
301:   info->factor_mallocs    = 0;
302:   return 0;
303: }

305: PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha)
306: {
307:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
308:   PetscScalar  *v;
309:   PetscBLASInt  one = 1, j, nz, lda = 0;

311:   MatDenseGetArray(A, &v);
312:   PetscBLASIntCast(a->lda, &lda);
313:   if (lda > A->rmap->n) {
314:     PetscBLASIntCast(A->rmap->n, &nz);
315:     for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one));
316:   } else {
317:     PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz);
318:     PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one));
319:   }
320:   PetscLogFlops(A->rmap->n * A->cmap->n);
321:   MatDenseRestoreArray(A, &v);
322:   return 0;
323: }

325: PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha)
326: {
327:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
328:   PetscScalar  *v;
329:   PetscInt      j, k;

331:   MatDenseGetArray(A, &v);
332:   k = PetscMin(A->rmap->n, A->cmap->n);
333:   for (j = 0; j < k; j++) v[j + j * a->lda] += alpha;
334:   PetscLogFlops(k);
335:   MatDenseRestoreArray(A, &v);
336:   return 0;
337: }

339: static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
340: {
341:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
342:   PetscInt           i, j, m = A->rmap->n, N = a->lda;
343:   const PetscScalar *v;

345:   *fl = PETSC_FALSE;
346:   if (A->rmap->n != A->cmap->n) return 0;
347:   MatDenseGetArrayRead(A, &v);
348:   for (i = 0; i < m; i++) {
349:     for (j = i; j < m; j++) {
350:       if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore;
351:     }
352:   }
353:   *fl = PETSC_TRUE;
354: restore:
355:   MatDenseRestoreArrayRead(A, &v);
356:   return 0;
357: }

359: static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
360: {
361:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
362:   PetscInt           i, j, m = A->rmap->n, N = a->lda;
363:   const PetscScalar *v;

365:   *fl = PETSC_FALSE;
366:   if (A->rmap->n != A->cmap->n) return 0;
367:   MatDenseGetArrayRead(A, &v);
368:   for (i = 0; i < m; i++) {
369:     for (j = i; j < m; j++) {
370:       if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore;
371:     }
372:   }
373:   *fl = PETSC_TRUE;
374: restore:
375:   MatDenseRestoreArrayRead(A, &v);
376:   return 0;
377: }

379: PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues)
380: {
381:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
382:   PetscInt      lda = (PetscInt)mat->lda, j, m, nlda = lda;
383:   PetscBool     isdensecpu;

385:   PetscLayoutReference(A->rmap, &newi->rmap);
386:   PetscLayoutReference(A->cmap, &newi->cmap);
387:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */
388:     MatDenseSetLDA(newi, lda);
389:   }
390:   PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu);
391:   if (isdensecpu) MatSeqDenseSetPreallocation(newi, NULL);
392:   if (cpvalues == MAT_COPY_VALUES) {
393:     const PetscScalar *av;
394:     PetscScalar       *v;

396:     MatDenseGetArrayRead(A, &av);
397:     MatDenseGetArrayWrite(newi, &v);
398:     MatDenseGetLDA(newi, &nlda);
399:     m = A->rmap->n;
400:     if (lda > m || nlda > m) {
401:       for (j = 0; j < A->cmap->n; j++) PetscArraycpy(v + j * nlda, av + j * lda, m);
402:     } else {
403:       PetscArraycpy(v, av, A->rmap->n * A->cmap->n);
404:     }
405:     MatDenseRestoreArrayWrite(newi, &v);
406:     MatDenseRestoreArrayRead(A, &av);
407:   }
408:   return 0;
409: }

411: PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
412: {
413:   MatCreate(PetscObjectComm((PetscObject)A), newmat);
414:   MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n);
415:   MatSetType(*newmat, ((PetscObject)A)->type_name);
416:   MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues);
417:   return 0;
418: }

420: static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
421: {
422:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
423:   PetscBLASInt  info;

425:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
426:   PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
427:   PetscFPTrapPop();
429:   PetscLogFlops(nrhs * (2.0 * m * m - m));
430:   return 0;
431: }

433: static PetscErrorCode MatConjugate_SeqDense(Mat);

435: static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
436: {
437:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
438:   PetscBLASInt  info;

440:   if (A->spd == PETSC_BOOL3_TRUE) {
441:     if (PetscDefined(USE_COMPLEX) && T) MatConjugate_SeqDense(A);
442:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
443:     PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info));
444:     PetscFPTrapPop();
446:     if (PetscDefined(USE_COMPLEX) && T) MatConjugate_SeqDense(A);
447: #if defined(PETSC_USE_COMPLEX)
448:   } else if (A->hermitian == PETSC_BOOL3_TRUE) {
449:     if (T) MatConjugate_SeqDense(A);
450:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
451:     PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
452:     PetscFPTrapPop();
454:     if (T) MatConjugate_SeqDense(A);
455: #endif
456:   } else { /* symmetric case */
457:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
458:     PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
459:     PetscFPTrapPop();
461:   }
462:   PetscLogFlops(nrhs * (2.0 * m * m - m));
463:   return 0;
464: }

466: static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
467: {
468:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
469:   PetscBLASInt  info;
470:   char          trans;

472:   if (PetscDefined(USE_COMPLEX)) {
473:     trans = 'C';
474:   } else {
475:     trans = 'T';
476:   }
477:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
478:   { /* lwork depends on the number of right-hand sides */
479:     PetscBLASInt nlfwork, lfwork = -1;
480:     PetscScalar  fwork;

482:     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
483:     nlfwork = (PetscBLASInt)PetscRealPart(fwork);
484:     if (nlfwork > mat->lfwork) {
485:       mat->lfwork = nlfwork;
486:       PetscFree(mat->fwork);
487:       PetscMalloc1(mat->lfwork, &mat->fwork);
488:     }
489:   }
490:   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
491:   PetscFPTrapPop();
493:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
494:   PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
495:   PetscFPTrapPop();
497:   for (PetscInt j = 0; j < nrhs; j++) {
498:     for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.;
499:   }
500:   PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)));
501:   return 0;
502: }

504: static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
505: {
506:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
507:   PetscBLASInt  info;

509:   if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) {
510:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
511:     PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
512:     PetscFPTrapPop();
514:     if (PetscDefined(USE_COMPLEX)) MatConjugate_SeqDense(A);
515:     { /* lwork depends on the number of right-hand sides */
516:       PetscBLASInt nlfwork, lfwork = -1;
517:       PetscScalar  fwork;

519:       PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
520:       nlfwork = (PetscBLASInt)PetscRealPart(fwork);
521:       if (nlfwork > mat->lfwork) {
522:         mat->lfwork = nlfwork;
523:         PetscFree(mat->fwork);
524:         PetscMalloc1(mat->lfwork, &mat->fwork);
525:       }
526:     }
527:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
528:     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
529:     PetscFPTrapPop();
531:     if (PetscDefined(USE_COMPLEX)) MatConjugate_SeqDense(A);
532:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve");
533:   PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)));
534:   return 0;
535: }

537: static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
538: {
539:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
540:   PetscScalar  *y;
541:   PetscBLASInt  m = 0, k = 0;

543:   PetscBLASIntCast(A->rmap->n, &m);
544:   PetscBLASIntCast(A->cmap->n, &k);
545:   if (k < m) {
546:     VecCopy(xx, mat->qrrhs);
547:     VecGetArray(mat->qrrhs, &y);
548:   } else {
549:     VecCopy(xx, yy);
550:     VecGetArray(yy, &y);
551:   }
552:   *_y = y;
553:   *_k = k;
554:   *_m = m;
555:   return 0;
556: }

558: static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
559: {
560:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
561:   PetscScalar  *y   = NULL;
562:   PetscBLASInt  m, k;

564:   y   = *_y;
565:   *_y = NULL;
566:   k   = *_k;
567:   m   = *_m;
568:   if (k < m) {
569:     PetscScalar *yv;
570:     VecGetArray(yy, &yv);
571:     PetscArraycpy(yv, y, k);
572:     VecRestoreArray(yy, &yv);
573:     VecRestoreArray(mat->qrrhs, &y);
574:   } else {
575:     VecRestoreArray(yy, &y);
576:   }
577:   return 0;
578: }

580: static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy)
581: {
582:   PetscScalar *y = NULL;
583:   PetscBLASInt m = 0, k = 0;

585:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
586:   MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE);
587:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
588:   return 0;
589: }

591: static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy)
592: {
593:   PetscScalar *y = NULL;
594:   PetscBLASInt m = 0, k = 0;

596:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
597:   MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE);
598:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
599:   return 0;
600: }

602: static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
603: {
604:   PetscScalar *y = NULL;
605:   PetscBLASInt m = 0, k = 0;

607:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
608:   MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE);
609:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
610:   return 0;
611: }

613: static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
614: {
615:   PetscScalar *y = NULL;
616:   PetscBLASInt m = 0, k = 0;

618:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
619:   MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE);
620:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
621:   return 0;
622: }

624: static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy)
625: {
626:   PetscScalar *y = NULL;
627:   PetscBLASInt m = 0, k = 0;

629:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
630:   MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k);
631:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
632:   return 0;
633: }

635: static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy)
636: {
637:   PetscScalar *y = NULL;
638:   PetscBLASInt m = 0, k = 0;

640:   MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
641:   MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k);
642:   MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
643:   return 0;
644: }

646: static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
647: {
648:   const PetscScalar *b;
649:   PetscScalar       *y;
650:   PetscInt           n, _ldb, _ldx;
651:   PetscBLASInt       nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0;

653:   *_ldy  = 0;
654:   *_m    = 0;
655:   *_nrhs = 0;
656:   *_k    = 0;
657:   *_y    = NULL;
658:   PetscBLASIntCast(A->rmap->n, &m);
659:   PetscBLASIntCast(A->cmap->n, &k);
660:   MatGetSize(B, NULL, &n);
661:   PetscBLASIntCast(n, &nrhs);
662:   MatDenseGetLDA(B, &_ldb);
663:   PetscBLASIntCast(_ldb, &ldb);
664:   MatDenseGetLDA(X, &_ldx);
665:   PetscBLASIntCast(_ldx, &ldx);
666:   if (ldx < m) {
667:     MatDenseGetArrayRead(B, &b);
668:     PetscMalloc1(nrhs * m, &y);
669:     if (ldb == m) {
670:       PetscArraycpy(y, b, ldb * nrhs);
671:     } else {
672:       for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&y[j * m], &b[j * ldb], m);
673:     }
674:     ldy = m;
675:     MatDenseRestoreArrayRead(B, &b);
676:   } else {
677:     if (ldb == ldx) {
678:       MatCopy(B, X, SAME_NONZERO_PATTERN);
679:       MatDenseGetArray(X, &y);
680:     } else {
681:       MatDenseGetArray(X, &y);
682:       MatDenseGetArrayRead(B, &b);
683:       for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&y[j * ldx], &b[j * ldb], m);
684:       MatDenseRestoreArrayRead(B, &b);
685:     }
686:     ldy = ldx;
687:   }
688:   *_y    = y;
689:   *_ldy  = ldy;
690:   *_k    = k;
691:   *_m    = m;
692:   *_nrhs = nrhs;
693:   return 0;
694: }

696: static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
697: {
698:   PetscScalar *y;
699:   PetscInt     _ldx;
700:   PetscBLASInt k, ldy, nrhs, ldx = 0;

702:   y    = *_y;
703:   *_y  = NULL;
704:   k    = *_k;
705:   ldy  = *_ldy;
706:   nrhs = *_nrhs;
707:   MatDenseGetLDA(X, &_ldx);
708:   PetscBLASIntCast(_ldx, &ldx);
709:   if (ldx != ldy) {
710:     PetscScalar *xv;
711:     MatDenseGetArray(X, &xv);
712:     for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&xv[j * ldx], &y[j * ldy], k);
713:     MatDenseRestoreArray(X, &xv);
714:     PetscFree(y);
715:   } else {
716:     MatDenseRestoreArray(X, &y);
717:   }
718:   return 0;
719: }

721: static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X)
722: {
723:   PetscScalar *y;
724:   PetscBLASInt m, k, ldy, nrhs;

726:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
727:   MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE);
728:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
729:   return 0;
730: }

732: static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X)
733: {
734:   PetscScalar *y;
735:   PetscBLASInt m, k, ldy, nrhs;

737:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
738:   MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE);
739:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
740:   return 0;
741: }

743: static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X)
744: {
745:   PetscScalar *y;
746:   PetscBLASInt m, k, ldy, nrhs;

748:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
749:   MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE);
750:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
751:   return 0;
752: }

754: static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X)
755: {
756:   PetscScalar *y;
757:   PetscBLASInt m, k, ldy, nrhs;

759:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
760:   MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE);
761:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
762:   return 0;
763: }

765: static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X)
766: {
767:   PetscScalar *y;
768:   PetscBLASInt m, k, ldy, nrhs;

770:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
771:   MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k);
772:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
773:   return 0;
774: }

776: static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X)
777: {
778:   PetscScalar *y;
779:   PetscBLASInt m, k, ldy, nrhs;

781:   MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
782:   MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k);
783:   MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
784:   return 0;
785: }

787: /* ---------------------------------------------------------------*/
788: /* COMMENT: I have chosen to hide row permutation in the pivots,
789:    rather than put it in the Mat->row slot.*/
790: PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo)
791: {
792:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
793:   PetscBLASInt  n, m, info;

795:   PetscBLASIntCast(A->cmap->n, &n);
796:   PetscBLASIntCast(A->rmap->n, &m);
797:   if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
798:   if (!A->rmap->n || !A->cmap->n) return 0;
799:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
800:   PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info));
801:   PetscFPTrapPop();


806:   A->ops->solve             = MatSolve_SeqDense_LU;
807:   A->ops->matsolve          = MatMatSolve_SeqDense_LU;
808:   A->ops->solvetranspose    = MatSolveTranspose_SeqDense_LU;
809:   A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU;
810:   A->factortype             = MAT_FACTOR_LU;

812:   PetscFree(A->solvertype);
813:   PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);

815:   PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3);
816:   return 0;
817: }

819: static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
820: {
821:   MatFactorInfo info;

823:   MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
824:   PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info);
825:   return 0;
826: }

828: PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
829: {
830:   fact->preallocated         = PETSC_TRUE;
831:   fact->assembled            = PETSC_TRUE;
832:   fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense;
833:   return 0;
834: }

836: /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */
837: PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo)
838: {
839:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
840:   PetscBLASInt  info, n;

842:   PetscBLASIntCast(A->cmap->n, &n);
843:   if (!A->rmap->n || !A->cmap->n) return 0;
844:   if (A->spd == PETSC_BOOL3_TRUE) {
845:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
846:     PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info));
847:     PetscFPTrapPop();
848: #if defined(PETSC_USE_COMPLEX)
849:   } else if (A->hermitian == PETSC_BOOL3_TRUE) {
850:     if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
851:     if (!mat->fwork) {
852:       PetscScalar dummy;

854:       mat->lfwork = -1;
855:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
856:       PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
857:       PetscFPTrapPop();
858:       mat->lfwork = (PetscInt)PetscRealPart(dummy);
859:       PetscMalloc1(mat->lfwork, &mat->fwork);
860:     }
861:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
862:     PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
863:     PetscFPTrapPop();
864: #endif
865:   } else { /* symmetric case */
866:     if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
867:     if (!mat->fwork) {
868:       PetscScalar dummy;

870:       mat->lfwork = -1;
871:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
872:       PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
873:       PetscFPTrapPop();
874:       mat->lfwork = (PetscInt)PetscRealPart(dummy);
875:       PetscMalloc1(mat->lfwork, &mat->fwork);
876:     }
877:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
878:     PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
879:     PetscFPTrapPop();
880:   }

883:   A->ops->solve             = MatSolve_SeqDense_Cholesky;
884:   A->ops->matsolve          = MatMatSolve_SeqDense_Cholesky;
885:   A->ops->solvetranspose    = MatSolveTranspose_SeqDense_Cholesky;
886:   A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky;
887:   A->factortype             = MAT_FACTOR_CHOLESKY;

889:   PetscFree(A->solvertype);
890:   PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);

892:   PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
893:   return 0;
894: }

896: static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
897: {
898:   MatFactorInfo info;

900:   info.fill = 1.0;

902:   MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
903:   PetscUseTypeMethod(fact, choleskyfactor, NULL, &info);
904:   return 0;
905: }

907: PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
908: {
909:   fact->assembled                  = PETSC_TRUE;
910:   fact->preallocated               = PETSC_TRUE;
911:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense;
912:   return 0;
913: }

915: PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo)
916: {
917:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
918:   PetscBLASInt  n, m, info, min, max;

920:   PetscBLASIntCast(A->cmap->n, &n);
921:   PetscBLASIntCast(A->rmap->n, &m);
922:   max = PetscMax(m, n);
923:   min = PetscMin(m, n);
924:   if (!mat->tau) { PetscMalloc1(min, &mat->tau); }
925:   if (!mat->pivots) { PetscMalloc1(n, &mat->pivots); }
926:   if (!mat->qrrhs) MatCreateVecs(A, NULL, &(mat->qrrhs));
927:   if (!A->rmap->n || !A->cmap->n) return 0;
928:   if (!mat->fwork) {
929:     PetscScalar dummy;

931:     mat->lfwork = -1;
932:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
933:     PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info));
934:     PetscFPTrapPop();
935:     mat->lfwork = (PetscInt)PetscRealPart(dummy);
936:     PetscMalloc1(mat->lfwork, &mat->fwork);
937:   }
938:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
939:   PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info));
940:   PetscFPTrapPop();
942:   // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR.  For now just say rank is min of m and n
943:   mat->rank = min;

945:   A->ops->solve    = MatSolve_SeqDense_QR;
946:   A->ops->matsolve = MatMatSolve_SeqDense_QR;
947:   A->factortype    = MAT_FACTOR_QR;
948:   if (m == n) {
949:     A->ops->solvetranspose    = MatSolveTranspose_SeqDense_QR;
950:     A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR;
951:   }

953:   PetscFree(A->solvertype);
954:   PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);

956:   PetscLogFlops(2.0 * min * min * (max - min / 3.0));
957:   return 0;
958: }

960: static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
961: {
962:   MatFactorInfo info;

964:   info.fill = 1.0;

966:   MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
967:   PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info));
968:   return 0;
969: }

971: PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
972: {
973:   fact->assembled    = PETSC_TRUE;
974:   fact->preallocated = PETSC_TRUE;
975:   PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense);
976:   return 0;
977: }

979: /* uses LAPACK */
980: PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact)
981: {
982:   MatCreate(PetscObjectComm((PetscObject)A), fact);
983:   MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n);
984:   MatSetType(*fact, MATDENSE);
985:   (*fact)->trivialsymbolic = PETSC_TRUE;
986:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
987:     (*fact)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqDense;
988:     (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense;
989:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
990:     (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense;
991:   } else if (ftype == MAT_FACTOR_QR) {
992:     PetscObjectComposeFunction((PetscObject)(*fact), "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense);
993:   }
994:   (*fact)->factortype = ftype;

996:   PetscFree((*fact)->solvertype);
997:   PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype);
998:   PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU]);
999:   PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU]);
1000:   PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY]);
1001:   PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC]);
1002:   return 0;
1003: }

1005: /* ------------------------------------------------------------------*/
1006: static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx)
1007: {
1008:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1009:   PetscScalar       *x, *v = mat->v, zero = 0.0, xt;
1010:   const PetscScalar *b;
1011:   PetscInt           m = A->rmap->n, i;
1012:   PetscBLASInt       o = 1, bm = 0;

1014: #if defined(PETSC_HAVE_CUDA)
1016: #endif
1017:   if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */
1018:   PetscBLASIntCast(m, &bm);
1019:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1020:     /* this is a hack fix, should have another version without the second BLASdotu */
1021:     VecSet(xx, zero);
1022:   }
1023:   VecGetArray(xx, &x);
1024:   VecGetArrayRead(bb, &b);
1025:   its = its * lits;
1027:   while (its--) {
1028:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1029:       for (i = 0; i < m; i++) {
1030:         PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1031:         x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift);
1032:       }
1033:     }
1034:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1035:       for (i = m - 1; i >= 0; i--) {
1036:         PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1037:         x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift);
1038:       }
1039:     }
1040:   }
1041:   VecRestoreArrayRead(bb, &b);
1042:   VecRestoreArray(xx, &x);
1043:   return 0;
1044: }

1046: /* -----------------------------------------------------------------*/
1047: PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy)
1048: {
1049:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1050:   const PetscScalar *v   = mat->v, *x;
1051:   PetscScalar       *y;
1052:   PetscBLASInt       m, n, _One = 1;
1053:   PetscScalar        _DOne = 1.0, _DZero = 0.0;

1055:   PetscBLASIntCast(A->rmap->n, &m);
1056:   PetscBLASIntCast(A->cmap->n, &n);
1057:   VecGetArrayRead(xx, &x);
1058:   VecGetArrayWrite(yy, &y);
1059:   if (!A->rmap->n || !A->cmap->n) {
1060:     PetscBLASInt i;
1061:     for (i = 0; i < n; i++) y[i] = 0.0;
1062:   } else {
1063:     PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &mat->lda, x, &_One, &_DZero, y, &_One));
1064:     PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->cmap->n);
1065:   }
1066:   VecRestoreArrayRead(xx, &x);
1067:   VecRestoreArrayWrite(yy, &y);
1068:   return 0;
1069: }

1071: PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy)
1072: {
1073:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1074:   PetscScalar       *y, _DOne = 1.0, _DZero = 0.0;
1075:   PetscBLASInt       m, n, _One             = 1;
1076:   const PetscScalar *v = mat->v, *x;

1078:   PetscBLASIntCast(A->rmap->n, &m);
1079:   PetscBLASIntCast(A->cmap->n, &n);
1080:   VecGetArrayRead(xx, &x);
1081:   VecGetArrayWrite(yy, &y);
1082:   if (!A->rmap->n || !A->cmap->n) {
1083:     PetscBLASInt i;
1084:     for (i = 0; i < m; i++) y[i] = 0.0;
1085:   } else {
1086:     PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DZero, y, &_One));
1087:     PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->rmap->n);
1088:   }
1089:   VecRestoreArrayRead(xx, &x);
1090:   VecRestoreArrayWrite(yy, &y);
1091:   return 0;
1092: }

1094: PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1095: {
1096:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1097:   const PetscScalar *v   = mat->v, *x;
1098:   PetscScalar       *y, _DOne = 1.0;
1099:   PetscBLASInt       m, n, _One = 1;

1101:   PetscBLASIntCast(A->rmap->n, &m);
1102:   PetscBLASIntCast(A->cmap->n, &n);
1103:   VecCopy(zz, yy);
1104:   if (!A->rmap->n || !A->cmap->n) return 0;
1105:   VecGetArrayRead(xx, &x);
1106:   VecGetArray(yy, &y);
1107:   PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One));
1108:   VecRestoreArrayRead(xx, &x);
1109:   VecRestoreArray(yy, &y);
1110:   PetscLogFlops(2.0 * A->rmap->n * A->cmap->n);
1111:   return 0;
1112: }

1114: PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1115: {
1116:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1117:   const PetscScalar *v   = mat->v, *x;
1118:   PetscScalar       *y;
1119:   PetscBLASInt       m, n, _One = 1;
1120:   PetscScalar        _DOne = 1.0;

1122:   PetscBLASIntCast(A->rmap->n, &m);
1123:   PetscBLASIntCast(A->cmap->n, &n);
1124:   VecCopy(zz, yy);
1125:   if (!A->rmap->n || !A->cmap->n) return 0;
1126:   VecGetArrayRead(xx, &x);
1127:   VecGetArray(yy, &y);
1128:   PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One));
1129:   VecRestoreArrayRead(xx, &x);
1130:   VecRestoreArray(yy, &y);
1131:   PetscLogFlops(2.0 * A->rmap->n * A->cmap->n);
1132:   return 0;
1133: }

1135: /* -----------------------------------------------------------------*/
1136: static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1137: {
1138:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1139:   PetscInt      i;

1141:   *ncols = A->cmap->n;
1142:   if (cols) {
1143:     PetscMalloc1(A->cmap->n, cols);
1144:     for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i;
1145:   }
1146:   if (vals) {
1147:     const PetscScalar *v;

1149:     MatDenseGetArrayRead(A, &v);
1150:     PetscMalloc1(A->cmap->n, vals);
1151:     v += row;
1152:     for (i = 0; i < A->cmap->n; i++) {
1153:       (*vals)[i] = *v;
1154:       v += mat->lda;
1155:     }
1156:     MatDenseRestoreArrayRead(A, &v);
1157:   }
1158:   return 0;
1159: }

1161: static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1162: {
1163:   if (ncols) *ncols = 0;
1164:   if (cols) PetscFree(*cols);
1165:   if (vals) PetscFree(*vals);
1166:   return 0;
1167: }
1168: /* ----------------------------------------------------------------*/
1169: static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv)
1170: {
1171:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1172:   PetscScalar  *av;
1173:   PetscInt      i, j, idx = 0;
1174: #if defined(PETSC_HAVE_CUDA)
1175:   PetscOffloadMask oldf;
1176: #endif

1178:   MatDenseGetArray(A, &av);
1179:   if (!mat->roworiented) {
1180:     if (addv == INSERT_VALUES) {
1181:       for (j = 0; j < n; j++) {
1182:         if (indexn[j] < 0) {
1183:           idx += m;
1184:           continue;
1185:         }
1187:         for (i = 0; i < m; i++) {
1188:           if (indexm[i] < 0) {
1189:             idx++;
1190:             continue;
1191:           }
1193:           av[indexn[j] * mat->lda + indexm[i]] = v[idx++];
1194:         }
1195:       }
1196:     } else {
1197:       for (j = 0; j < n; j++) {
1198:         if (indexn[j] < 0) {
1199:           idx += m;
1200:           continue;
1201:         }
1203:         for (i = 0; i < m; i++) {
1204:           if (indexm[i] < 0) {
1205:             idx++;
1206:             continue;
1207:           }
1209:           av[indexn[j] * mat->lda + indexm[i]] += v[idx++];
1210:         }
1211:       }
1212:     }
1213:   } else {
1214:     if (addv == INSERT_VALUES) {
1215:       for (i = 0; i < m; i++) {
1216:         if (indexm[i] < 0) {
1217:           idx += n;
1218:           continue;
1219:         }
1221:         for (j = 0; j < n; j++) {
1222:           if (indexn[j] < 0) {
1223:             idx++;
1224:             continue;
1225:           }
1227:           av[indexn[j] * mat->lda + indexm[i]] = v[idx++];
1228:         }
1229:       }
1230:     } else {
1231:       for (i = 0; i < m; i++) {
1232:         if (indexm[i] < 0) {
1233:           idx += n;
1234:           continue;
1235:         }
1237:         for (j = 0; j < n; j++) {
1238:           if (indexn[j] < 0) {
1239:             idx++;
1240:             continue;
1241:           }
1243:           av[indexn[j] * mat->lda + indexm[i]] += v[idx++];
1244:         }
1245:       }
1246:     }
1247:   }
1248:   /* hack to prevent unneeded copy to the GPU while returning the array */
1249: #if defined(PETSC_HAVE_CUDA)
1250:   oldf           = A->offloadmask;
1251:   A->offloadmask = PETSC_OFFLOAD_GPU;
1252: #endif
1253:   MatDenseRestoreArray(A, &av);
1254: #if defined(PETSC_HAVE_CUDA)
1255:   A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU);
1256: #endif
1257:   return 0;
1258: }

1260: static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[])
1261: {
1262:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1263:   const PetscScalar *vv;
1264:   PetscInt           i, j;

1266:   MatDenseGetArrayRead(A, &vv);
1267:   /* row-oriented output */
1268:   for (i = 0; i < m; i++) {
1269:     if (indexm[i] < 0) {
1270:       v += n;
1271:       continue;
1272:     }
1274:     for (j = 0; j < n; j++) {
1275:       if (indexn[j] < 0) {
1276:         v++;
1277:         continue;
1278:       }
1280:       *v++ = vv[indexn[j] * mat->lda + indexm[i]];
1281:     }
1282:   }
1283:   MatDenseRestoreArrayRead(A, &vv);
1284:   return 0;
1285: }

1287: /* -----------------------------------------------------------------*/

1289: PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer)
1290: {
1291:   PetscBool          skipHeader;
1292:   PetscViewerFormat  format;
1293:   PetscInt           header[4], M, N, m, lda, i, j, k;
1294:   const PetscScalar *v;
1295:   PetscScalar       *vwork;

1297:   PetscViewerSetUp(viewer);
1298:   PetscViewerBinaryGetSkipHeader(viewer, &skipHeader);
1299:   PetscViewerGetFormat(viewer, &format);
1300:   if (skipHeader) format = PETSC_VIEWER_NATIVE;

1302:   MatGetSize(mat, &M, &N);

1304:   /* write matrix header */
1305:   header[0] = MAT_FILE_CLASSID;
1306:   header[1] = M;
1307:   header[2] = N;
1308:   header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N;
1309:   if (!skipHeader) PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT);

1311:   MatGetLocalSize(mat, &m, NULL);
1312:   if (format != PETSC_VIEWER_NATIVE) {
1313:     PetscInt nnz = m * N, *iwork;
1314:     /* store row lengths for each row */
1315:     PetscMalloc1(nnz, &iwork);
1316:     for (i = 0; i < m; i++) iwork[i] = N;
1317:     PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1318:     /* store column indices (zero start index) */
1319:     for (k = 0, i = 0; i < m; i++)
1320:       for (j = 0; j < N; j++, k++) iwork[k] = j;
1321:     PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1322:     PetscFree(iwork);
1323:   }
1324:   /* store matrix values as a dense matrix in row major order */
1325:   PetscMalloc1(m * N, &vwork);
1326:   MatDenseGetArrayRead(mat, &v);
1327:   MatDenseGetLDA(mat, &lda);
1328:   for (k = 0, i = 0; i < m; i++)
1329:     for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j];
1330:   MatDenseRestoreArrayRead(mat, &v);
1331:   PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1332:   PetscFree(vwork);
1333:   return 0;
1334: }

1336: PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer)
1337: {
1338:   PetscBool    skipHeader;
1339:   PetscInt     header[4], M, N, m, nz, lda, i, j, k;
1340:   PetscInt     rows, cols;
1341:   PetscScalar *v, *vwork;

1343:   PetscViewerSetUp(viewer);
1344:   PetscViewerBinaryGetSkipHeader(viewer, &skipHeader);

1346:   if (!skipHeader) {
1347:     PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT);
1349:     M = header[1];
1350:     N = header[2];
1353:     nz = header[3];
1355:   } else {
1356:     MatGetSize(mat, &M, &N);
1358:     nz = MATRIX_BINARY_FORMAT_DENSE;
1359:   }

1361:   /* setup global sizes if not set */
1362:   if (mat->rmap->N < 0) mat->rmap->N = M;
1363:   if (mat->cmap->N < 0) mat->cmap->N = N;
1364:   MatSetUp(mat);
1365:   /* check if global sizes are correct */
1366:   MatGetSize(mat, &rows, &cols);

1369:   MatGetSize(mat, NULL, &N);
1370:   MatGetLocalSize(mat, &m, NULL);
1371:   MatDenseGetArray(mat, &v);
1372:   MatDenseGetLDA(mat, &lda);
1373:   if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */
1374:     PetscInt nnz = m * N;
1375:     /* read in matrix values */
1376:     PetscMalloc1(nnz, &vwork);
1377:     PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1378:     /* store values in column major order */
1379:     for (j = 0; j < N; j++)
1380:       for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j];
1381:     PetscFree(vwork);
1382:   } else { /* matrix in file is sparse format */
1383:     PetscInt nnz = 0, *rlens, *icols;
1384:     /* read in row lengths */
1385:     PetscMalloc1(m, &rlens);
1386:     PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1387:     for (i = 0; i < m; i++) nnz += rlens[i];
1388:     /* read in column indices and values */
1389:     PetscMalloc2(nnz, &icols, nnz, &vwork);
1390:     PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1391:     PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1392:     /* store values in column major order */
1393:     for (k = 0, i = 0; i < m; i++)
1394:       for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k];
1395:     PetscFree(rlens);
1396:     PetscFree2(icols, vwork);
1397:   }
1398:   MatDenseRestoreArray(mat, &v);
1399:   MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
1400:   MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
1401:   return 0;
1402: }

1404: PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer)
1405: {
1406:   PetscBool isbinary, ishdf5;

1410:   /* force binary viewer to load .info file if it has not yet done so */
1411:   PetscViewerSetUp(viewer);
1412:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1413:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5);
1414:   if (isbinary) {
1415:     MatLoad_Dense_Binary(newMat, viewer);
1416:   } else if (ishdf5) {
1417: #if defined(PETSC_HAVE_HDF5)
1418:     MatLoad_Dense_HDF5(newMat, viewer);
1419: #else
1420:     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
1421: #endif
1422:   } else {
1423:     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1424:   }
1425:   return 0;
1426: }

1428: static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer)
1429: {
1430:   Mat_SeqDense     *a = (Mat_SeqDense *)A->data;
1431:   PetscInt          i, j;
1432:   const char       *name;
1433:   PetscScalar      *v, *av;
1434:   PetscViewerFormat format;
1435: #if defined(PETSC_USE_COMPLEX)
1436:   PetscBool allreal = PETSC_TRUE;
1437: #endif

1439:   MatDenseGetArrayRead(A, (const PetscScalar **)&av);
1440:   PetscViewerGetFormat(viewer, &format);
1441:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442:     return 0; /* do nothing for now */
1443:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1444:     PetscViewerASCIIUseTabs(viewer, PETSC_FALSE);
1445:     for (i = 0; i < A->rmap->n; i++) {
1446:       v = av + i;
1447:       PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i);
1448:       for (j = 0; j < A->cmap->n; j++) {
1449: #if defined(PETSC_USE_COMPLEX)
1450:         if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) {
1451:           PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v));
1452:         } else if (PetscRealPart(*v)) {
1453:           PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v));
1454:         }
1455: #else
1456:         if (*v) PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v);
1457: #endif
1458:         v += a->lda;
1459:       }
1460:       PetscViewerASCIIPrintf(viewer, "\n");
1461:     }
1462:     PetscViewerASCIIUseTabs(viewer, PETSC_TRUE);
1463:   } else {
1464:     PetscViewerASCIIUseTabs(viewer, PETSC_FALSE);
1465: #if defined(PETSC_USE_COMPLEX)
1466:     /* determine if matrix has all real values */
1467:     for (j = 0; j < A->cmap->n; j++) {
1468:       v = av + j * a->lda;
1469:       for (i = 0; i < A->rmap->n; i++) {
1470:         if (PetscImaginaryPart(v[i])) {
1471:           allreal = PETSC_FALSE;
1472:           break;
1473:         }
1474:       }
1475:     }
1476: #endif
1477:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
1478:       PetscObjectGetName((PetscObject)A, &name);
1479:       PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n);
1480:       PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n);
1481:       PetscViewerASCIIPrintf(viewer, "%s = [\n", name);
1482:     }

1484:     for (i = 0; i < A->rmap->n; i++) {
1485:       v = av + i;
1486:       for (j = 0; j < A->cmap->n; j++) {
1487: #if defined(PETSC_USE_COMPLEX)
1488:         if (allreal) {
1489:           PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v));
1490:         } else {
1491:           PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v));
1492:         }
1493: #else
1494:         PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v);
1495: #endif
1496:         v += a->lda;
1497:       }
1498:       PetscViewerASCIIPrintf(viewer, "\n");
1499:     }
1500:     if (format == PETSC_VIEWER_ASCII_MATLAB) PetscViewerASCIIPrintf(viewer, "];\n");
1501:     PetscViewerASCIIUseTabs(viewer, PETSC_TRUE);
1502:   }
1503:   MatDenseRestoreArrayRead(A, (const PetscScalar **)&av);
1504:   PetscViewerFlush(viewer);
1505:   return 0;
1506: }

1508: #include <petscdraw.h>
1509: static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa)
1510: {
1511:   Mat                A = (Mat)Aa;
1512:   PetscInt           m = A->rmap->n, n = A->cmap->n, i, j;
1513:   int                color = PETSC_DRAW_WHITE;
1514:   const PetscScalar *v;
1515:   PetscViewer        viewer;
1516:   PetscReal          xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1517:   PetscViewerFormat  format;

1519:   PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer);
1520:   PetscViewerGetFormat(viewer, &format);
1521:   PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr);

1523:   /* Loop over matrix elements drawing boxes */
1524:   MatDenseGetArrayRead(A, &v);
1525:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1526:     PetscDrawCollectiveBegin(draw);
1527:     /* Blue for negative and Red for positive */
1528:     for (j = 0; j < n; j++) {
1529:       x_l = j;
1530:       x_r = x_l + 1.0;
1531:       for (i = 0; i < m; i++) {
1532:         y_l = m - i - 1.0;
1533:         y_r = y_l + 1.0;
1534:         if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED;
1535:         else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE;
1536:         else continue;
1537:         PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color);
1538:       }
1539:     }
1540:     PetscDrawCollectiveEnd(draw);
1541:   } else {
1542:     /* use contour shading to indicate magnitude of values */
1543:     /* first determine max of all nonzero values */
1544:     PetscReal minv = 0.0, maxv = 0.0;
1545:     PetscDraw popup;

1547:     for (i = 0; i < m * n; i++) {
1548:       if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]);
1549:     }
1550:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1551:     PetscDrawGetPopup(draw, &popup);
1552:     PetscDrawScalePopup(popup, minv, maxv);

1554:     PetscDrawCollectiveBegin(draw);
1555:     for (j = 0; j < n; j++) {
1556:       x_l = j;
1557:       x_r = x_l + 1.0;
1558:       for (i = 0; i < m; i++) {
1559:         y_l   = m - i - 1.0;
1560:         y_r   = y_l + 1.0;
1561:         color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv);
1562:         PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color);
1563:       }
1564:     }
1565:     PetscDrawCollectiveEnd(draw);
1566:   }
1567:   MatDenseRestoreArrayRead(A, &v);
1568:   return 0;
1569: }

1571: static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer)
1572: {
1573:   PetscDraw draw;
1574:   PetscBool isnull;
1575:   PetscReal xr, yr, xl, yl, h, w;

1577:   PetscViewerDrawGetDraw(viewer, 0, &draw);
1578:   PetscDrawIsNull(draw, &isnull);
1579:   if (isnull) return 0;

1581:   xr = A->cmap->n;
1582:   yr = A->rmap->n;
1583:   h  = yr / 10.0;
1584:   w  = xr / 10.0;
1585:   xr += w;
1586:   yr += h;
1587:   xl = -w;
1588:   yl = -h;
1589:   PetscDrawSetCoordinates(draw, xl, yl, xr, yr);
1590:   PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer);
1591:   PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A);
1592:   PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL);
1593:   PetscDrawSave(draw);
1594:   return 0;
1595: }

1597: PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer)
1598: {
1599:   PetscBool iascii, isbinary, isdraw;

1601:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
1602:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1603:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
1604:   if (iascii) MatView_SeqDense_ASCII(A, viewer);
1605:   else if (isbinary) MatView_Dense_Binary(A, viewer);
1606:   else if (isdraw) MatView_SeqDense_Draw(A, viewer);
1607:   return 0;
1608: }

1610: static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array)
1611: {
1612:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

1617:   a->unplacedarray       = a->v;
1618:   a->unplaced_user_alloc = a->user_alloc;
1619:   a->v                   = (PetscScalar *)array;
1620:   a->user_alloc          = PETSC_TRUE;
1621: #if defined(PETSC_HAVE_CUDA)
1622:   A->offloadmask = PETSC_OFFLOAD_CPU;
1623: #endif
1624:   return 0;
1625: }

1627: static PetscErrorCode MatDenseResetArray_SeqDense(Mat A)
1628: {
1629:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

1633:   a->v             = a->unplacedarray;
1634:   a->user_alloc    = a->unplaced_user_alloc;
1635:   a->unplacedarray = NULL;
1636: #if defined(PETSC_HAVE_CUDA)
1637:   A->offloadmask = PETSC_OFFLOAD_CPU;
1638: #endif
1639:   return 0;
1640: }

1642: static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array)
1643: {
1644:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

1648:   if (!a->user_alloc) PetscFree(a->v);
1649:   a->v          = (PetscScalar *)array;
1650:   a->user_alloc = PETSC_FALSE;
1651: #if defined(PETSC_HAVE_CUDA)
1652:   A->offloadmask = PETSC_OFFLOAD_CPU;
1653: #endif
1654:   return 0;
1655: }

1657: PetscErrorCode MatDestroy_SeqDense(Mat mat)
1658: {
1659:   Mat_SeqDense *l = (Mat_SeqDense *)mat->data;

1661: #if defined(PETSC_USE_LOG)
1662:   PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n);
1663: #endif
1664:   VecDestroy(&(l->qrrhs));
1665:   PetscFree(l->tau);
1666:   PetscFree(l->pivots);
1667:   PetscFree(l->fwork);
1668:   MatDestroy(&l->ptapwork);
1669:   if (!l->user_alloc) PetscFree(l->v);
1670:   if (!l->unplaced_user_alloc) PetscFree(l->unplacedarray);
1673:   VecDestroy(&l->cvec);
1674:   MatDestroy(&l->cmat);
1675:   PetscFree(mat->data);

1677:   PetscObjectChangeTypeName((PetscObject)mat, NULL);
1678:   PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL);
1679:   PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL);
1680:   PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL);
1681:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL);
1682:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL);
1683:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL);
1684:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL);
1685:   PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL);
1686:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL);
1687:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL);
1688:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL);
1689:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL);
1690:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL);
1691:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL);
1692:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL);
1693: #if defined(PETSC_HAVE_ELEMENTAL)
1694:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL);
1695: #endif
1696: #if defined(PETSC_HAVE_SCALAPACK)
1697:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL);
1698: #endif
1699: #if defined(PETSC_HAVE_CUDA)
1700:   PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL);
1701:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL);
1702:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL);
1703:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL);
1704: #endif
1705:   PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL);
1706:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL);
1707:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL);
1708:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL);
1709:   PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL);

1711:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL);
1712:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL);
1713:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL);
1714:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL);
1715:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL);
1716:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL);
1717:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL);
1718:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL);
1719:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL);
1720:   PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL);
1721:   return 0;
1722: }

1724: static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout)
1725: {
1726:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1727:   PetscInt      k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n;
1728:   PetscScalar  *v, tmp;

1730:   if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *matout);
1731:   if (reuse == MAT_INPLACE_MATRIX) {
1732:     if (m == n) { /* in place transpose */
1733:       MatDenseGetArray(A, &v);
1734:       for (j = 0; j < m; j++) {
1735:         for (k = 0; k < j; k++) {
1736:           tmp          = v[j + k * M];
1737:           v[j + k * M] = v[k + j * M];
1738:           v[k + j * M] = tmp;
1739:         }
1740:       }
1741:       MatDenseRestoreArray(A, &v);
1742:     } else { /* reuse memory, temporary allocates new memory */
1743:       PetscScalar *v2;
1744:       PetscLayout  tmplayout;

1746:       PetscMalloc1((size_t)m * n, &v2);
1747:       MatDenseGetArray(A, &v);
1748:       for (j = 0; j < n; j++) {
1749:         for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M];
1750:       }
1751:       PetscArraycpy(v, v2, (size_t)m * n);
1752:       PetscFree(v2);
1753:       MatDenseRestoreArray(A, &v);
1754:       /* cleanup size dependent quantities */
1755:       VecDestroy(&mat->cvec);
1756:       MatDestroy(&mat->cmat);
1757:       PetscFree(mat->pivots);
1758:       PetscFree(mat->fwork);
1759:       MatDestroy(&mat->ptapwork);
1760:       /* swap row/col layouts */
1761:       mat->lda  = n;
1762:       tmplayout = A->rmap;
1763:       A->rmap   = A->cmap;
1764:       A->cmap   = tmplayout;
1765:     }
1766:   } else { /* out-of-place transpose */
1767:     Mat           tmat;
1768:     Mat_SeqDense *tmatd;
1769:     PetscScalar  *v2;
1770:     PetscInt      M2;

1772:     if (reuse == MAT_INITIAL_MATRIX) {
1773:       MatCreate(PetscObjectComm((PetscObject)A), &tmat);
1774:       MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n);
1775:       MatSetType(tmat, ((PetscObject)A)->type_name);
1776:       MatSeqDenseSetPreallocation(tmat, NULL);
1777:     } else tmat = *matout;

1779:     MatDenseGetArrayRead(A, (const PetscScalar **)&v);
1780:     MatDenseGetArray(tmat, &v2);
1781:     tmatd = (Mat_SeqDense *)tmat->data;
1782:     M2    = tmatd->lda;
1783:     for (j = 0; j < n; j++) {
1784:       for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M];
1785:     }
1786:     MatDenseRestoreArray(tmat, &v2);
1787:     MatDenseRestoreArrayRead(A, (const PetscScalar **)&v);
1788:     MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY);
1789:     MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY);
1790:     *matout = tmat;
1791:   }
1792:   return 0;
1793: }

1795: static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg)
1796: {
1797:   Mat_SeqDense      *mat1 = (Mat_SeqDense *)A1->data;
1798:   Mat_SeqDense      *mat2 = (Mat_SeqDense *)A2->data;
1799:   PetscInt           i;
1800:   const PetscScalar *v1, *v2;

1802:   if (A1->rmap->n != A2->rmap->n) {
1803:     *flg = PETSC_FALSE;
1804:     return 0;
1805:   }
1806:   if (A1->cmap->n != A2->cmap->n) {
1807:     *flg = PETSC_FALSE;
1808:     return 0;
1809:   }
1810:   MatDenseGetArrayRead(A1, &v1);
1811:   MatDenseGetArrayRead(A2, &v2);
1812:   for (i = 0; i < A1->cmap->n; i++) {
1813:     PetscArraycmp(v1, v2, A1->rmap->n, flg);
1814:     if (*flg == PETSC_FALSE) return 0;
1815:     v1 += mat1->lda;
1816:     v2 += mat2->lda;
1817:   }
1818:   MatDenseRestoreArrayRead(A1, &v1);
1819:   MatDenseRestoreArrayRead(A2, &v2);
1820:   *flg = PETSC_TRUE;
1821:   return 0;
1822: }

1824: static PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v)
1825: {
1826:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1827:   PetscInt           i, n, len;
1828:   PetscScalar       *x;
1829:   const PetscScalar *vv;

1831:   VecGetSize(v, &n);
1832:   VecGetArray(v, &x);
1833:   len = PetscMin(A->rmap->n, A->cmap->n);
1834:   MatDenseGetArrayRead(A, &vv);
1836:   for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i];
1837:   MatDenseRestoreArrayRead(A, &vv);
1838:   VecRestoreArray(v, &x);
1839:   return 0;
1840: }

1842: static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr)
1843: {
1844:   Mat_SeqDense      *mat = (Mat_SeqDense *)A->data;
1845:   const PetscScalar *l, *r;
1846:   PetscScalar        x, *v, *vv;
1847:   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n;

1849:   MatDenseGetArray(A, &vv);
1850:   if (ll) {
1851:     VecGetSize(ll, &m);
1852:     VecGetArrayRead(ll, &l);
1854:     for (i = 0; i < m; i++) {
1855:       x = l[i];
1856:       v = vv + i;
1857:       for (j = 0; j < n; j++) {
1858:         (*v) *= x;
1859:         v += mat->lda;
1860:       }
1861:     }
1862:     VecRestoreArrayRead(ll, &l);
1863:     PetscLogFlops(1.0 * n * m);
1864:   }
1865:   if (rr) {
1866:     VecGetSize(rr, &n);
1867:     VecGetArrayRead(rr, &r);
1869:     for (i = 0; i < n; i++) {
1870:       x = r[i];
1871:       v = vv + i * mat->lda;
1872:       for (j = 0; j < m; j++) (*v++) *= x;
1873:     }
1874:     VecRestoreArrayRead(rr, &r);
1875:     PetscLogFlops(1.0 * n * m);
1876:   }
1877:   MatDenseRestoreArray(A, &vv);
1878:   return 0;
1879: }

1881: PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm)
1882: {
1883:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1884:   PetscScalar  *v, *vv;
1885:   PetscReal     sum = 0.0;
1886:   PetscInt      lda, m = A->rmap->n, i, j;

1888:   MatDenseGetArrayRead(A, (const PetscScalar **)&vv);
1889:   MatDenseGetLDA(A, &lda);
1890:   v = vv;
1891:   if (type == NORM_FROBENIUS) {
1892:     if (lda > m) {
1893:       for (j = 0; j < A->cmap->n; j++) {
1894:         v = vv + j * lda;
1895:         for (i = 0; i < m; i++) {
1896:           sum += PetscRealPart(PetscConj(*v) * (*v));
1897:           v++;
1898:         }
1899:       }
1900:     } else {
1901: #if defined(PETSC_USE_REAL___FP16)
1902:       PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n;
1903:       PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one));
1904:     }
1905: #else
1906:       for (i = 0; i < A->cmap->n * A->rmap->n; i++) {
1907:         sum += PetscRealPart(PetscConj(*v) * (*v));
1908:         v++;
1909:       }
1910:     }
1911:     *nrm = PetscSqrtReal(sum);
1912: #endif
1913:     PetscLogFlops(2.0 * A->cmap->n * A->rmap->n);
1914:   } else if (type == NORM_1) {
1915:     *nrm = 0.0;
1916:     for (j = 0; j < A->cmap->n; j++) {
1917:       v   = vv + j * mat->lda;
1918:       sum = 0.0;
1919:       for (i = 0; i < A->rmap->n; i++) {
1920:         sum += PetscAbsScalar(*v);
1921:         v++;
1922:       }
1923:       if (sum > *nrm) *nrm = sum;
1924:     }
1925:     PetscLogFlops(1.0 * A->cmap->n * A->rmap->n);
1926:   } else if (type == NORM_INFINITY) {
1927:     *nrm = 0.0;
1928:     for (j = 0; j < A->rmap->n; j++) {
1929:       v   = vv + j;
1930:       sum = 0.0;
1931:       for (i = 0; i < A->cmap->n; i++) {
1932:         sum += PetscAbsScalar(*v);
1933:         v += mat->lda;
1934:       }
1935:       if (sum > *nrm) *nrm = sum;
1936:     }
1937:     PetscLogFlops(1.0 * A->cmap->n * A->rmap->n);
1938:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm");
1939:   MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv);
1940:   return 0;
1941: }

1943: static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg)
1944: {
1945:   Mat_SeqDense *aij = (Mat_SeqDense *)A->data;

1947:   switch (op) {
1948:   case MAT_ROW_ORIENTED:
1949:     aij->roworiented = flg;
1950:     break;
1951:   case MAT_NEW_NONZERO_LOCATIONS:
1952:   case MAT_NEW_NONZERO_LOCATION_ERR:
1953:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1954:   case MAT_FORCE_DIAGONAL_ENTRIES:
1955:   case MAT_KEEP_NONZERO_PATTERN:
1956:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1957:   case MAT_USE_HASH_TABLE:
1958:   case MAT_IGNORE_ZERO_ENTRIES:
1959:   case MAT_IGNORE_LOWER_TRIANGULAR:
1960:   case MAT_SORTED_FULL:
1961:     PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
1962:     break;
1963:   case MAT_SPD:
1964:   case MAT_SYMMETRIC:
1965:   case MAT_STRUCTURALLY_SYMMETRIC:
1966:   case MAT_HERMITIAN:
1967:   case MAT_SYMMETRY_ETERNAL:
1968:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1969:   case MAT_SPD_ETERNAL:
1970:     break;
1971:   default:
1972:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
1973:   }
1974:   return 0;
1975: }

1977: PetscErrorCode MatZeroEntries_SeqDense(Mat A)
1978: {
1979:   Mat_SeqDense *l   = (Mat_SeqDense *)A->data;
1980:   PetscInt      lda = l->lda, m = A->rmap->n, n = A->cmap->n, j;
1981:   PetscScalar  *v;

1983:   MatDenseGetArrayWrite(A, &v);
1984:   if (lda > m) {
1985:     for (j = 0; j < n; j++) PetscArrayzero(v + j * lda, m);
1986:   } else {
1987:     PetscArrayzero(v, PetscInt64Mult(m, n));
1988:   }
1989:   MatDenseRestoreArrayWrite(A, &v);
1990:   return 0;
1991: }

1993: static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1994: {
1995:   Mat_SeqDense      *l = (Mat_SeqDense *)A->data;
1996:   PetscInt           m = l->lda, n = A->cmap->n, i, j;
1997:   PetscScalar       *slot, *bb, *v;
1998:   const PetscScalar *xx;

2000:   if (PetscDefined(USE_DEBUG)) {
2001:     for (i = 0; i < N; i++) {
2004:     }
2005:   }
2006:   if (!N) return 0;

2008:   /* fix right hand side if needed */
2009:   if (x && b) {
2010:     VecGetArrayRead(x, &xx);
2011:     VecGetArray(b, &bb);
2012:     for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
2013:     VecRestoreArrayRead(x, &xx);
2014:     VecRestoreArray(b, &bb);
2015:   }

2017:   MatDenseGetArray(A, &v);
2018:   for (i = 0; i < N; i++) {
2019:     slot = v + rows[i];
2020:     for (j = 0; j < n; j++) {
2021:       *slot = 0.0;
2022:       slot += m;
2023:     }
2024:   }
2025:   if (diag != 0.0) {
2027:     for (i = 0; i < N; i++) {
2028:       slot  = v + (m + 1) * rows[i];
2029:       *slot = diag;
2030:     }
2031:   }
2032:   MatDenseRestoreArray(A, &v);
2033:   return 0;
2034: }

2036: static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda)
2037: {
2038:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;

2040:   *lda = mat->lda;
2041:   return 0;
2042: }

2044: PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array)
2045: {
2046:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;

2049:   *array = mat->v;
2050:   return 0;
2051: }

2053: PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array)
2054: {
2055:   if (array) *array = NULL;
2056:   return 0;
2057: }

2059: /*@
2060:    MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()`

2062:    Not collective

2064:    Input Parameter:
2065: .  mat - a `MATDENSE` or `MATDENSECUDA` matrix

2067:    Output Parameter:
2068: .   lda - the leading dimension

2070:    Level: intermediate

2072: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()`
2073: @*/
2074: PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda)
2075: {
2078:   MatCheckPreallocated(A, 1);
2079:   PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda));
2080:   return 0;
2081: }

2083: /*@
2084:    MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix

2086:    Not collective

2088:    Input Parameters:
2089: +  mat - a `MATDENSE` or `MATDENSECUDA` matrix
2090: -  lda - the leading dimension

2092:    Level: intermediate

2094: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()`
2095: @*/
2096: PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda)
2097: {
2099:   PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda));
2100:   return 0;
2101: }

2103: /*@C
2104:    MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored

2106:    Logically Collective on A

2108:    Input Parameter:
2109: .  mat - a dense matrix

2111:    Output Parameter:
2112: .   array - pointer to the data

2114:    Level: intermediate

2116: .seealso: `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2117: @*/
2118: PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array)
2119: {
2122:   PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array));
2123:   return 0;
2124: }

2126: /*@C
2127:    MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()`

2129:    Logically Collective on A

2131:    Input Parameters:
2132: +  mat - a dense matrix
2133: -  array - pointer to the data (may be NULL)

2135:    Level: intermediate

2137: .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2138: @*/
2139: PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array)
2140: {
2143:   PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array));
2144:   PetscObjectStateIncrease((PetscObject)A);
2145: #if defined(PETSC_HAVE_CUDA)
2146:   A->offloadmask = PETSC_OFFLOAD_CPU;
2147: #endif
2148:   return 0;
2149: }

2151: /*@C
2152:   MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE`  matrix is stored

2154:    Not Collective

2156:    Input Parameter:
2157: .  mat - a dense matrix

2159:    Output Parameter:
2160: .   array - pointer to the data

2162:    Level: intermediate

2164: .seealso: `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2165: @*/
2166: PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array)
2167: {
2170:   PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, const PetscScalar **), (A, array));
2171:   return 0;
2172: }

2174: /*@C
2175:    MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()`

2177:    Not Collective

2179:    Input Parameters:
2180: +  mat - a dense matrix
2181: -  array - pointer to the data (may be NULL)

2183:    Level: intermediate

2185: .seealso: `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2186: @*/
2187: PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array)
2188: {
2191:   PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, const PetscScalar **), (A, array));
2192:   return 0;
2193: }

2195: /*@C
2196:    MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored

2198:    Not Collective

2200:    Input Parameter:
2201: .  mat - a dense matrix

2203:    Output Parameter:
2204: .   array - pointer to the data

2206:    Level: intermediate

2208: .seealso: `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2209: @*/
2210: PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array)
2211: {
2214:   PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array));
2215:   return 0;
2216: }

2218: /*@C
2219:    MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()`

2221:    Not Collective

2223:    Input Parameters:
2224: +  mat - a dense matrix
2225: -  array - pointer to the data (may be NULL)

2227:    Level: intermediate

2229: .seealso: `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2230: @*/
2231: PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array)
2232: {
2235:   PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array));
2236:   PetscObjectStateIncrease((PetscObject)A);
2237: #if defined(PETSC_HAVE_CUDA)
2238:   A->offloadmask = PETSC_OFFLOAD_CPU;
2239: #endif
2240:   return 0;
2241: }

2243: static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
2244: {
2245:   Mat_SeqDense   *mat = (Mat_SeqDense *)A->data;
2246:   PetscInt        i, j, nrows, ncols, ldb;
2247:   const PetscInt *irow, *icol;
2248:   PetscScalar    *av, *bv, *v = mat->v;
2249:   Mat             newmat;

2251:   ISGetIndices(isrow, &irow);
2252:   ISGetIndices(iscol, &icol);
2253:   ISGetLocalSize(isrow, &nrows);
2254:   ISGetLocalSize(iscol, &ncols);

2256:   /* Check submatrixcall */
2257:   if (scall == MAT_REUSE_MATRIX) {
2258:     PetscInt n_cols, n_rows;
2259:     MatGetSize(*B, &n_rows, &n_cols);
2260:     if (n_rows != nrows || n_cols != ncols) {
2261:       /* resize the result matrix to match number of requested rows/columns */
2262:       MatSetSizes(*B, nrows, ncols, nrows, ncols);
2263:     }
2264:     newmat = *B;
2265:   } else {
2266:     /* Create and fill new matrix */
2267:     MatCreate(PetscObjectComm((PetscObject)A), &newmat);
2268:     MatSetSizes(newmat, nrows, ncols, nrows, ncols);
2269:     MatSetType(newmat, ((PetscObject)A)->type_name);
2270:     MatSeqDenseSetPreallocation(newmat, NULL);
2271:   }

2273:   /* Now extract the data pointers and do the copy,column at a time */
2274:   MatDenseGetArray(newmat, &bv);
2275:   MatDenseGetLDA(newmat, &ldb);
2276:   for (i = 0; i < ncols; i++) {
2277:     av = v + mat->lda * icol[i];
2278:     for (j = 0; j < nrows; j++) bv[j] = av[irow[j]];
2279:     bv += ldb;
2280:   }
2281:   MatDenseRestoreArray(newmat, &bv);

2283:   /* Assemble the matrices so that the correct flags are set */
2284:   MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY);
2285:   MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY);

2287:   /* Free work space */
2288:   ISRestoreIndices(isrow, &irow);
2289:   ISRestoreIndices(iscol, &icol);
2290:   *B = newmat;
2291:   return 0;
2292: }

2294: static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
2295: {
2296:   PetscInt i;

2298:   if (scall == MAT_INITIAL_MATRIX) PetscCalloc1(n, B);

2300:   for (i = 0; i < n; i++) MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i]);
2301:   return 0;
2302: }

2304: static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode)
2305: {
2306:   return 0;
2307: }

2309: static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode)
2310: {
2311:   return 0;
2312: }

2314: PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str)
2315: {
2316:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data;
2317:   const PetscScalar *va;
2318:   PetscScalar       *vb;
2319:   PetscInt           lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j;

2321:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2322:   if (A->ops->copy != B->ops->copy) {
2323:     MatCopy_Basic(A, B, str);
2324:     return 0;
2325:   }
2327:   MatDenseGetArrayRead(A, &va);
2328:   MatDenseGetArray(B, &vb);
2329:   if (lda1 > m || lda2 > m) {
2330:     for (j = 0; j < n; j++) PetscArraycpy(vb + j * lda2, va + j * lda1, m);
2331:   } else {
2332:     PetscArraycpy(vb, va, A->rmap->n * A->cmap->n);
2333:   }
2334:   MatDenseRestoreArray(B, &vb);
2335:   MatDenseRestoreArrayRead(A, &va);
2336:   MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
2337:   MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
2338:   return 0;
2339: }

2341: PetscErrorCode MatSetUp_SeqDense(Mat A)
2342: {
2343:   PetscLayoutSetUp(A->rmap);
2344:   PetscLayoutSetUp(A->cmap);
2345:   if (!A->preallocated) MatSeqDenseSetPreallocation(A, NULL);
2346:   return 0;
2347: }

2349: static PetscErrorCode MatConjugate_SeqDense(Mat A)
2350: {
2351:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2352:   PetscInt      i, j;
2353:   PetscInt      min = PetscMin(A->rmap->n, A->cmap->n);
2354:   PetscScalar  *aa;

2356:   MatDenseGetArray(A, &aa);
2357:   for (j = 0; j < A->cmap->n; j++) {
2358:     for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]);
2359:   }
2360:   MatDenseRestoreArray(A, &aa);
2361:   if (mat->tau)
2362:     for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]);
2363:   return 0;
2364: }

2366: static PetscErrorCode MatRealPart_SeqDense(Mat A)
2367: {
2368:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2369:   PetscInt      i, j;
2370:   PetscScalar  *aa;

2372:   MatDenseGetArray(A, &aa);
2373:   for (j = 0; j < A->cmap->n; j++) {
2374:     for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]);
2375:   }
2376:   MatDenseRestoreArray(A, &aa);
2377:   return 0;
2378: }

2380: static PetscErrorCode MatImaginaryPart_SeqDense(Mat A)
2381: {
2382:   Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2383:   PetscInt      i, j;
2384:   PetscScalar  *aa;

2386:   MatDenseGetArray(A, &aa);
2387:   for (j = 0; j < A->cmap->n; j++) {
2388:     for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]);
2389:   }
2390:   MatDenseRestoreArray(A, &aa);
2391:   return 0;
2392: }

2394: /* ----------------------------------------------------------------*/
2395: PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2396: {
2397:   PetscInt  m = A->rmap->n, n = B->cmap->n;
2398:   PetscBool cisdense;

2400:   MatSetSizes(C, m, n, m, n);
2401:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2402:   if (!cisdense) {
2403:     PetscBool flg;

2405:     PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2406:     MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2407:   }
2408:   MatSetUp(C);
2409:   return 0;
2410: }

2412: PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2413: {
2414:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data;
2415:   PetscBLASInt       m, n, k;
2416:   const PetscScalar *av, *bv;
2417:   PetscScalar       *cv;
2418:   PetscScalar        _DOne = 1.0, _DZero = 0.0;

2420:   PetscBLASIntCast(C->rmap->n, &m);
2421:   PetscBLASIntCast(C->cmap->n, &n);
2422:   PetscBLASIntCast(A->cmap->n, &k);
2423:   if (!m || !n || !k) return 0;
2424:   MatDenseGetArrayRead(A, &av);
2425:   MatDenseGetArrayRead(B, &bv);
2426:   MatDenseGetArrayWrite(C, &cv);
2427:   PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2428:   PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2429:   MatDenseRestoreArrayRead(A, &av);
2430:   MatDenseRestoreArrayRead(B, &bv);
2431:   MatDenseRestoreArrayWrite(C, &cv);
2432:   return 0;
2433: }

2435: PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2436: {
2437:   PetscInt  m = A->rmap->n, n = B->rmap->n;
2438:   PetscBool cisdense;

2440:   MatSetSizes(C, m, n, m, n);
2441:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2442:   if (!cisdense) {
2443:     PetscBool flg;

2445:     PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2446:     MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2447:   }
2448:   MatSetUp(C);
2449:   return 0;
2450: }

2452: PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2453: {
2454:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2455:   Mat_SeqDense      *b = (Mat_SeqDense *)B->data;
2456:   Mat_SeqDense      *c = (Mat_SeqDense *)C->data;
2457:   const PetscScalar *av, *bv;
2458:   PetscScalar       *cv;
2459:   PetscBLASInt       m, n, k;
2460:   PetscScalar        _DOne = 1.0, _DZero = 0.0;

2462:   PetscBLASIntCast(C->rmap->n, &m);
2463:   PetscBLASIntCast(C->cmap->n, &n);
2464:   PetscBLASIntCast(A->cmap->n, &k);
2465:   if (!m || !n || !k) return 0;
2466:   MatDenseGetArrayRead(A, &av);
2467:   MatDenseGetArrayRead(B, &bv);
2468:   MatDenseGetArrayWrite(C, &cv);
2469:   PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2470:   MatDenseRestoreArrayRead(A, &av);
2471:   MatDenseRestoreArrayRead(B, &bv);
2472:   MatDenseRestoreArrayWrite(C, &cv);
2473:   PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2474:   return 0;
2475: }

2477: PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2478: {
2479:   PetscInt  m = A->cmap->n, n = B->cmap->n;
2480:   PetscBool cisdense;

2482:   MatSetSizes(C, m, n, m, n);
2483:   PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2484:   if (!cisdense) {
2485:     PetscBool flg;

2487:     PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2488:     MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2489:   }
2490:   MatSetUp(C);
2491:   return 0;
2492: }

2494: PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2495: {
2496:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2497:   Mat_SeqDense      *b = (Mat_SeqDense *)B->data;
2498:   Mat_SeqDense      *c = (Mat_SeqDense *)C->data;
2499:   const PetscScalar *av, *bv;
2500:   PetscScalar       *cv;
2501:   PetscBLASInt       m, n, k;
2502:   PetscScalar        _DOne = 1.0, _DZero = 0.0;

2504:   PetscBLASIntCast(C->rmap->n, &m);
2505:   PetscBLASIntCast(C->cmap->n, &n);
2506:   PetscBLASIntCast(A->rmap->n, &k);
2507:   if (!m || !n || !k) return 0;
2508:   MatDenseGetArrayRead(A, &av);
2509:   MatDenseGetArrayRead(B, &bv);
2510:   MatDenseGetArrayWrite(C, &cv);
2511:   PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2512:   MatDenseRestoreArrayRead(A, &av);
2513:   MatDenseRestoreArrayRead(B, &bv);
2514:   MatDenseRestoreArrayWrite(C, &cv);
2515:   PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2516:   return 0;
2517: }

2519: /* ----------------------------------------------- */
2520: static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C)
2521: {
2522:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense;
2523:   C->ops->productsymbolic = MatProductSymbolic_AB;
2524:   return 0;
2525: }

2527: static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C)
2528: {
2529:   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense;
2530:   C->ops->productsymbolic          = MatProductSymbolic_AtB;
2531:   return 0;
2532: }

2534: static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C)
2535: {
2536:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense;
2537:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2538:   return 0;
2539: }

2541: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C)
2542: {
2543:   Mat_Product *product = C->product;

2545:   switch (product->type) {
2546:   case MATPRODUCT_AB:
2547:     MatProductSetFromOptions_SeqDense_AB(C);
2548:     break;
2549:   case MATPRODUCT_AtB:
2550:     MatProductSetFromOptions_SeqDense_AtB(C);
2551:     break;
2552:   case MATPRODUCT_ABt:
2553:     MatProductSetFromOptions_SeqDense_ABt(C);
2554:     break;
2555:   default:
2556:     break;
2557:   }
2558:   return 0;
2559: }
2560: /* ----------------------------------------------- */

2562: static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[])
2563: {
2564:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2565:   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
2566:   PetscScalar       *x;
2567:   const PetscScalar *aa;

2570:   VecGetArray(v, &x);
2571:   VecGetLocalSize(v, &p);
2572:   MatDenseGetArrayRead(A, &aa);
2574:   for (i = 0; i < m; i++) {
2575:     x[i] = aa[i];
2576:     if (idx) idx[i] = 0;
2577:     for (j = 1; j < n; j++) {
2578:       if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) {
2579:         x[i] = aa[i + a->lda * j];
2580:         if (idx) idx[i] = j;
2581:       }
2582:     }
2583:   }
2584:   MatDenseRestoreArrayRead(A, &aa);
2585:   VecRestoreArray(v, &x);
2586:   return 0;
2587: }

2589: static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[])
2590: {
2591:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2592:   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
2593:   PetscScalar       *x;
2594:   PetscReal          atmp;
2595:   const PetscScalar *aa;

2598:   VecGetArray(v, &x);
2599:   VecGetLocalSize(v, &p);
2600:   MatDenseGetArrayRead(A, &aa);
2602:   for (i = 0; i < m; i++) {
2603:     x[i] = PetscAbsScalar(aa[i]);
2604:     for (j = 1; j < n; j++) {
2605:       atmp = PetscAbsScalar(aa[i + a->lda * j]);
2606:       if (PetscAbsScalar(x[i]) < atmp) {
2607:         x[i] = atmp;
2608:         if (idx) idx[i] = j;
2609:       }
2610:     }
2611:   }
2612:   MatDenseRestoreArrayRead(A, &aa);
2613:   VecRestoreArray(v, &x);
2614:   return 0;
2615: }

2617: static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[])
2618: {
2619:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2620:   PetscInt           i, j, m = A->rmap->n, n = A->cmap->n, p;
2621:   PetscScalar       *x;
2622:   const PetscScalar *aa;

2625:   MatDenseGetArrayRead(A, &aa);
2626:   VecGetArray(v, &x);
2627:   VecGetLocalSize(v, &p);
2629:   for (i = 0; i < m; i++) {
2630:     x[i] = aa[i];
2631:     if (idx) idx[i] = 0;
2632:     for (j = 1; j < n; j++) {
2633:       if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) {
2634:         x[i] = aa[i + a->lda * j];
2635:         if (idx) idx[i] = j;
2636:       }
2637:     }
2638:   }
2639:   VecRestoreArray(v, &x);
2640:   MatDenseRestoreArrayRead(A, &aa);
2641:   return 0;
2642: }

2644: PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col)
2645: {
2646:   Mat_SeqDense      *a = (Mat_SeqDense *)A->data;
2647:   PetscScalar       *x;
2648:   const PetscScalar *aa;

2651:   MatDenseGetArrayRead(A, &aa);
2652:   VecGetArray(v, &x);
2653:   PetscArraycpy(x, aa + col * a->lda, A->rmap->n);
2654:   VecRestoreArray(v, &x);
2655:   MatDenseRestoreArrayRead(A, &aa);
2656:   return 0;
2657: }

2659: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions)
2660: {
2661:   PetscInt           i, j, m, n;
2662:   const PetscScalar *a;

2664:   MatGetSize(A, &m, &n);
2665:   PetscArrayzero(reductions, n);
2666:   MatDenseGetArrayRead(A, &a);
2667:   if (type == NORM_2) {
2668:     for (i = 0; i < n; i++) {
2669:       for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]);
2670:       a += m;
2671:     }
2672:   } else if (type == NORM_1) {
2673:     for (i = 0; i < n; i++) {
2674:       for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]);
2675:       a += m;
2676:     }
2677:   } else if (type == NORM_INFINITY) {
2678:     for (i = 0; i < n; i++) {
2679:       for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]);
2680:       a += m;
2681:     }
2682:   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2683:     for (i = 0; i < n; i++) {
2684:       for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]);
2685:       a += m;
2686:     }
2687:   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2688:     for (i = 0; i < n; i++) {
2689:       for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]);
2690:       a += m;
2691:     }
2692:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2693:   MatDenseRestoreArrayRead(A, &a);
2694:   if (type == NORM_2) {
2695:     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2696:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2697:     for (i = 0; i < n; i++) reductions[i] /= m;
2698:   }
2699:   return 0;
2700: }

2702: PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx)
2703: {
2704:   PetscScalar *a;
2705:   PetscInt     lda, m, n, i, j;

2707:   MatGetSize(x, &m, &n);
2708:   MatDenseGetLDA(x, &lda);
2709:   MatDenseGetArrayWrite(x, &a);
2710:   for (j = 0; j < n; j++) {
2711:     for (i = 0; i < m; i++) PetscRandomGetValue(rctx, a + j * lda + i);
2712:   }
2713:   MatDenseRestoreArrayWrite(x, &a);
2714:   return 0;
2715: }

2717: static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d)
2718: {
2719:   *missing = PETSC_FALSE;
2720:   return 0;
2721: }

2723: /* vals is not const */
2724: static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals)
2725: {
2726:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2727:   PetscScalar  *v;

2730:   MatDenseGetArray(A, &v);
2731:   *vals = v + col * a->lda;
2732:   MatDenseRestoreArray(A, &v);
2733:   return 0;
2734: }

2736: static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals)
2737: {
2738:   if (vals) *vals = NULL; /* user cannot accidentally use the array later */
2739:   return 0;
2740: }

2742: /* -------------------------------------------------------------------*/
2743: static struct _MatOps MatOps_Values = {MatSetValues_SeqDense,
2744:                                        MatGetRow_SeqDense,
2745:                                        MatRestoreRow_SeqDense,
2746:                                        MatMult_SeqDense,
2747:                                        /*  4*/ MatMultAdd_SeqDense,
2748:                                        MatMultTranspose_SeqDense,
2749:                                        MatMultTransposeAdd_SeqDense,
2750:                                        NULL,
2751:                                        NULL,
2752:                                        NULL,
2753:                                        /* 10*/ NULL,
2754:                                        MatLUFactor_SeqDense,
2755:                                        MatCholeskyFactor_SeqDense,
2756:                                        MatSOR_SeqDense,
2757:                                        MatTranspose_SeqDense,
2758:                                        /* 15*/ MatGetInfo_SeqDense,
2759:                                        MatEqual_SeqDense,
2760:                                        MatGetDiagonal_SeqDense,
2761:                                        MatDiagonalScale_SeqDense,
2762:                                        MatNorm_SeqDense,
2763:                                        /* 20*/ MatAssemblyBegin_SeqDense,
2764:                                        MatAssemblyEnd_SeqDense,
2765:                                        MatSetOption_SeqDense,
2766:                                        MatZeroEntries_SeqDense,
2767:                                        /* 24*/ MatZeroRows_SeqDense,
2768:                                        NULL,
2769:                                        NULL,
2770:                                        NULL,
2771:                                        NULL,
2772:                                        /* 29*/ MatSetUp_SeqDense,
2773:                                        NULL,
2774:                                        NULL,
2775:                                        NULL,
2776:                                        NULL,
2777:                                        /* 34*/ MatDuplicate_SeqDense,
2778:                                        NULL,
2779:                                        NULL,
2780:                                        NULL,
2781:                                        NULL,
2782:                                        /* 39*/ MatAXPY_SeqDense,
2783:                                        MatCreateSubMatrices_SeqDense,
2784:                                        NULL,
2785:                                        MatGetValues_SeqDense,
2786:                                        MatCopy_SeqDense,
2787:                                        /* 44*/ MatGetRowMax_SeqDense,
2788:                                        MatScale_SeqDense,
2789:                                        MatShift_SeqDense,
2790:                                        NULL,
2791:                                        MatZeroRowsColumns_SeqDense,
2792:                                        /* 49*/ MatSetRandom_SeqDense,
2793:                                        NULL,
2794:                                        NULL,
2795:                                        NULL,
2796:                                        NULL,
2797:                                        /* 54*/ NULL,
2798:                                        NULL,
2799:                                        NULL,
2800:                                        NULL,
2801:                                        NULL,
2802:                                        /* 59*/ MatCreateSubMatrix_SeqDense,
2803:                                        MatDestroy_SeqDense,
2804:                                        MatView_SeqDense,
2805:                                        NULL,
2806:                                        NULL,
2807:                                        /* 64*/ NULL,
2808:                                        NULL,
2809:                                        NULL,
2810:                                        NULL,
2811:                                        NULL,
2812:                                        /* 69*/ MatGetRowMaxAbs_SeqDense,
2813:                                        NULL,
2814:                                        NULL,
2815:                                        NULL,
2816:                                        NULL,
2817:                                        /* 74*/ NULL,
2818:                                        NULL,
2819:                                        NULL,
2820:                                        NULL,
2821:                                        NULL,
2822:                                        /* 79*/ NULL,
2823:                                        NULL,
2824:                                        NULL,
2825:                                        NULL,
2826:                                        /* 83*/ MatLoad_SeqDense,
2827:                                        MatIsSymmetric_SeqDense,
2828:                                        MatIsHermitian_SeqDense,
2829:                                        NULL,
2830:                                        NULL,
2831:                                        NULL,
2832:                                        /* 89*/ NULL,
2833:                                        NULL,
2834:                                        MatMatMultNumeric_SeqDense_SeqDense,
2835:                                        NULL,
2836:                                        NULL,
2837:                                        /* 94*/ NULL,
2838:                                        NULL,
2839:                                        NULL,
2840:                                        MatMatTransposeMultNumeric_SeqDense_SeqDense,
2841:                                        NULL,
2842:                                        /* 99*/ MatProductSetFromOptions_SeqDense,
2843:                                        NULL,
2844:                                        NULL,
2845:                                        MatConjugate_SeqDense,
2846:                                        NULL,
2847:                                        /*104*/ NULL,
2848:                                        MatRealPart_SeqDense,
2849:                                        MatImaginaryPart_SeqDense,
2850:                                        NULL,
2851:                                        NULL,
2852:                                        /*109*/ NULL,
2853:                                        NULL,
2854:                                        MatGetRowMin_SeqDense,
2855:                                        MatGetColumnVector_SeqDense,
2856:                                        MatMissingDiagonal_SeqDense,
2857:                                        /*114*/ NULL,
2858:                                        NULL,
2859:                                        NULL,
2860:                                        NULL,
2861:                                        NULL,
2862:                                        /*119*/ NULL,
2863:                                        NULL,
2864:                                        NULL,
2865:                                        NULL,
2866:                                        NULL,
2867:                                        /*124*/ NULL,
2868:                                        MatGetColumnReductions_SeqDense,
2869:                                        NULL,
2870:                                        NULL,
2871:                                        NULL,
2872:                                        /*129*/ NULL,
2873:                                        NULL,
2874:                                        NULL,
2875:                                        MatTransposeMatMultNumeric_SeqDense_SeqDense,
2876:                                        NULL,
2877:                                        /*134*/ NULL,
2878:                                        NULL,
2879:                                        NULL,
2880:                                        NULL,
2881:                                        NULL,
2882:                                        /*139*/ NULL,
2883:                                        NULL,
2884:                                        NULL,
2885:                                        NULL,
2886:                                        NULL,
2887:                                        MatCreateMPIMatConcatenateSeqMat_SeqDense,
2888:                                        /*145*/ NULL,
2889:                                        NULL,
2890:                                        NULL,
2891:                                        NULL,
2892:                                        NULL,
2893:                                        /*150*/ NULL};

2895: /*@C
2896:    MatCreateSeqDense - Creates a `MATSEQDENSE` that
2897:    is stored in column major order (the usual Fortran 77 manner). Many
2898:    of the matrix operations use the BLAS and LAPACK routines.

2900:    Collective

2902:    Input Parameters:
2903: +  comm - MPI communicator, set to `PETSC_COMM_SELF`
2904: .  m - number of rows
2905: .  n - number of columns
2906: -  data - optional location of matrix data in column major order.  Set data=NULL for PETSc
2907:    to control all matrix memory allocation.

2909:    Output Parameter:
2910: .  A - the matrix

2912:    Note:
2913:    The data input variable is intended primarily for Fortran programmers
2914:    who wish to allocate their own matrix memory space.  Most users should
2915:    set data=NULL.

2917:    Level: intermediate

2919: .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
2920: @*/
2921: PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A)
2922: {
2923:   MatCreate(comm, A);
2924:   MatSetSizes(*A, m, n, m, n);
2925:   MatSetType(*A, MATSEQDENSE);
2926:   MatSeqDenseSetPreallocation(*A, data);
2927:   return 0;
2928: }

2930: /*@C
2931:    MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix

2933:    Collective

2935:    Input Parameters:
2936: +  B - the matrix
2937: -  data - the array (or NULL)

2939:    Note:
2940:    The data input variable is intended primarily for Fortran programmers
2941:    who wish to allocate their own matrix memory space.  Most users should
2942:    need not call this routine.

2944:    Level: intermediate

2946: .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()`
2947: @*/
2948: PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[])
2949: {
2951:   PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data));
2952:   return 0;
2953: }

2955: PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data)
2956: {
2957:   Mat_SeqDense *b = (Mat_SeqDense *)B->data;

2960:   B->preallocated = PETSC_TRUE;

2962:   PetscLayoutSetUp(B->rmap);
2963:   PetscLayoutSetUp(B->cmap);

2965:   if (b->lda <= 0) b->lda = B->rmap->n;

2967:   if (!data) { /* petsc-allocated storage */
2968:     if (!b->user_alloc) PetscFree(b->v);
2969:     PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v);

2971:     b->user_alloc = PETSC_FALSE;
2972:   } else { /* user-allocated storage */
2973:     if (!b->user_alloc) PetscFree(b->v);
2974:     b->v          = data;
2975:     b->user_alloc = PETSC_TRUE;
2976:   }
2977:   B->assembled = PETSC_TRUE;
2978:   return 0;
2979: }

2981: #if defined(PETSC_HAVE_ELEMENTAL)
2982: PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2983: {
2984:   Mat                mat_elemental;
2985:   const PetscScalar *array;
2986:   PetscScalar       *v_colwise;
2987:   PetscInt           M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols;

2989:   PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols);
2990:   MatDenseGetArrayRead(A, &array);
2991:   /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */
2992:   k = 0;
2993:   for (j = 0; j < N; j++) {
2994:     cols[j] = j;
2995:     for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++];
2996:   }
2997:   for (i = 0; i < M; i++) rows[i] = i;
2998:   MatDenseRestoreArrayRead(A, &array);

3000:   MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
3001:   MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N);
3002:   MatSetType(mat_elemental, MATELEMENTAL);
3003:   MatSetUp(mat_elemental);

3005:   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
3006:   MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES);
3007:   MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
3008:   MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
3009:   PetscFree3(v_colwise, rows, cols);

3011:   if (reuse == MAT_INPLACE_MATRIX) {
3012:     MatHeaderReplace(A, &mat_elemental);
3013:   } else {
3014:     *newmat = mat_elemental;
3015:   }
3016:   return 0;
3017: }
3018: #endif

3020: PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda)
3021: {
3022:   Mat_SeqDense *b = (Mat_SeqDense *)B->data;
3023:   PetscBool     data;

3025:   data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE);
3028:   b->lda = lda;
3029:   return 0;
3030: }

3032: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3033: {
3034:   MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat);
3035:   return 0;
3036: }

3038: PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3039: {
3040:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3044:   if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3045:   a->vecinuse = col + 1;
3046:   MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse);
3047:   VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3048:   *v = a->cvec;
3049:   return 0;
3050: }

3052: PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3053: {
3054:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3058:   a->vecinuse = 0;
3059:   MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse);
3060:   VecResetArray(a->cvec);
3061:   if (v) *v = NULL;
3062:   return 0;
3063: }

3065: PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3066: {
3067:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3071:   if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3072:   a->vecinuse = col + 1;
3073:   MatDenseGetArrayRead(A, &a->ptrinuse);
3074:   VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3075:   VecLockReadPush(a->cvec);
3076:   *v = a->cvec;
3077:   return 0;
3078: }

3080: PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3081: {
3082:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3086:   a->vecinuse = 0;
3087:   MatDenseRestoreArrayRead(A, &a->ptrinuse);
3088:   VecLockReadPop(a->cvec);
3089:   VecResetArray(a->cvec);
3090:   if (v) *v = NULL;
3091:   return 0;
3092: }

3094: PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3095: {
3096:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3100:   if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3101:   a->vecinuse = col + 1;
3102:   MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse);
3103:   VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3104:   *v = a->cvec;
3105:   return 0;
3106: }

3108: PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3109: {
3110:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3114:   a->vecinuse = 0;
3115:   MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse);
3116:   VecResetArray(a->cvec);
3117:   if (v) *v = NULL;
3118:   return 0;
3119: }

3121: PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3122: {
3123:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3127:   if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) MatDestroy(&a->cmat);
3128:   if (!a->cmat) {
3129:     MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, a->v + rbegin + (size_t)cbegin * a->lda, &a->cmat);
3130:   } else {
3131:     MatDensePlaceArray(a->cmat, a->v + rbegin + (size_t)cbegin * a->lda);
3132:   }
3133:   MatDenseSetLDA(a->cmat, a->lda);
3134:   a->matinuse = cbegin + 1;
3135:   *v          = a->cmat;
3136: #if defined(PETSC_HAVE_CUDA)
3137:   A->offloadmask = PETSC_OFFLOAD_CPU;
3138: #endif
3139:   return 0;
3140: }

3142: PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v)
3143: {
3144:   Mat_SeqDense *a = (Mat_SeqDense *)A->data;

3149:   a->matinuse = 0;
3150:   MatDenseResetArray(a->cmat);
3151:   if (v) *v = NULL;
3152: #if defined(PETSC_HAVE_CUDA)
3153:   A->offloadmask = PETSC_OFFLOAD_CPU;
3154: #endif
3155:   return 0;
3156: }

3158: /*MC
3159:    MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices.

3161:    Options Database Keys:
3162: . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()`

3164:   Level: beginner

3166: .seealso: `MATSEQDENSE`, `MatCreateSeqDense()`
3167: M*/
3168: PetscErrorCode MatCreate_SeqDense(Mat B)
3169: {
3170:   Mat_SeqDense *b;
3171:   PetscMPIInt   size;

3173:   MPI_Comm_size(PetscObjectComm((PetscObject)B), &size);

3176:   PetscNew(&b);
3177:   PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps));
3178:   B->data = (void *)b;

3180:   b->roworiented = PETSC_TRUE;

3182:   PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense);
3183:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense);
3184:   PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense);
3185:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense);
3186:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense);
3187:   PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense);
3188:   PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense);
3189:   PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense);
3190:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense);
3191:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense);
3192:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense);
3193:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense);
3194:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ);
3195: #if defined(PETSC_HAVE_ELEMENTAL)
3196:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental);
3197: #endif
3198: #if defined(PETSC_HAVE_SCALAPACK)
3199:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK);
3200: #endif
3201: #if defined(PETSC_HAVE_CUDA)
3202:   PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA);
3203:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense);
3204:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense);
3205:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense);
3206: #endif
3207:   PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense);
3208:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense);
3209:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense);
3210:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense);
3211:   PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense);

3213:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense);
3214:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense);
3215:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense);
3216:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense);
3217:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense);
3218:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense);
3219:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense);
3220:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense);
3221:   PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense);
3222:   PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense);
3223:   PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE);
3224:   return 0;
3225: }

3227: /*@C
3228:    MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding.

3230:    Not Collective

3232:    Input Parameters:
3233: +  mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3234: -  col - column index

3236:    Output Parameter:
3237: .  vals - pointer to the data

3239:    Level: intermediate

3241:    Note:
3242:    Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec`

3244: .seealso: `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()`
3245: @*/
3246: PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals)
3247: {
3251:   PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals));
3252:   return 0;
3253: }

3255: /*@C
3256:    MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`.

3258:    Not Collective

3260:    Input Parameters:
3261: +  mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3262: -  vals - pointer to the data (may be NULL)

3264:    Level: intermediate

3266: .seealso: `MATDENSE`, `MatDenseGetColumn()`
3267: @*/
3268: PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals)
3269: {
3272:   PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals));
3273:   return 0;
3274: }

3276: /*@
3277:    MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`.

3279:    Collective

3281:    Input Parameters:
3282: +  mat - the `Mat` object
3283: -  col - the column index

3285:    Output Parameter:
3286: .  v - the vector

3288:    Notes:
3289:      The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed.

3291:      Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access.

3293:    Level: intermediate

3295: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()`
3296: @*/
3297: PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v)
3298: {
3305:   PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3306:   return 0;
3307: }

3309: /*@
3310:    MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVec().

3312:    Collective

3314:    Input Parameters:
3315: +  mat - the Mat object
3316: .  col - the column index
3317: -  v - the Vec object (may be NULL)

3319:    Level: intermediate

3321: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3322: @*/
3323: PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v)
3324: {
3330:   PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3331:   return 0;
3332: }

3334: /*@
3335:    MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a Vec.

3337:    Collective

3339:    Input Parameters:
3340: +  mat - the Mat object
3341: -  col - the column index

3343:    Output Parameter:
3344: .  v - the vector

3346:    Notes:
3347:      The vector is owned by PETSc and users cannot modify it.

3349:      Users need to call MatDenseRestoreColumnVecRead() when the vector is no longer needed.

3351:      Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecWrite() for write-only access.

3353:    Level: intermediate

3355: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3356: @*/
3357: PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v)
3358: {
3365:   PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3366:   return 0;
3367: }

3369: /*@
3370:    MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecRead().

3372:    Collective

3374:    Input Parameters:
3375: +  mat - the Mat object
3376: .  col - the column index
3377: -  v - the Vec object (may be NULL)

3379:    Level: intermediate

3381: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()`
3382: @*/
3383: PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v)
3384: {
3390:   PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3391:   return 0;
3392: }

3394: /*@
3395:    MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a Vec.

3397:    Collective

3399:    Input Parameters:
3400: +  mat - the Mat object
3401: -  col - the column index

3403:    Output Parameter:
3404: .  v - the vector

3406:    Notes:
3407:      The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVecWrite() when the vector is no longer needed.

3409:      Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecRead() for read-only access.

3411:    Level: intermediate

3413: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3414: @*/
3415: PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v)
3416: {
3423:   PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3424:   return 0;
3425: }

3427: /*@
3428:    MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecWrite().

3430:    Collective

3432:    Input Parameters:
3433: +  mat - the Mat object
3434: .  col - the column index
3435: -  v - the Vec object (may be NULL)

3437:    Level: intermediate

3439: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`
3440: @*/
3441: PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v)
3442: {
3448:   PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3449:   return 0;
3450: }

3452: /*@
3453:    MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a Mat.

3455:    Collective

3457:    Input Parameters:
3458: +  mat - the Mat object
3459: .  rbegin - the first global row index in the block (if PETSC_DECIDE, is 0)
3460: .  rend - the global row index past the last one in the block (if PETSC_DECIDE, is M)
3461: .  cbegin - the first global column index in the block (if PETSC_DECIDE, is 0)
3462: -  cend - the global column index past the last one in the block (if PETSC_DECIDE, is N)

3464:    Output Parameter:
3465: .  v - the matrix

3467:    Notes:
3468:      The matrix is owned by PETSc. Users need to call MatDenseRestoreSubMatrix() when the matrix is no longer needed.

3470:      The output matrix is not redistributed by PETSc, so depending on the values of rbegin and rend, some processes may have no local rows.

3472:    Level: intermediate

3474: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()`
3475: @*/
3476: PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3477: {
3485:   if (rbegin == PETSC_DECIDE) rbegin = 0;
3486:   if (rend == PETSC_DECIDE) rend = A->rmap->N;
3487:   if (cbegin == PETSC_DECIDE) cbegin = 0;
3488:   if (cend == PETSC_DECIDE) cend = A->cmap->N;
3494:   PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v));
3495:   return 0;
3496: }

3498: /*@
3499:    MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from MatDenseGetSubMatrix().

3501:    Collective

3503:    Input Parameters:
3504: +  mat - the Mat object
3505: -  v - the Mat object (may be NULL)

3507:    Level: intermediate

3509: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()`
3510: @*/
3511: PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v)
3512: {
3516:   PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v));
3517:   return 0;
3518: }

3520: #include <petscblaslapack.h>
3521: #include <petsc/private/kernels/blockinvert.h>

3523: PetscErrorCode MatSeqDenseInvert(Mat A)
3524: {
3525:   Mat_SeqDense   *a              = (Mat_SeqDense *)A->data;
3526:   PetscInt        bs             = A->rmap->n;
3527:   MatScalar      *values         = a->v;
3528:   const PetscReal shift          = 0.0;
3529:   PetscBool       allowzeropivot = PetscNot(A->erroriffailure), zeropivotdetected = PETSC_FALSE;

3531:   /* factor and invert each block */
3532:   switch (bs) {
3533:   case 1:
3534:     values[0] = (PetscScalar)1.0 / (values[0] + shift);
3535:     break;
3536:   case 2:
3537:     PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected);
3538:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3539:     break;
3540:   case 3:
3541:     PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected);
3542:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3543:     break;
3544:   case 4:
3545:     PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected);
3546:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3547:     break;
3548:   case 5: {
3549:     PetscScalar work[25];
3550:     PetscInt    ipvt[5];

3552:     PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected);
3553:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3554:   } break;
3555:   case 6:
3556:     PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected);
3557:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3558:     break;
3559:   case 7:
3560:     PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected);
3561:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3562:     break;
3563:   default: {
3564:     PetscInt    *v_pivots, *IJ, j;
3565:     PetscScalar *v_work;

3567:     PetscMalloc3(bs, &v_work, bs, &v_pivots, bs, &IJ);
3568:     for (j = 0; j < bs; j++) IJ[j] = j;
3569:     PetscKernel_A_gets_inverse_A(bs, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected);
3570:     if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3571:     PetscFree3(v_work, v_pivots, IJ);
3572:   }
3573:   }
3574:   return 0;
3575: }