Actual source code: dense.c
1: /*
2: Defines the basic matrix operations for sequential dense.
3: */
5: #include <../src/mat/impls/dense/seq/dense.h>
6: #include <petscblaslapack.h>
7: #include <../src/mat/impls/aij/seq/aij.h>
9: PetscErrorCode MatSeqDenseSymmetrize_Private(Mat A, PetscBool hermitian)
10: {
11: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
12: PetscInt j, k, n = A->rmap->n;
13: PetscScalar *v;
16: MatDenseGetArray(A, &v);
17: if (!hermitian) {
18: for (k = 0; k < n; k++) {
19: for (j = k; j < n; j++) v[j * mat->lda + k] = v[k * mat->lda + j];
20: }
21: } else {
22: for (k = 0; k < n; k++) {
23: for (j = k; j < n; j++) v[j * mat->lda + k] = PetscConj(v[k * mat->lda + j]);
24: }
25: }
26: MatDenseRestoreArray(A, &v);
27: return 0;
28: }
30: PETSC_EXTERN PetscErrorCode MatSeqDenseInvertFactors_Private(Mat A)
31: {
32: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
33: PetscBLASInt info, n;
35: if (!A->rmap->n || !A->cmap->n) return 0;
36: PetscBLASIntCast(A->cmap->n, &n);
37: if (A->factortype == MAT_FACTOR_LU) {
39: if (!mat->fwork) {
40: mat->lfwork = n;
41: PetscMalloc1(mat->lfwork, &mat->fwork);
42: }
43: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
44: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
45: PetscFPTrapPop();
46: PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
47: } else if (A->factortype == MAT_FACTOR_CHOLESKY) {
48: if (A->spd == PETSC_BOOL3_TRUE) {
49: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
50: PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &n, mat->v, &mat->lda, &info));
51: PetscFPTrapPop();
52: MatSeqDenseSymmetrize_Private(A, PETSC_TRUE);
53: #if defined(PETSC_USE_COMPLEX)
54: } else if (A->hermitian == PETSC_BOOL3_TRUE) {
57: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
58: PetscCallBLAS("LAPACKhetri", LAPACKhetri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
59: PetscFPTrapPop();
60: MatSeqDenseSymmetrize_Private(A, PETSC_TRUE);
61: #endif
62: } else { /* symmetric case */
65: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
66: PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &info));
67: PetscFPTrapPop();
68: MatSeqDenseSymmetrize_Private(A, PETSC_FALSE);
69: }
71: PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
72: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix must be factored to solve");
74: A->ops->solve = NULL;
75: A->ops->matsolve = NULL;
76: A->ops->solvetranspose = NULL;
77: A->ops->matsolvetranspose = NULL;
78: A->ops->solveadd = NULL;
79: A->ops->solvetransposeadd = NULL;
80: A->factortype = MAT_FACTOR_NONE;
81: PetscFree(A->solvertype);
82: return 0;
83: }
85: PetscErrorCode MatZeroRowsColumns_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
86: {
87: Mat_SeqDense *l = (Mat_SeqDense *)A->data;
88: PetscInt m = l->lda, n = A->cmap->n, r = A->rmap->n, i, j;
89: PetscScalar *slot, *bb, *v;
90: const PetscScalar *xx;
92: if (PetscDefined(USE_DEBUG)) {
93: for (i = 0; i < N; i++) {
97: }
98: }
99: if (!N) return 0;
101: /* fix right hand side if needed */
102: if (x && b) {
103: Vec xt;
106: VecDuplicate(x, &xt);
107: VecCopy(x, xt);
108: VecScale(xt, -1.0);
109: MatMultAdd(A, xt, b, b);
110: VecDestroy(&xt);
111: VecGetArrayRead(x, &xx);
112: VecGetArray(b, &bb);
113: for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
114: VecRestoreArrayRead(x, &xx);
115: VecRestoreArray(b, &bb);
116: }
118: MatDenseGetArray(A, &v);
119: for (i = 0; i < N; i++) {
120: slot = v + rows[i] * m;
121: PetscArrayzero(slot, r);
122: }
123: for (i = 0; i < N; i++) {
124: slot = v + rows[i];
125: for (j = 0; j < n; j++) {
126: *slot = 0.0;
127: slot += m;
128: }
129: }
130: if (diag != 0.0) {
132: for (i = 0; i < N; i++) {
133: slot = v + (m + 1) * rows[i];
134: *slot = diag;
135: }
136: }
137: MatDenseRestoreArray(A, &v);
138: return 0;
139: }
141: PetscErrorCode MatPtAPNumeric_SeqDense_SeqDense(Mat A, Mat P, Mat C)
142: {
143: Mat_SeqDense *c = (Mat_SeqDense *)(C->data);
145: if (c->ptapwork) {
146: (*C->ops->matmultnumeric)(A, P, c->ptapwork);
147: (*C->ops->transposematmultnumeric)(P, c->ptapwork, C);
148: } else SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Must call MatPtAPSymbolic_SeqDense_SeqDense() first");
149: return 0;
150: }
152: PetscErrorCode MatPtAPSymbolic_SeqDense_SeqDense(Mat A, Mat P, PetscReal fill, Mat C)
153: {
154: Mat_SeqDense *c;
155: PetscBool cisdense;
157: MatSetSizes(C, P->cmap->n, P->cmap->n, P->cmap->N, P->cmap->N);
158: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
159: if (!cisdense) {
160: PetscBool flg;
162: PetscObjectTypeCompare((PetscObject)P, ((PetscObject)A)->type_name, &flg);
163: MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
164: }
165: MatSetUp(C);
166: c = (Mat_SeqDense *)C->data;
167: MatCreate(PetscObjectComm((PetscObject)A), &c->ptapwork);
168: MatSetSizes(c->ptapwork, A->rmap->n, P->cmap->n, A->rmap->N, P->cmap->N);
169: MatSetType(c->ptapwork, ((PetscObject)C)->type_name);
170: MatSetUp(c->ptapwork);
171: return 0;
172: }
174: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
175: {
176: Mat B = NULL;
177: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
178: Mat_SeqDense *b;
179: PetscInt *ai = a->i, *aj = a->j, m = A->rmap->N, n = A->cmap->N, i;
180: const MatScalar *av;
181: PetscBool isseqdense;
183: if (reuse == MAT_REUSE_MATRIX) {
184: PetscObjectTypeCompare((PetscObject)*newmat, MATSEQDENSE, &isseqdense);
186: }
187: if (reuse != MAT_REUSE_MATRIX) {
188: MatCreate(PetscObjectComm((PetscObject)A), &B);
189: MatSetSizes(B, m, n, m, n);
190: MatSetType(B, MATSEQDENSE);
191: MatSeqDenseSetPreallocation(B, NULL);
192: b = (Mat_SeqDense *)(B->data);
193: } else {
194: b = (Mat_SeqDense *)((*newmat)->data);
195: PetscArrayzero(b->v, m * n);
196: }
197: MatSeqAIJGetArrayRead(A, &av);
198: for (i = 0; i < m; i++) {
199: PetscInt j;
200: for (j = 0; j < ai[1] - ai[0]; j++) {
201: b->v[*aj * m + i] = *av;
202: aj++;
203: av++;
204: }
205: ai++;
206: }
207: MatSeqAIJRestoreArrayRead(A, &av);
209: if (reuse == MAT_INPLACE_MATRIX) {
210: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
211: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
212: MatHeaderReplace(A, &B);
213: } else {
214: if (B) *newmat = B;
215: MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY);
216: MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY);
217: }
218: return 0;
219: }
221: PETSC_INTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
222: {
223: Mat B = NULL;
224: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
225: PetscInt i, j;
226: PetscInt *rows, *nnz;
227: MatScalar *aa = a->v, *vals;
229: PetscCalloc3(A->rmap->n, &rows, A->rmap->n, &nnz, A->rmap->n, &vals);
230: if (reuse != MAT_REUSE_MATRIX) {
231: MatCreate(PetscObjectComm((PetscObject)A), &B);
232: MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N);
233: MatSetType(B, MATSEQAIJ);
234: for (j = 0; j < A->cmap->n; j++) {
235: for (i = 0; i < A->rmap->n; i++)
236: if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) ++nnz[i];
237: aa += a->lda;
238: }
239: MatSeqAIJSetPreallocation(B, PETSC_DETERMINE, nnz);
240: } else B = *newmat;
241: aa = a->v;
242: for (j = 0; j < A->cmap->n; j++) {
243: PetscInt numRows = 0;
244: for (i = 0; i < A->rmap->n; i++)
245: if (aa[i] != 0.0 || (i == j && A->cmap->n == A->rmap->n)) {
246: rows[numRows] = i;
247: vals[numRows++] = aa[i];
248: }
249: MatSetValues(B, numRows, rows, 1, &j, vals, INSERT_VALUES);
250: aa += a->lda;
251: }
252: PetscFree3(rows, nnz, vals);
253: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
254: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
256: if (reuse == MAT_INPLACE_MATRIX) {
257: MatHeaderReplace(A, &B);
258: } else if (reuse != MAT_REUSE_MATRIX) *newmat = B;
259: return 0;
260: }
262: PetscErrorCode MatAXPY_SeqDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
263: {
264: Mat_SeqDense *x = (Mat_SeqDense *)X->data, *y = (Mat_SeqDense *)Y->data;
265: const PetscScalar *xv;
266: PetscScalar *yv;
267: PetscBLASInt N, m, ldax = 0, lday = 0, one = 1;
269: MatDenseGetArrayRead(X, &xv);
270: MatDenseGetArray(Y, &yv);
271: PetscBLASIntCast(X->rmap->n * X->cmap->n, &N);
272: PetscBLASIntCast(X->rmap->n, &m);
273: PetscBLASIntCast(x->lda, &ldax);
274: PetscBLASIntCast(y->lda, &lday);
275: if (ldax > m || lday > m) {
276: PetscInt j;
278: for (j = 0; j < X->cmap->n; j++) PetscCallBLAS("BLASaxpy", BLASaxpy_(&m, &alpha, xv + j * ldax, &one, yv + j * lday, &one));
279: } else {
280: PetscCallBLAS("BLASaxpy", BLASaxpy_(&N, &alpha, xv, &one, yv, &one));
281: }
282: MatDenseRestoreArrayRead(X, &xv);
283: MatDenseRestoreArray(Y, &yv);
284: PetscLogFlops(PetscMax(2.0 * N - 1, 0));
285: return 0;
286: }
288: static PetscErrorCode MatGetInfo_SeqDense(Mat A, MatInfoType flag, MatInfo *info)
289: {
290: PetscLogDouble N = A->rmap->n * A->cmap->n;
292: info->block_size = 1.0;
293: info->nz_allocated = N;
294: info->nz_used = N;
295: info->nz_unneeded = 0;
296: info->assemblies = A->num_ass;
297: info->mallocs = 0;
298: info->memory = 0; /* REVIEW ME */
299: info->fill_ratio_given = 0;
300: info->fill_ratio_needed = 0;
301: info->factor_mallocs = 0;
302: return 0;
303: }
305: PetscErrorCode MatScale_SeqDense(Mat A, PetscScalar alpha)
306: {
307: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
308: PetscScalar *v;
309: PetscBLASInt one = 1, j, nz, lda = 0;
311: MatDenseGetArray(A, &v);
312: PetscBLASIntCast(a->lda, &lda);
313: if (lda > A->rmap->n) {
314: PetscBLASIntCast(A->rmap->n, &nz);
315: for (j = 0; j < A->cmap->n; j++) PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v + j * lda, &one));
316: } else {
317: PetscBLASIntCast(A->rmap->n * A->cmap->n, &nz);
318: PetscCallBLAS("BLASscal", BLASscal_(&nz, &alpha, v, &one));
319: }
320: PetscLogFlops(A->rmap->n * A->cmap->n);
321: MatDenseRestoreArray(A, &v);
322: return 0;
323: }
325: PetscErrorCode MatShift_SeqDense(Mat A, PetscScalar alpha)
326: {
327: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
328: PetscScalar *v;
329: PetscInt j, k;
331: MatDenseGetArray(A, &v);
332: k = PetscMin(A->rmap->n, A->cmap->n);
333: for (j = 0; j < k; j++) v[j + j * a->lda] += alpha;
334: PetscLogFlops(k);
335: MatDenseRestoreArray(A, &v);
336: return 0;
337: }
339: static PetscErrorCode MatIsHermitian_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
340: {
341: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
342: PetscInt i, j, m = A->rmap->n, N = a->lda;
343: const PetscScalar *v;
345: *fl = PETSC_FALSE;
346: if (A->rmap->n != A->cmap->n) return 0;
347: MatDenseGetArrayRead(A, &v);
348: for (i = 0; i < m; i++) {
349: for (j = i; j < m; j++) {
350: if (PetscAbsScalar(v[i + j * N] - PetscConj(v[j + i * N])) > rtol) goto restore;
351: }
352: }
353: *fl = PETSC_TRUE;
354: restore:
355: MatDenseRestoreArrayRead(A, &v);
356: return 0;
357: }
359: static PetscErrorCode MatIsSymmetric_SeqDense(Mat A, PetscReal rtol, PetscBool *fl)
360: {
361: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
362: PetscInt i, j, m = A->rmap->n, N = a->lda;
363: const PetscScalar *v;
365: *fl = PETSC_FALSE;
366: if (A->rmap->n != A->cmap->n) return 0;
367: MatDenseGetArrayRead(A, &v);
368: for (i = 0; i < m; i++) {
369: for (j = i; j < m; j++) {
370: if (PetscAbsScalar(v[i + j * N] - v[j + i * N]) > rtol) goto restore;
371: }
372: }
373: *fl = PETSC_TRUE;
374: restore:
375: MatDenseRestoreArrayRead(A, &v);
376: return 0;
377: }
379: PetscErrorCode MatDuplicateNoCreate_SeqDense(Mat newi, Mat A, MatDuplicateOption cpvalues)
380: {
381: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
382: PetscInt lda = (PetscInt)mat->lda, j, m, nlda = lda;
383: PetscBool isdensecpu;
385: PetscLayoutReference(A->rmap, &newi->rmap);
386: PetscLayoutReference(A->cmap, &newi->cmap);
387: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { /* propagate LDA */
388: MatDenseSetLDA(newi, lda);
389: }
390: PetscObjectTypeCompare((PetscObject)newi, MATSEQDENSE, &isdensecpu);
391: if (isdensecpu) MatSeqDenseSetPreallocation(newi, NULL);
392: if (cpvalues == MAT_COPY_VALUES) {
393: const PetscScalar *av;
394: PetscScalar *v;
396: MatDenseGetArrayRead(A, &av);
397: MatDenseGetArrayWrite(newi, &v);
398: MatDenseGetLDA(newi, &nlda);
399: m = A->rmap->n;
400: if (lda > m || nlda > m) {
401: for (j = 0; j < A->cmap->n; j++) PetscArraycpy(v + j * nlda, av + j * lda, m);
402: } else {
403: PetscArraycpy(v, av, A->rmap->n * A->cmap->n);
404: }
405: MatDenseRestoreArrayWrite(newi, &v);
406: MatDenseRestoreArrayRead(A, &av);
407: }
408: return 0;
409: }
411: PetscErrorCode MatDuplicate_SeqDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
412: {
413: MatCreate(PetscObjectComm((PetscObject)A), newmat);
414: MatSetSizes(*newmat, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n);
415: MatSetType(*newmat, ((PetscObject)A)->type_name);
416: MatDuplicateNoCreate_SeqDense(*newmat, A, cpvalues);
417: return 0;
418: }
420: static PetscErrorCode MatSolve_SeqDense_Internal_LU(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
421: {
422: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
423: PetscBLASInt info;
425: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
426: PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_(T ? "T" : "N", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
427: PetscFPTrapPop();
429: PetscLogFlops(nrhs * (2.0 * m * m - m));
430: return 0;
431: }
433: static PetscErrorCode MatConjugate_SeqDense(Mat);
435: static PetscErrorCode MatSolve_SeqDense_Internal_Cholesky(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k, PetscBool T)
436: {
437: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
438: PetscBLASInt info;
440: if (A->spd == PETSC_BOOL3_TRUE) {
441: if (PetscDefined(USE_COMPLEX) && T) MatConjugate_SeqDense(A);
442: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
443: PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &m, &nrhs, mat->v, &mat->lda, x, &m, &info));
444: PetscFPTrapPop();
446: if (PetscDefined(USE_COMPLEX) && T) MatConjugate_SeqDense(A);
447: #if defined(PETSC_USE_COMPLEX)
448: } else if (A->hermitian == PETSC_BOOL3_TRUE) {
449: if (T) MatConjugate_SeqDense(A);
450: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
451: PetscCallBLAS("LAPACKhetrs", LAPACKhetrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
452: PetscFPTrapPop();
454: if (T) MatConjugate_SeqDense(A);
455: #endif
456: } else { /* symmetric case */
457: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
458: PetscCallBLAS("LAPACKsytrs", LAPACKsytrs_("L", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
459: PetscFPTrapPop();
461: }
462: PetscLogFlops(nrhs * (2.0 * m * m - m));
463: return 0;
464: }
466: static PetscErrorCode MatSolve_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
467: {
468: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
469: PetscBLASInt info;
470: char trans;
472: if (PetscDefined(USE_COMPLEX)) {
473: trans = 'C';
474: } else {
475: trans = 'T';
476: }
477: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
478: { /* lwork depends on the number of right-hand sides */
479: PetscBLASInt nlfwork, lfwork = -1;
480: PetscScalar fwork;
482: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
483: nlfwork = (PetscBLASInt)PetscRealPart(fwork);
484: if (nlfwork > mat->lfwork) {
485: mat->lfwork = nlfwork;
486: PetscFree(mat->fwork);
487: PetscMalloc1(mat->lfwork, &mat->fwork);
488: }
489: }
490: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", &trans, &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
491: PetscFPTrapPop();
493: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
494: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &mat->rank, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
495: PetscFPTrapPop();
497: for (PetscInt j = 0; j < nrhs; j++) {
498: for (PetscInt i = mat->rank; i < k; i++) x[j * ldx + i] = 0.;
499: }
500: PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)));
501: return 0;
502: }
504: static PetscErrorCode MatSolveTranspose_SeqDense_Internal_QR(Mat A, PetscScalar *x, PetscBLASInt ldx, PetscBLASInt m, PetscBLASInt nrhs, PetscBLASInt k)
505: {
506: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
507: PetscBLASInt info;
509: if (A->rmap->n == A->cmap->n && mat->rank == A->rmap->n) {
510: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
511: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &m, &nrhs, mat->v, &mat->lda, x, &ldx, &info));
512: PetscFPTrapPop();
514: if (PetscDefined(USE_COMPLEX)) MatConjugate_SeqDense(A);
515: { /* lwork depends on the number of right-hand sides */
516: PetscBLASInt nlfwork, lfwork = -1;
517: PetscScalar fwork;
519: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, &fwork, &lfwork, &info));
520: nlfwork = (PetscBLASInt)PetscRealPart(fwork);
521: if (nlfwork > mat->lfwork) {
522: mat->lfwork = nlfwork;
523: PetscFree(mat->fwork);
524: PetscMalloc1(mat->lfwork, &mat->fwork);
525: }
526: }
527: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
528: PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "N", &m, &nrhs, &mat->rank, mat->v, &mat->lda, mat->tau, x, &ldx, mat->fwork, &mat->lfwork, &info));
529: PetscFPTrapPop();
531: if (PetscDefined(USE_COMPLEX)) MatConjugate_SeqDense(A);
532: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "QR factored matrix cannot be used for transpose solve");
533: PetscLogFlops(nrhs * (4.0 * m * mat->rank - PetscSqr(mat->rank)));
534: return 0;
535: }
537: static PetscErrorCode MatSolve_SeqDense_SetUp(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
538: {
539: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
540: PetscScalar *y;
541: PetscBLASInt m = 0, k = 0;
543: PetscBLASIntCast(A->rmap->n, &m);
544: PetscBLASIntCast(A->cmap->n, &k);
545: if (k < m) {
546: VecCopy(xx, mat->qrrhs);
547: VecGetArray(mat->qrrhs, &y);
548: } else {
549: VecCopy(xx, yy);
550: VecGetArray(yy, &y);
551: }
552: *_y = y;
553: *_k = k;
554: *_m = m;
555: return 0;
556: }
558: static PetscErrorCode MatSolve_SeqDense_TearDown(Mat A, Vec xx, Vec yy, PetscScalar **_y, PetscBLASInt *_m, PetscBLASInt *_k)
559: {
560: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
561: PetscScalar *y = NULL;
562: PetscBLASInt m, k;
564: y = *_y;
565: *_y = NULL;
566: k = *_k;
567: m = *_m;
568: if (k < m) {
569: PetscScalar *yv;
570: VecGetArray(yy, &yv);
571: PetscArraycpy(yv, y, k);
572: VecRestoreArray(yy, &yv);
573: VecRestoreArray(mat->qrrhs, &y);
574: } else {
575: VecRestoreArray(yy, &y);
576: }
577: return 0;
578: }
580: static PetscErrorCode MatSolve_SeqDense_LU(Mat A, Vec xx, Vec yy)
581: {
582: PetscScalar *y = NULL;
583: PetscBLASInt m = 0, k = 0;
585: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
586: MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_FALSE);
587: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
588: return 0;
589: }
591: static PetscErrorCode MatSolveTranspose_SeqDense_LU(Mat A, Vec xx, Vec yy)
592: {
593: PetscScalar *y = NULL;
594: PetscBLASInt m = 0, k = 0;
596: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
597: MatSolve_SeqDense_Internal_LU(A, y, m, m, 1, k, PETSC_TRUE);
598: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
599: return 0;
600: }
602: static PetscErrorCode MatSolve_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
603: {
604: PetscScalar *y = NULL;
605: PetscBLASInt m = 0, k = 0;
607: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
608: MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_FALSE);
609: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
610: return 0;
611: }
613: static PetscErrorCode MatSolveTranspose_SeqDense_Cholesky(Mat A, Vec xx, Vec yy)
614: {
615: PetscScalar *y = NULL;
616: PetscBLASInt m = 0, k = 0;
618: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
619: MatSolve_SeqDense_Internal_Cholesky(A, y, m, m, 1, k, PETSC_TRUE);
620: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
621: return 0;
622: }
624: static PetscErrorCode MatSolve_SeqDense_QR(Mat A, Vec xx, Vec yy)
625: {
626: PetscScalar *y = NULL;
627: PetscBLASInt m = 0, k = 0;
629: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
630: MatSolve_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k);
631: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
632: return 0;
633: }
635: static PetscErrorCode MatSolveTranspose_SeqDense_QR(Mat A, Vec xx, Vec yy)
636: {
637: PetscScalar *y = NULL;
638: PetscBLASInt m = 0, k = 0;
640: MatSolve_SeqDense_SetUp(A, xx, yy, &y, &m, &k);
641: MatSolveTranspose_SeqDense_Internal_QR(A, y, PetscMax(m, k), m, 1, k);
642: MatSolve_SeqDense_TearDown(A, xx, yy, &y, &m, &k);
643: return 0;
644: }
646: static PetscErrorCode MatMatSolve_SeqDense_SetUp(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
647: {
648: const PetscScalar *b;
649: PetscScalar *y;
650: PetscInt n, _ldb, _ldx;
651: PetscBLASInt nrhs = 0, m = 0, k = 0, ldb = 0, ldx = 0, ldy = 0;
653: *_ldy = 0;
654: *_m = 0;
655: *_nrhs = 0;
656: *_k = 0;
657: *_y = NULL;
658: PetscBLASIntCast(A->rmap->n, &m);
659: PetscBLASIntCast(A->cmap->n, &k);
660: MatGetSize(B, NULL, &n);
661: PetscBLASIntCast(n, &nrhs);
662: MatDenseGetLDA(B, &_ldb);
663: PetscBLASIntCast(_ldb, &ldb);
664: MatDenseGetLDA(X, &_ldx);
665: PetscBLASIntCast(_ldx, &ldx);
666: if (ldx < m) {
667: MatDenseGetArrayRead(B, &b);
668: PetscMalloc1(nrhs * m, &y);
669: if (ldb == m) {
670: PetscArraycpy(y, b, ldb * nrhs);
671: } else {
672: for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&y[j * m], &b[j * ldb], m);
673: }
674: ldy = m;
675: MatDenseRestoreArrayRead(B, &b);
676: } else {
677: if (ldb == ldx) {
678: MatCopy(B, X, SAME_NONZERO_PATTERN);
679: MatDenseGetArray(X, &y);
680: } else {
681: MatDenseGetArray(X, &y);
682: MatDenseGetArrayRead(B, &b);
683: for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&y[j * ldx], &b[j * ldb], m);
684: MatDenseRestoreArrayRead(B, &b);
685: }
686: ldy = ldx;
687: }
688: *_y = y;
689: *_ldy = ldy;
690: *_k = k;
691: *_m = m;
692: *_nrhs = nrhs;
693: return 0;
694: }
696: static PetscErrorCode MatMatSolve_SeqDense_TearDown(Mat A, Mat B, Mat X, PetscScalar **_y, PetscBLASInt *_ldy, PetscBLASInt *_m, PetscBLASInt *_nrhs, PetscBLASInt *_k)
697: {
698: PetscScalar *y;
699: PetscInt _ldx;
700: PetscBLASInt k, ldy, nrhs, ldx = 0;
702: y = *_y;
703: *_y = NULL;
704: k = *_k;
705: ldy = *_ldy;
706: nrhs = *_nrhs;
707: MatDenseGetLDA(X, &_ldx);
708: PetscBLASIntCast(_ldx, &ldx);
709: if (ldx != ldy) {
710: PetscScalar *xv;
711: MatDenseGetArray(X, &xv);
712: for (PetscInt j = 0; j < nrhs; j++) PetscArraycpy(&xv[j * ldx], &y[j * ldy], k);
713: MatDenseRestoreArray(X, &xv);
714: PetscFree(y);
715: } else {
716: MatDenseRestoreArray(X, &y);
717: }
718: return 0;
719: }
721: static PetscErrorCode MatMatSolve_SeqDense_LU(Mat A, Mat B, Mat X)
722: {
723: PetscScalar *y;
724: PetscBLASInt m, k, ldy, nrhs;
726: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
727: MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_FALSE);
728: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
729: return 0;
730: }
732: static PetscErrorCode MatMatSolveTranspose_SeqDense_LU(Mat A, Mat B, Mat X)
733: {
734: PetscScalar *y;
735: PetscBLASInt m, k, ldy, nrhs;
737: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
738: MatSolve_SeqDense_Internal_LU(A, y, ldy, m, nrhs, k, PETSC_TRUE);
739: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
740: return 0;
741: }
743: static PetscErrorCode MatMatSolve_SeqDense_Cholesky(Mat A, Mat B, Mat X)
744: {
745: PetscScalar *y;
746: PetscBLASInt m, k, ldy, nrhs;
748: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
749: MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_FALSE);
750: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
751: return 0;
752: }
754: static PetscErrorCode MatMatSolveTranspose_SeqDense_Cholesky(Mat A, Mat B, Mat X)
755: {
756: PetscScalar *y;
757: PetscBLASInt m, k, ldy, nrhs;
759: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
760: MatSolve_SeqDense_Internal_Cholesky(A, y, ldy, m, nrhs, k, PETSC_TRUE);
761: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
762: return 0;
763: }
765: static PetscErrorCode MatMatSolve_SeqDense_QR(Mat A, Mat B, Mat X)
766: {
767: PetscScalar *y;
768: PetscBLASInt m, k, ldy, nrhs;
770: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
771: MatSolve_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k);
772: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
773: return 0;
774: }
776: static PetscErrorCode MatMatSolveTranspose_SeqDense_QR(Mat A, Mat B, Mat X)
777: {
778: PetscScalar *y;
779: PetscBLASInt m, k, ldy, nrhs;
781: MatMatSolve_SeqDense_SetUp(A, B, X, &y, &ldy, &m, &nrhs, &k);
782: MatSolveTranspose_SeqDense_Internal_QR(A, y, ldy, m, nrhs, k);
783: MatMatSolve_SeqDense_TearDown(A, B, X, &y, &ldy, &m, &nrhs, &k);
784: return 0;
785: }
787: /* ---------------------------------------------------------------*/
788: /* COMMENT: I have chosen to hide row permutation in the pivots,
789: rather than put it in the Mat->row slot.*/
790: PetscErrorCode MatLUFactor_SeqDense(Mat A, IS row, IS col, const MatFactorInfo *minfo)
791: {
792: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
793: PetscBLASInt n, m, info;
795: PetscBLASIntCast(A->cmap->n, &n);
796: PetscBLASIntCast(A->rmap->n, &m);
797: if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
798: if (!A->rmap->n || !A->cmap->n) return 0;
799: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
800: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&m, &n, mat->v, &mat->lda, mat->pivots, &info));
801: PetscFPTrapPop();
806: A->ops->solve = MatSolve_SeqDense_LU;
807: A->ops->matsolve = MatMatSolve_SeqDense_LU;
808: A->ops->solvetranspose = MatSolveTranspose_SeqDense_LU;
809: A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_LU;
810: A->factortype = MAT_FACTOR_LU;
812: PetscFree(A->solvertype);
813: PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);
815: PetscLogFlops((2.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3);
816: return 0;
817: }
819: static PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
820: {
821: MatFactorInfo info;
823: MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
824: PetscUseTypeMethod(fact, lufactor, NULL, NULL, &info);
825: return 0;
826: }
828: PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
829: {
830: fact->preallocated = PETSC_TRUE;
831: fact->assembled = PETSC_TRUE;
832: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense;
833: return 0;
834: }
836: /* Cholesky as L*L^T or L*D*L^T and the symmetric/hermitian complex variants */
837: PetscErrorCode MatCholeskyFactor_SeqDense(Mat A, IS perm, const MatFactorInfo *factinfo)
838: {
839: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
840: PetscBLASInt info, n;
842: PetscBLASIntCast(A->cmap->n, &n);
843: if (!A->rmap->n || !A->cmap->n) return 0;
844: if (A->spd == PETSC_BOOL3_TRUE) {
845: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
846: PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &n, mat->v, &mat->lda, &info));
847: PetscFPTrapPop();
848: #if defined(PETSC_USE_COMPLEX)
849: } else if (A->hermitian == PETSC_BOOL3_TRUE) {
850: if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
851: if (!mat->fwork) {
852: PetscScalar dummy;
854: mat->lfwork = -1;
855: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
856: PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
857: PetscFPTrapPop();
858: mat->lfwork = (PetscInt)PetscRealPart(dummy);
859: PetscMalloc1(mat->lfwork, &mat->fwork);
860: }
861: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
862: PetscCallBLAS("LAPACKhetrf", LAPACKhetrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
863: PetscFPTrapPop();
864: #endif
865: } else { /* symmetric case */
866: if (!mat->pivots) { PetscMalloc1(A->rmap->n, &mat->pivots); }
867: if (!mat->fwork) {
868: PetscScalar dummy;
870: mat->lfwork = -1;
871: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
872: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, &dummy, &mat->lfwork, &info));
873: PetscFPTrapPop();
874: mat->lfwork = (PetscInt)PetscRealPart(dummy);
875: PetscMalloc1(mat->lfwork, &mat->fwork);
876: }
877: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
878: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &n, mat->v, &mat->lda, mat->pivots, mat->fwork, &mat->lfwork, &info));
879: PetscFPTrapPop();
880: }
883: A->ops->solve = MatSolve_SeqDense_Cholesky;
884: A->ops->matsolve = MatMatSolve_SeqDense_Cholesky;
885: A->ops->solvetranspose = MatSolveTranspose_SeqDense_Cholesky;
886: A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_Cholesky;
887: A->factortype = MAT_FACTOR_CHOLESKY;
889: PetscFree(A->solvertype);
890: PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);
892: PetscLogFlops((1.0 * A->cmap->n * A->cmap->n * A->cmap->n) / 3.0);
893: return 0;
894: }
896: static PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
897: {
898: MatFactorInfo info;
900: info.fill = 1.0;
902: MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
903: PetscUseTypeMethod(fact, choleskyfactor, NULL, &info);
904: return 0;
905: }
907: PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
908: {
909: fact->assembled = PETSC_TRUE;
910: fact->preallocated = PETSC_TRUE;
911: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense;
912: return 0;
913: }
915: PetscErrorCode MatQRFactor_SeqDense(Mat A, IS col, const MatFactorInfo *minfo)
916: {
917: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
918: PetscBLASInt n, m, info, min, max;
920: PetscBLASIntCast(A->cmap->n, &n);
921: PetscBLASIntCast(A->rmap->n, &m);
922: max = PetscMax(m, n);
923: min = PetscMin(m, n);
924: if (!mat->tau) { PetscMalloc1(min, &mat->tau); }
925: if (!mat->pivots) { PetscMalloc1(n, &mat->pivots); }
926: if (!mat->qrrhs) MatCreateVecs(A, NULL, &(mat->qrrhs));
927: if (!A->rmap->n || !A->cmap->n) return 0;
928: if (!mat->fwork) {
929: PetscScalar dummy;
931: mat->lfwork = -1;
932: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
933: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, &dummy, &mat->lfwork, &info));
934: PetscFPTrapPop();
935: mat->lfwork = (PetscInt)PetscRealPart(dummy);
936: PetscMalloc1(mat->lfwork, &mat->fwork);
937: }
938: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
939: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&m, &n, mat->v, &mat->lda, mat->tau, mat->fwork, &mat->lfwork, &info));
940: PetscFPTrapPop();
942: // TODO: try to estimate rank or test for and use geqp3 for rank revealing QR. For now just say rank is min of m and n
943: mat->rank = min;
945: A->ops->solve = MatSolve_SeqDense_QR;
946: A->ops->matsolve = MatMatSolve_SeqDense_QR;
947: A->factortype = MAT_FACTOR_QR;
948: if (m == n) {
949: A->ops->solvetranspose = MatSolveTranspose_SeqDense_QR;
950: A->ops->matsolvetranspose = MatMatSolveTranspose_SeqDense_QR;
951: }
953: PetscFree(A->solvertype);
954: PetscStrallocpy(MATSOLVERPETSC, &A->solvertype);
956: PetscLogFlops(2.0 * min * min * (max - min / 3.0));
957: return 0;
958: }
960: static PetscErrorCode MatQRFactorNumeric_SeqDense(Mat fact, Mat A, const MatFactorInfo *info_dummy)
961: {
962: MatFactorInfo info;
964: info.fill = 1.0;
966: MatDuplicateNoCreate_SeqDense(fact, A, MAT_COPY_VALUES);
967: PetscUseMethod(fact, "MatQRFactor_C", (Mat, IS, const MatFactorInfo *), (fact, NULL, &info));
968: return 0;
969: }
971: PetscErrorCode MatQRFactorSymbolic_SeqDense(Mat fact, Mat A, IS row, const MatFactorInfo *info)
972: {
973: fact->assembled = PETSC_TRUE;
974: fact->preallocated = PETSC_TRUE;
975: PetscObjectComposeFunction((PetscObject)fact, "MatQRFactorNumeric_C", MatQRFactorNumeric_SeqDense);
976: return 0;
977: }
979: /* uses LAPACK */
980: PETSC_INTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A, MatFactorType ftype, Mat *fact)
981: {
982: MatCreate(PetscObjectComm((PetscObject)A), fact);
983: MatSetSizes(*fact, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n);
984: MatSetType(*fact, MATDENSE);
985: (*fact)->trivialsymbolic = PETSC_TRUE;
986: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
987: (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense;
988: (*fact)->ops->ilufactorsymbolic = MatLUFactorSymbolic_SeqDense;
989: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
990: (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense;
991: } else if (ftype == MAT_FACTOR_QR) {
992: PetscObjectComposeFunction((PetscObject)(*fact), "MatQRFactorSymbolic_C", MatQRFactorSymbolic_SeqDense);
993: }
994: (*fact)->factortype = ftype;
996: PetscFree((*fact)->solvertype);
997: PetscStrallocpy(MATSOLVERPETSC, &(*fact)->solvertype);
998: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_LU]);
999: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ILU]);
1000: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_CHOLESKY]);
1001: PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&(*fact)->preferredordering[MAT_FACTOR_ICC]);
1002: return 0;
1003: }
1005: /* ------------------------------------------------------------------*/
1006: static PetscErrorCode MatSOR_SeqDense(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal shift, PetscInt its, PetscInt lits, Vec xx)
1007: {
1008: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1009: PetscScalar *x, *v = mat->v, zero = 0.0, xt;
1010: const PetscScalar *b;
1011: PetscInt m = A->rmap->n, i;
1012: PetscBLASInt o = 1, bm = 0;
1014: #if defined(PETSC_HAVE_CUDA)
1016: #endif
1017: if (shift == -1) shift = 0.0; /* negative shift indicates do not error on zero diagonal; this code never zeros on zero diagonal */
1018: PetscBLASIntCast(m, &bm);
1019: if (flag & SOR_ZERO_INITIAL_GUESS) {
1020: /* this is a hack fix, should have another version without the second BLASdotu */
1021: VecSet(xx, zero);
1022: }
1023: VecGetArray(xx, &x);
1024: VecGetArrayRead(bb, &b);
1025: its = its * lits;
1027: while (its--) {
1028: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1029: for (i = 0; i < m; i++) {
1030: PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1031: x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift);
1032: }
1033: }
1034: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1035: for (i = m - 1; i >= 0; i--) {
1036: PetscCallBLAS("BLASdotu", xt = b[i] - BLASdotu_(&bm, v + i, &bm, x, &o));
1037: x[i] = (1. - omega) * x[i] + omega * (xt + v[i + i * m] * x[i]) / (v[i + i * m] + shift);
1038: }
1039: }
1040: }
1041: VecRestoreArrayRead(bb, &b);
1042: VecRestoreArray(xx, &x);
1043: return 0;
1044: }
1046: /* -----------------------------------------------------------------*/
1047: PetscErrorCode MatMultTranspose_SeqDense(Mat A, Vec xx, Vec yy)
1048: {
1049: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1050: const PetscScalar *v = mat->v, *x;
1051: PetscScalar *y;
1052: PetscBLASInt m, n, _One = 1;
1053: PetscScalar _DOne = 1.0, _DZero = 0.0;
1055: PetscBLASIntCast(A->rmap->n, &m);
1056: PetscBLASIntCast(A->cmap->n, &n);
1057: VecGetArrayRead(xx, &x);
1058: VecGetArrayWrite(yy, &y);
1059: if (!A->rmap->n || !A->cmap->n) {
1060: PetscBLASInt i;
1061: for (i = 0; i < n; i++) y[i] = 0.0;
1062: } else {
1063: PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &mat->lda, x, &_One, &_DZero, y, &_One));
1064: PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->cmap->n);
1065: }
1066: VecRestoreArrayRead(xx, &x);
1067: VecRestoreArrayWrite(yy, &y);
1068: return 0;
1069: }
1071: PetscErrorCode MatMult_SeqDense(Mat A, Vec xx, Vec yy)
1072: {
1073: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1074: PetscScalar *y, _DOne = 1.0, _DZero = 0.0;
1075: PetscBLASInt m, n, _One = 1;
1076: const PetscScalar *v = mat->v, *x;
1078: PetscBLASIntCast(A->rmap->n, &m);
1079: PetscBLASIntCast(A->cmap->n, &n);
1080: VecGetArrayRead(xx, &x);
1081: VecGetArrayWrite(yy, &y);
1082: if (!A->rmap->n || !A->cmap->n) {
1083: PetscBLASInt i;
1084: for (i = 0; i < m; i++) y[i] = 0.0;
1085: } else {
1086: PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DZero, y, &_One));
1087: PetscLogFlops(2.0 * A->rmap->n * A->cmap->n - A->rmap->n);
1088: }
1089: VecRestoreArrayRead(xx, &x);
1090: VecRestoreArrayWrite(yy, &y);
1091: return 0;
1092: }
1094: PetscErrorCode MatMultAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1095: {
1096: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1097: const PetscScalar *v = mat->v, *x;
1098: PetscScalar *y, _DOne = 1.0;
1099: PetscBLASInt m, n, _One = 1;
1101: PetscBLASIntCast(A->rmap->n, &m);
1102: PetscBLASIntCast(A->cmap->n, &n);
1103: VecCopy(zz, yy);
1104: if (!A->rmap->n || !A->cmap->n) return 0;
1105: VecGetArrayRead(xx, &x);
1106: VecGetArray(yy, &y);
1107: PetscCallBLAS("BLASgemv", BLASgemv_("N", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One));
1108: VecRestoreArrayRead(xx, &x);
1109: VecRestoreArray(yy, &y);
1110: PetscLogFlops(2.0 * A->rmap->n * A->cmap->n);
1111: return 0;
1112: }
1114: PetscErrorCode MatMultTransposeAdd_SeqDense(Mat A, Vec xx, Vec zz, Vec yy)
1115: {
1116: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1117: const PetscScalar *v = mat->v, *x;
1118: PetscScalar *y;
1119: PetscBLASInt m, n, _One = 1;
1120: PetscScalar _DOne = 1.0;
1122: PetscBLASIntCast(A->rmap->n, &m);
1123: PetscBLASIntCast(A->cmap->n, &n);
1124: VecCopy(zz, yy);
1125: if (!A->rmap->n || !A->cmap->n) return 0;
1126: VecGetArrayRead(xx, &x);
1127: VecGetArray(yy, &y);
1128: PetscCallBLAS("BLASgemv", BLASgemv_("T", &m, &n, &_DOne, v, &(mat->lda), x, &_One, &_DOne, y, &_One));
1129: VecRestoreArrayRead(xx, &x);
1130: VecRestoreArray(yy, &y);
1131: PetscLogFlops(2.0 * A->rmap->n * A->cmap->n);
1132: return 0;
1133: }
1135: /* -----------------------------------------------------------------*/
1136: static PetscErrorCode MatGetRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1137: {
1138: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1139: PetscInt i;
1141: *ncols = A->cmap->n;
1142: if (cols) {
1143: PetscMalloc1(A->cmap->n, cols);
1144: for (i = 0; i < A->cmap->n; i++) (*cols)[i] = i;
1145: }
1146: if (vals) {
1147: const PetscScalar *v;
1149: MatDenseGetArrayRead(A, &v);
1150: PetscMalloc1(A->cmap->n, vals);
1151: v += row;
1152: for (i = 0; i < A->cmap->n; i++) {
1153: (*vals)[i] = *v;
1154: v += mat->lda;
1155: }
1156: MatDenseRestoreArrayRead(A, &v);
1157: }
1158: return 0;
1159: }
1161: static PetscErrorCode MatRestoreRow_SeqDense(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **vals)
1162: {
1163: if (ncols) *ncols = 0;
1164: if (cols) PetscFree(*cols);
1165: if (vals) PetscFree(*vals);
1166: return 0;
1167: }
1168: /* ----------------------------------------------------------------*/
1169: static PetscErrorCode MatSetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], const PetscScalar v[], InsertMode addv)
1170: {
1171: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1172: PetscScalar *av;
1173: PetscInt i, j, idx = 0;
1174: #if defined(PETSC_HAVE_CUDA)
1175: PetscOffloadMask oldf;
1176: #endif
1178: MatDenseGetArray(A, &av);
1179: if (!mat->roworiented) {
1180: if (addv == INSERT_VALUES) {
1181: for (j = 0; j < n; j++) {
1182: if (indexn[j] < 0) {
1183: idx += m;
1184: continue;
1185: }
1187: for (i = 0; i < m; i++) {
1188: if (indexm[i] < 0) {
1189: idx++;
1190: continue;
1191: }
1193: av[indexn[j] * mat->lda + indexm[i]] = v[idx++];
1194: }
1195: }
1196: } else {
1197: for (j = 0; j < n; j++) {
1198: if (indexn[j] < 0) {
1199: idx += m;
1200: continue;
1201: }
1203: for (i = 0; i < m; i++) {
1204: if (indexm[i] < 0) {
1205: idx++;
1206: continue;
1207: }
1209: av[indexn[j] * mat->lda + indexm[i]] += v[idx++];
1210: }
1211: }
1212: }
1213: } else {
1214: if (addv == INSERT_VALUES) {
1215: for (i = 0; i < m; i++) {
1216: if (indexm[i] < 0) {
1217: idx += n;
1218: continue;
1219: }
1221: for (j = 0; j < n; j++) {
1222: if (indexn[j] < 0) {
1223: idx++;
1224: continue;
1225: }
1227: av[indexn[j] * mat->lda + indexm[i]] = v[idx++];
1228: }
1229: }
1230: } else {
1231: for (i = 0; i < m; i++) {
1232: if (indexm[i] < 0) {
1233: idx += n;
1234: continue;
1235: }
1237: for (j = 0; j < n; j++) {
1238: if (indexn[j] < 0) {
1239: idx++;
1240: continue;
1241: }
1243: av[indexn[j] * mat->lda + indexm[i]] += v[idx++];
1244: }
1245: }
1246: }
1247: }
1248: /* hack to prevent unneeded copy to the GPU while returning the array */
1249: #if defined(PETSC_HAVE_CUDA)
1250: oldf = A->offloadmask;
1251: A->offloadmask = PETSC_OFFLOAD_GPU;
1252: #endif
1253: MatDenseRestoreArray(A, &av);
1254: #if defined(PETSC_HAVE_CUDA)
1255: A->offloadmask = (oldf == PETSC_OFFLOAD_UNALLOCATED ? PETSC_OFFLOAD_UNALLOCATED : PETSC_OFFLOAD_CPU);
1256: #endif
1257: return 0;
1258: }
1260: static PetscErrorCode MatGetValues_SeqDense(Mat A, PetscInt m, const PetscInt indexm[], PetscInt n, const PetscInt indexn[], PetscScalar v[])
1261: {
1262: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1263: const PetscScalar *vv;
1264: PetscInt i, j;
1266: MatDenseGetArrayRead(A, &vv);
1267: /* row-oriented output */
1268: for (i = 0; i < m; i++) {
1269: if (indexm[i] < 0) {
1270: v += n;
1271: continue;
1272: }
1274: for (j = 0; j < n; j++) {
1275: if (indexn[j] < 0) {
1276: v++;
1277: continue;
1278: }
1280: *v++ = vv[indexn[j] * mat->lda + indexm[i]];
1281: }
1282: }
1283: MatDenseRestoreArrayRead(A, &vv);
1284: return 0;
1285: }
1287: /* -----------------------------------------------------------------*/
1289: PetscErrorCode MatView_Dense_Binary(Mat mat, PetscViewer viewer)
1290: {
1291: PetscBool skipHeader;
1292: PetscViewerFormat format;
1293: PetscInt header[4], M, N, m, lda, i, j, k;
1294: const PetscScalar *v;
1295: PetscScalar *vwork;
1297: PetscViewerSetUp(viewer);
1298: PetscViewerBinaryGetSkipHeader(viewer, &skipHeader);
1299: PetscViewerGetFormat(viewer, &format);
1300: if (skipHeader) format = PETSC_VIEWER_NATIVE;
1302: MatGetSize(mat, &M, &N);
1304: /* write matrix header */
1305: header[0] = MAT_FILE_CLASSID;
1306: header[1] = M;
1307: header[2] = N;
1308: header[3] = (format == PETSC_VIEWER_NATIVE) ? MATRIX_BINARY_FORMAT_DENSE : M * N;
1309: if (!skipHeader) PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT);
1311: MatGetLocalSize(mat, &m, NULL);
1312: if (format != PETSC_VIEWER_NATIVE) {
1313: PetscInt nnz = m * N, *iwork;
1314: /* store row lengths for each row */
1315: PetscMalloc1(nnz, &iwork);
1316: for (i = 0; i < m; i++) iwork[i] = N;
1317: PetscViewerBinaryWriteAll(viewer, iwork, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1318: /* store column indices (zero start index) */
1319: for (k = 0, i = 0; i < m; i++)
1320: for (j = 0; j < N; j++, k++) iwork[k] = j;
1321: PetscViewerBinaryWriteAll(viewer, iwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1322: PetscFree(iwork);
1323: }
1324: /* store matrix values as a dense matrix in row major order */
1325: PetscMalloc1(m * N, &vwork);
1326: MatDenseGetArrayRead(mat, &v);
1327: MatDenseGetLDA(mat, &lda);
1328: for (k = 0, i = 0; i < m; i++)
1329: for (j = 0; j < N; j++, k++) vwork[k] = v[i + lda * j];
1330: MatDenseRestoreArrayRead(mat, &v);
1331: PetscViewerBinaryWriteAll(viewer, vwork, m * N, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1332: PetscFree(vwork);
1333: return 0;
1334: }
1336: PetscErrorCode MatLoad_Dense_Binary(Mat mat, PetscViewer viewer)
1337: {
1338: PetscBool skipHeader;
1339: PetscInt header[4], M, N, m, nz, lda, i, j, k;
1340: PetscInt rows, cols;
1341: PetscScalar *v, *vwork;
1343: PetscViewerSetUp(viewer);
1344: PetscViewerBinaryGetSkipHeader(viewer, &skipHeader);
1346: if (!skipHeader) {
1347: PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT);
1349: M = header[1];
1350: N = header[2];
1353: nz = header[3];
1355: } else {
1356: MatGetSize(mat, &M, &N);
1358: nz = MATRIX_BINARY_FORMAT_DENSE;
1359: }
1361: /* setup global sizes if not set */
1362: if (mat->rmap->N < 0) mat->rmap->N = M;
1363: if (mat->cmap->N < 0) mat->cmap->N = N;
1364: MatSetUp(mat);
1365: /* check if global sizes are correct */
1366: MatGetSize(mat, &rows, &cols);
1369: MatGetSize(mat, NULL, &N);
1370: MatGetLocalSize(mat, &m, NULL);
1371: MatDenseGetArray(mat, &v);
1372: MatDenseGetLDA(mat, &lda);
1373: if (nz == MATRIX_BINARY_FORMAT_DENSE) { /* matrix in file is dense format */
1374: PetscInt nnz = m * N;
1375: /* read in matrix values */
1376: PetscMalloc1(nnz, &vwork);
1377: PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1378: /* store values in column major order */
1379: for (j = 0; j < N; j++)
1380: for (i = 0; i < m; i++) v[i + lda * j] = vwork[i * N + j];
1381: PetscFree(vwork);
1382: } else { /* matrix in file is sparse format */
1383: PetscInt nnz = 0, *rlens, *icols;
1384: /* read in row lengths */
1385: PetscMalloc1(m, &rlens);
1386: PetscViewerBinaryReadAll(viewer, rlens, m, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1387: for (i = 0; i < m; i++) nnz += rlens[i];
1388: /* read in column indices and values */
1389: PetscMalloc2(nnz, &icols, nnz, &vwork);
1390: PetscViewerBinaryReadAll(viewer, icols, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT);
1391: PetscViewerBinaryReadAll(viewer, vwork, nnz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR);
1392: /* store values in column major order */
1393: for (k = 0, i = 0; i < m; i++)
1394: for (j = 0; j < rlens[i]; j++, k++) v[i + lda * icols[k]] = vwork[k];
1395: PetscFree(rlens);
1396: PetscFree2(icols, vwork);
1397: }
1398: MatDenseRestoreArray(mat, &v);
1399: MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
1400: MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
1401: return 0;
1402: }
1404: PetscErrorCode MatLoad_SeqDense(Mat newMat, PetscViewer viewer)
1405: {
1406: PetscBool isbinary, ishdf5;
1410: /* force binary viewer to load .info file if it has not yet done so */
1411: PetscViewerSetUp(viewer);
1412: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1413: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5);
1414: if (isbinary) {
1415: MatLoad_Dense_Binary(newMat, viewer);
1416: } else if (ishdf5) {
1417: #if defined(PETSC_HAVE_HDF5)
1418: MatLoad_Dense_HDF5(newMat, viewer);
1419: #else
1420: SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
1421: #endif
1422: } else {
1423: SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1424: }
1425: return 0;
1426: }
1428: static PetscErrorCode MatView_SeqDense_ASCII(Mat A, PetscViewer viewer)
1429: {
1430: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1431: PetscInt i, j;
1432: const char *name;
1433: PetscScalar *v, *av;
1434: PetscViewerFormat format;
1435: #if defined(PETSC_USE_COMPLEX)
1436: PetscBool allreal = PETSC_TRUE;
1437: #endif
1439: MatDenseGetArrayRead(A, (const PetscScalar **)&av);
1440: PetscViewerGetFormat(viewer, &format);
1441: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442: return 0; /* do nothing for now */
1443: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1444: PetscViewerASCIIUseTabs(viewer, PETSC_FALSE);
1445: for (i = 0; i < A->rmap->n; i++) {
1446: v = av + i;
1447: PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i);
1448: for (j = 0; j < A->cmap->n; j++) {
1449: #if defined(PETSC_USE_COMPLEX)
1450: if (PetscRealPart(*v) != 0.0 && PetscImaginaryPart(*v) != 0.0) {
1451: PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", j, (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v));
1452: } else if (PetscRealPart(*v)) {
1453: PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)PetscRealPart(*v));
1454: }
1455: #else
1456: if (*v) PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", j, (double)*v);
1457: #endif
1458: v += a->lda;
1459: }
1460: PetscViewerASCIIPrintf(viewer, "\n");
1461: }
1462: PetscViewerASCIIUseTabs(viewer, PETSC_TRUE);
1463: } else {
1464: PetscViewerASCIIUseTabs(viewer, PETSC_FALSE);
1465: #if defined(PETSC_USE_COMPLEX)
1466: /* determine if matrix has all real values */
1467: for (j = 0; j < A->cmap->n; j++) {
1468: v = av + j * a->lda;
1469: for (i = 0; i < A->rmap->n; i++) {
1470: if (PetscImaginaryPart(v[i])) {
1471: allreal = PETSC_FALSE;
1472: break;
1473: }
1474: }
1475: }
1476: #endif
1477: if (format == PETSC_VIEWER_ASCII_MATLAB) {
1478: PetscObjectGetName((PetscObject)A, &name);
1479: PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", A->rmap->n, A->cmap->n);
1480: PetscViewerASCIIPrintf(viewer, "%s = zeros(%" PetscInt_FMT ",%" PetscInt_FMT ");\n", name, A->rmap->n, A->cmap->n);
1481: PetscViewerASCIIPrintf(viewer, "%s = [\n", name);
1482: }
1484: for (i = 0; i < A->rmap->n; i++) {
1485: v = av + i;
1486: for (j = 0; j < A->cmap->n; j++) {
1487: #if defined(PETSC_USE_COMPLEX)
1488: if (allreal) {
1489: PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(*v));
1490: } else {
1491: PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei ", (double)PetscRealPart(*v), (double)PetscImaginaryPart(*v));
1492: }
1493: #else
1494: PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)*v);
1495: #endif
1496: v += a->lda;
1497: }
1498: PetscViewerASCIIPrintf(viewer, "\n");
1499: }
1500: if (format == PETSC_VIEWER_ASCII_MATLAB) PetscViewerASCIIPrintf(viewer, "];\n");
1501: PetscViewerASCIIUseTabs(viewer, PETSC_TRUE);
1502: }
1503: MatDenseRestoreArrayRead(A, (const PetscScalar **)&av);
1504: PetscViewerFlush(viewer);
1505: return 0;
1506: }
1508: #include <petscdraw.h>
1509: static PetscErrorCode MatView_SeqDense_Draw_Zoom(PetscDraw draw, void *Aa)
1510: {
1511: Mat A = (Mat)Aa;
1512: PetscInt m = A->rmap->n, n = A->cmap->n, i, j;
1513: int color = PETSC_DRAW_WHITE;
1514: const PetscScalar *v;
1515: PetscViewer viewer;
1516: PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1517: PetscViewerFormat format;
1519: PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer);
1520: PetscViewerGetFormat(viewer, &format);
1521: PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr);
1523: /* Loop over matrix elements drawing boxes */
1524: MatDenseGetArrayRead(A, &v);
1525: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1526: PetscDrawCollectiveBegin(draw);
1527: /* Blue for negative and Red for positive */
1528: for (j = 0; j < n; j++) {
1529: x_l = j;
1530: x_r = x_l + 1.0;
1531: for (i = 0; i < m; i++) {
1532: y_l = m - i - 1.0;
1533: y_r = y_l + 1.0;
1534: if (PetscRealPart(v[j * m + i]) > 0.) color = PETSC_DRAW_RED;
1535: else if (PetscRealPart(v[j * m + i]) < 0.) color = PETSC_DRAW_BLUE;
1536: else continue;
1537: PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color);
1538: }
1539: }
1540: PetscDrawCollectiveEnd(draw);
1541: } else {
1542: /* use contour shading to indicate magnitude of values */
1543: /* first determine max of all nonzero values */
1544: PetscReal minv = 0.0, maxv = 0.0;
1545: PetscDraw popup;
1547: for (i = 0; i < m * n; i++) {
1548: if (PetscAbsScalar(v[i]) > maxv) maxv = PetscAbsScalar(v[i]);
1549: }
1550: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1551: PetscDrawGetPopup(draw, &popup);
1552: PetscDrawScalePopup(popup, minv, maxv);
1554: PetscDrawCollectiveBegin(draw);
1555: for (j = 0; j < n; j++) {
1556: x_l = j;
1557: x_r = x_l + 1.0;
1558: for (i = 0; i < m; i++) {
1559: y_l = m - i - 1.0;
1560: y_r = y_l + 1.0;
1561: color = PetscDrawRealToColor(PetscAbsScalar(v[j * m + i]), minv, maxv);
1562: PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color);
1563: }
1564: }
1565: PetscDrawCollectiveEnd(draw);
1566: }
1567: MatDenseRestoreArrayRead(A, &v);
1568: return 0;
1569: }
1571: static PetscErrorCode MatView_SeqDense_Draw(Mat A, PetscViewer viewer)
1572: {
1573: PetscDraw draw;
1574: PetscBool isnull;
1575: PetscReal xr, yr, xl, yl, h, w;
1577: PetscViewerDrawGetDraw(viewer, 0, &draw);
1578: PetscDrawIsNull(draw, &isnull);
1579: if (isnull) return 0;
1581: xr = A->cmap->n;
1582: yr = A->rmap->n;
1583: h = yr / 10.0;
1584: w = xr / 10.0;
1585: xr += w;
1586: yr += h;
1587: xl = -w;
1588: yl = -h;
1589: PetscDrawSetCoordinates(draw, xl, yl, xr, yr);
1590: PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer);
1591: PetscDrawZoom(draw, MatView_SeqDense_Draw_Zoom, A);
1592: PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL);
1593: PetscDrawSave(draw);
1594: return 0;
1595: }
1597: PetscErrorCode MatView_SeqDense(Mat A, PetscViewer viewer)
1598: {
1599: PetscBool iascii, isbinary, isdraw;
1601: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
1602: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary);
1603: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw);
1604: if (iascii) MatView_SeqDense_ASCII(A, viewer);
1605: else if (isbinary) MatView_Dense_Binary(A, viewer);
1606: else if (isdraw) MatView_SeqDense_Draw(A, viewer);
1607: return 0;
1608: }
1610: static PetscErrorCode MatDensePlaceArray_SeqDense(Mat A, const PetscScalar *array)
1611: {
1612: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1617: a->unplacedarray = a->v;
1618: a->unplaced_user_alloc = a->user_alloc;
1619: a->v = (PetscScalar *)array;
1620: a->user_alloc = PETSC_TRUE;
1621: #if defined(PETSC_HAVE_CUDA)
1622: A->offloadmask = PETSC_OFFLOAD_CPU;
1623: #endif
1624: return 0;
1625: }
1627: static PetscErrorCode MatDenseResetArray_SeqDense(Mat A)
1628: {
1629: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1633: a->v = a->unplacedarray;
1634: a->user_alloc = a->unplaced_user_alloc;
1635: a->unplacedarray = NULL;
1636: #if defined(PETSC_HAVE_CUDA)
1637: A->offloadmask = PETSC_OFFLOAD_CPU;
1638: #endif
1639: return 0;
1640: }
1642: static PetscErrorCode MatDenseReplaceArray_SeqDense(Mat A, const PetscScalar *array)
1643: {
1644: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
1648: if (!a->user_alloc) PetscFree(a->v);
1649: a->v = (PetscScalar *)array;
1650: a->user_alloc = PETSC_FALSE;
1651: #if defined(PETSC_HAVE_CUDA)
1652: A->offloadmask = PETSC_OFFLOAD_CPU;
1653: #endif
1654: return 0;
1655: }
1657: PetscErrorCode MatDestroy_SeqDense(Mat mat)
1658: {
1659: Mat_SeqDense *l = (Mat_SeqDense *)mat->data;
1661: #if defined(PETSC_USE_LOG)
1662: PetscLogObjectState((PetscObject)mat, "Rows %" PetscInt_FMT " Cols %" PetscInt_FMT, mat->rmap->n, mat->cmap->n);
1663: #endif
1664: VecDestroy(&(l->qrrhs));
1665: PetscFree(l->tau);
1666: PetscFree(l->pivots);
1667: PetscFree(l->fwork);
1668: MatDestroy(&l->ptapwork);
1669: if (!l->user_alloc) PetscFree(l->v);
1670: if (!l->unplaced_user_alloc) PetscFree(l->unplacedarray);
1673: VecDestroy(&l->cvec);
1674: MatDestroy(&l->cmat);
1675: PetscFree(mat->data);
1677: PetscObjectChangeTypeName((PetscObject)mat, NULL);
1678: PetscObjectComposeFunction((PetscObject)mat, "MatQRFactor_C", NULL);
1679: PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorSymbolic_C", NULL);
1680: PetscObjectComposeFunction((PetscObject)mat, "MatQRFactorNumeric_C", NULL);
1681: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL);
1682: PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL);
1683: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL);
1684: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL);
1685: PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL);
1686: PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL);
1687: PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL);
1688: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL);
1689: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL);
1690: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL);
1691: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL);
1692: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqaij_C", NULL);
1693: #if defined(PETSC_HAVE_ELEMENTAL)
1694: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_elemental_C", NULL);
1695: #endif
1696: #if defined(PETSC_HAVE_SCALAPACK)
1697: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_scalapack_C", NULL);
1698: #endif
1699: #if defined(PETSC_HAVE_CUDA)
1700: PetscObjectComposeFunction((PetscObject)mat, "MatConvert_seqdense_seqdensecuda_C", NULL);
1701: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", NULL);
1702: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdensecuda_seqdense_C", NULL);
1703: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdensecuda_C", NULL);
1704: #endif
1705: PetscObjectComposeFunction((PetscObject)mat, "MatSeqDenseSetPreallocation_C", NULL);
1706: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqaij_seqdense_C", NULL);
1707: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqdense_seqdense_C", NULL);
1708: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqbaij_seqdense_C", NULL);
1709: PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_seqsbaij_seqdense_C", NULL);
1711: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL);
1712: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL);
1713: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL);
1714: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL);
1715: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL);
1716: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL);
1717: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL);
1718: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL);
1719: PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL);
1720: PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL);
1721: return 0;
1722: }
1724: static PetscErrorCode MatTranspose_SeqDense(Mat A, MatReuse reuse, Mat *matout)
1725: {
1726: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1727: PetscInt k, j, m = A->rmap->n, M = mat->lda, n = A->cmap->n;
1728: PetscScalar *v, tmp;
1730: if (reuse == MAT_REUSE_MATRIX) MatTransposeCheckNonzeroState_Private(A, *matout);
1731: if (reuse == MAT_INPLACE_MATRIX) {
1732: if (m == n) { /* in place transpose */
1733: MatDenseGetArray(A, &v);
1734: for (j = 0; j < m; j++) {
1735: for (k = 0; k < j; k++) {
1736: tmp = v[j + k * M];
1737: v[j + k * M] = v[k + j * M];
1738: v[k + j * M] = tmp;
1739: }
1740: }
1741: MatDenseRestoreArray(A, &v);
1742: } else { /* reuse memory, temporary allocates new memory */
1743: PetscScalar *v2;
1744: PetscLayout tmplayout;
1746: PetscMalloc1((size_t)m * n, &v2);
1747: MatDenseGetArray(A, &v);
1748: for (j = 0; j < n; j++) {
1749: for (k = 0; k < m; k++) v2[j + (size_t)k * n] = v[k + (size_t)j * M];
1750: }
1751: PetscArraycpy(v, v2, (size_t)m * n);
1752: PetscFree(v2);
1753: MatDenseRestoreArray(A, &v);
1754: /* cleanup size dependent quantities */
1755: VecDestroy(&mat->cvec);
1756: MatDestroy(&mat->cmat);
1757: PetscFree(mat->pivots);
1758: PetscFree(mat->fwork);
1759: MatDestroy(&mat->ptapwork);
1760: /* swap row/col layouts */
1761: mat->lda = n;
1762: tmplayout = A->rmap;
1763: A->rmap = A->cmap;
1764: A->cmap = tmplayout;
1765: }
1766: } else { /* out-of-place transpose */
1767: Mat tmat;
1768: Mat_SeqDense *tmatd;
1769: PetscScalar *v2;
1770: PetscInt M2;
1772: if (reuse == MAT_INITIAL_MATRIX) {
1773: MatCreate(PetscObjectComm((PetscObject)A), &tmat);
1774: MatSetSizes(tmat, A->cmap->n, A->rmap->n, A->cmap->n, A->rmap->n);
1775: MatSetType(tmat, ((PetscObject)A)->type_name);
1776: MatSeqDenseSetPreallocation(tmat, NULL);
1777: } else tmat = *matout;
1779: MatDenseGetArrayRead(A, (const PetscScalar **)&v);
1780: MatDenseGetArray(tmat, &v2);
1781: tmatd = (Mat_SeqDense *)tmat->data;
1782: M2 = tmatd->lda;
1783: for (j = 0; j < n; j++) {
1784: for (k = 0; k < m; k++) v2[j + k * M2] = v[k + j * M];
1785: }
1786: MatDenseRestoreArray(tmat, &v2);
1787: MatDenseRestoreArrayRead(A, (const PetscScalar **)&v);
1788: MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY);
1789: MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY);
1790: *matout = tmat;
1791: }
1792: return 0;
1793: }
1795: static PetscErrorCode MatEqual_SeqDense(Mat A1, Mat A2, PetscBool *flg)
1796: {
1797: Mat_SeqDense *mat1 = (Mat_SeqDense *)A1->data;
1798: Mat_SeqDense *mat2 = (Mat_SeqDense *)A2->data;
1799: PetscInt i;
1800: const PetscScalar *v1, *v2;
1802: if (A1->rmap->n != A2->rmap->n) {
1803: *flg = PETSC_FALSE;
1804: return 0;
1805: }
1806: if (A1->cmap->n != A2->cmap->n) {
1807: *flg = PETSC_FALSE;
1808: return 0;
1809: }
1810: MatDenseGetArrayRead(A1, &v1);
1811: MatDenseGetArrayRead(A2, &v2);
1812: for (i = 0; i < A1->cmap->n; i++) {
1813: PetscArraycmp(v1, v2, A1->rmap->n, flg);
1814: if (*flg == PETSC_FALSE) return 0;
1815: v1 += mat1->lda;
1816: v2 += mat2->lda;
1817: }
1818: MatDenseRestoreArrayRead(A1, &v1);
1819: MatDenseRestoreArrayRead(A2, &v2);
1820: *flg = PETSC_TRUE;
1821: return 0;
1822: }
1824: static PetscErrorCode MatGetDiagonal_SeqDense(Mat A, Vec v)
1825: {
1826: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1827: PetscInt i, n, len;
1828: PetscScalar *x;
1829: const PetscScalar *vv;
1831: VecGetSize(v, &n);
1832: VecGetArray(v, &x);
1833: len = PetscMin(A->rmap->n, A->cmap->n);
1834: MatDenseGetArrayRead(A, &vv);
1836: for (i = 0; i < len; i++) x[i] = vv[i * mat->lda + i];
1837: MatDenseRestoreArrayRead(A, &vv);
1838: VecRestoreArray(v, &x);
1839: return 0;
1840: }
1842: static PetscErrorCode MatDiagonalScale_SeqDense(Mat A, Vec ll, Vec rr)
1843: {
1844: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1845: const PetscScalar *l, *r;
1846: PetscScalar x, *v, *vv;
1847: PetscInt i, j, m = A->rmap->n, n = A->cmap->n;
1849: MatDenseGetArray(A, &vv);
1850: if (ll) {
1851: VecGetSize(ll, &m);
1852: VecGetArrayRead(ll, &l);
1854: for (i = 0; i < m; i++) {
1855: x = l[i];
1856: v = vv + i;
1857: for (j = 0; j < n; j++) {
1858: (*v) *= x;
1859: v += mat->lda;
1860: }
1861: }
1862: VecRestoreArrayRead(ll, &l);
1863: PetscLogFlops(1.0 * n * m);
1864: }
1865: if (rr) {
1866: VecGetSize(rr, &n);
1867: VecGetArrayRead(rr, &r);
1869: for (i = 0; i < n; i++) {
1870: x = r[i];
1871: v = vv + i * mat->lda;
1872: for (j = 0; j < m; j++) (*v++) *= x;
1873: }
1874: VecRestoreArrayRead(rr, &r);
1875: PetscLogFlops(1.0 * n * m);
1876: }
1877: MatDenseRestoreArray(A, &vv);
1878: return 0;
1879: }
1881: PetscErrorCode MatNorm_SeqDense(Mat A, NormType type, PetscReal *nrm)
1882: {
1883: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
1884: PetscScalar *v, *vv;
1885: PetscReal sum = 0.0;
1886: PetscInt lda, m = A->rmap->n, i, j;
1888: MatDenseGetArrayRead(A, (const PetscScalar **)&vv);
1889: MatDenseGetLDA(A, &lda);
1890: v = vv;
1891: if (type == NORM_FROBENIUS) {
1892: if (lda > m) {
1893: for (j = 0; j < A->cmap->n; j++) {
1894: v = vv + j * lda;
1895: for (i = 0; i < m; i++) {
1896: sum += PetscRealPart(PetscConj(*v) * (*v));
1897: v++;
1898: }
1899: }
1900: } else {
1901: #if defined(PETSC_USE_REAL___FP16)
1902: PetscBLASInt one = 1, cnt = A->cmap->n * A->rmap->n;
1903: PetscCallBLAS("BLASnrm2", *nrm = BLASnrm2_(&cnt, v, &one));
1904: }
1905: #else
1906: for (i = 0; i < A->cmap->n * A->rmap->n; i++) {
1907: sum += PetscRealPart(PetscConj(*v) * (*v));
1908: v++;
1909: }
1910: }
1911: *nrm = PetscSqrtReal(sum);
1912: #endif
1913: PetscLogFlops(2.0 * A->cmap->n * A->rmap->n);
1914: } else if (type == NORM_1) {
1915: *nrm = 0.0;
1916: for (j = 0; j < A->cmap->n; j++) {
1917: v = vv + j * mat->lda;
1918: sum = 0.0;
1919: for (i = 0; i < A->rmap->n; i++) {
1920: sum += PetscAbsScalar(*v);
1921: v++;
1922: }
1923: if (sum > *nrm) *nrm = sum;
1924: }
1925: PetscLogFlops(1.0 * A->cmap->n * A->rmap->n);
1926: } else if (type == NORM_INFINITY) {
1927: *nrm = 0.0;
1928: for (j = 0; j < A->rmap->n; j++) {
1929: v = vv + j;
1930: sum = 0.0;
1931: for (i = 0; i < A->cmap->n; i++) {
1932: sum += PetscAbsScalar(*v);
1933: v += mat->lda;
1934: }
1935: if (sum > *nrm) *nrm = sum;
1936: }
1937: PetscLogFlops(1.0 * A->cmap->n * A->rmap->n);
1938: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No two norm");
1939: MatDenseRestoreArrayRead(A, (const PetscScalar **)&vv);
1940: return 0;
1941: }
1943: static PetscErrorCode MatSetOption_SeqDense(Mat A, MatOption op, PetscBool flg)
1944: {
1945: Mat_SeqDense *aij = (Mat_SeqDense *)A->data;
1947: switch (op) {
1948: case MAT_ROW_ORIENTED:
1949: aij->roworiented = flg;
1950: break;
1951: case MAT_NEW_NONZERO_LOCATIONS:
1952: case MAT_NEW_NONZERO_LOCATION_ERR:
1953: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1954: case MAT_FORCE_DIAGONAL_ENTRIES:
1955: case MAT_KEEP_NONZERO_PATTERN:
1956: case MAT_IGNORE_OFF_PROC_ENTRIES:
1957: case MAT_USE_HASH_TABLE:
1958: case MAT_IGNORE_ZERO_ENTRIES:
1959: case MAT_IGNORE_LOWER_TRIANGULAR:
1960: case MAT_SORTED_FULL:
1961: PetscInfo(A, "Option %s ignored\n", MatOptions[op]);
1962: break;
1963: case MAT_SPD:
1964: case MAT_SYMMETRIC:
1965: case MAT_STRUCTURALLY_SYMMETRIC:
1966: case MAT_HERMITIAN:
1967: case MAT_SYMMETRY_ETERNAL:
1968: case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1969: case MAT_SPD_ETERNAL:
1970: break;
1971: default:
1972: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
1973: }
1974: return 0;
1975: }
1977: PetscErrorCode MatZeroEntries_SeqDense(Mat A)
1978: {
1979: Mat_SeqDense *l = (Mat_SeqDense *)A->data;
1980: PetscInt lda = l->lda, m = A->rmap->n, n = A->cmap->n, j;
1981: PetscScalar *v;
1983: MatDenseGetArrayWrite(A, &v);
1984: if (lda > m) {
1985: for (j = 0; j < n; j++) PetscArrayzero(v + j * lda, m);
1986: } else {
1987: PetscArrayzero(v, PetscInt64Mult(m, n));
1988: }
1989: MatDenseRestoreArrayWrite(A, &v);
1990: return 0;
1991: }
1993: static PetscErrorCode MatZeroRows_SeqDense(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1994: {
1995: Mat_SeqDense *l = (Mat_SeqDense *)A->data;
1996: PetscInt m = l->lda, n = A->cmap->n, i, j;
1997: PetscScalar *slot, *bb, *v;
1998: const PetscScalar *xx;
2000: if (PetscDefined(USE_DEBUG)) {
2001: for (i = 0; i < N; i++) {
2004: }
2005: }
2006: if (!N) return 0;
2008: /* fix right hand side if needed */
2009: if (x && b) {
2010: VecGetArrayRead(x, &xx);
2011: VecGetArray(b, &bb);
2012: for (i = 0; i < N; i++) bb[rows[i]] = diag * xx[rows[i]];
2013: VecRestoreArrayRead(x, &xx);
2014: VecRestoreArray(b, &bb);
2015: }
2017: MatDenseGetArray(A, &v);
2018: for (i = 0; i < N; i++) {
2019: slot = v + rows[i];
2020: for (j = 0; j < n; j++) {
2021: *slot = 0.0;
2022: slot += m;
2023: }
2024: }
2025: if (diag != 0.0) {
2027: for (i = 0; i < N; i++) {
2028: slot = v + (m + 1) * rows[i];
2029: *slot = diag;
2030: }
2031: }
2032: MatDenseRestoreArray(A, &v);
2033: return 0;
2034: }
2036: static PetscErrorCode MatDenseGetLDA_SeqDense(Mat A, PetscInt *lda)
2037: {
2038: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2040: *lda = mat->lda;
2041: return 0;
2042: }
2044: PetscErrorCode MatDenseGetArray_SeqDense(Mat A, PetscScalar **array)
2045: {
2046: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2049: *array = mat->v;
2050: return 0;
2051: }
2053: PetscErrorCode MatDenseRestoreArray_SeqDense(Mat A, PetscScalar **array)
2054: {
2055: if (array) *array = NULL;
2056: return 0;
2057: }
2059: /*@
2060: MatDenseGetLDA - gets the leading dimension of the array returned from `MatDenseGetArray()`
2062: Not collective
2064: Input Parameter:
2065: . mat - a `MATDENSE` or `MATDENSECUDA` matrix
2067: Output Parameter:
2068: . lda - the leading dimension
2070: Level: intermediate
2072: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseSetLDA()`
2073: @*/
2074: PetscErrorCode MatDenseGetLDA(Mat A, PetscInt *lda)
2075: {
2078: MatCheckPreallocated(A, 1);
2079: PetscUseMethod(A, "MatDenseGetLDA_C", (Mat, PetscInt *), (A, lda));
2080: return 0;
2081: }
2083: /*@
2084: MatDenseSetLDA - Sets the leading dimension of the array used by the `MATDENSE` matrix
2086: Not collective
2088: Input Parameters:
2089: + mat - a `MATDENSE` or `MATDENSECUDA` matrix
2090: - lda - the leading dimension
2092: Level: intermediate
2094: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetLDA()`
2095: @*/
2096: PetscErrorCode MatDenseSetLDA(Mat A, PetscInt lda)
2097: {
2099: PetscTryMethod(A, "MatDenseSetLDA_C", (Mat, PetscInt), (A, lda));
2100: return 0;
2101: }
2103: /*@C
2104: MatDenseGetArray - gives read-write access to the array where the data for a `MATDENSE` matrix is stored
2106: Logically Collective on A
2108: Input Parameter:
2109: . mat - a dense matrix
2111: Output Parameter:
2112: . array - pointer to the data
2114: Level: intermediate
2116: .seealso: `MATDENSE`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2117: @*/
2118: PetscErrorCode MatDenseGetArray(Mat A, PetscScalar **array)
2119: {
2122: PetscUseMethod(A, "MatDenseGetArray_C", (Mat, PetscScalar **), (A, array));
2123: return 0;
2124: }
2126: /*@C
2127: MatDenseRestoreArray - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArray()`
2129: Logically Collective on A
2131: Input Parameters:
2132: + mat - a dense matrix
2133: - array - pointer to the data (may be NULL)
2135: Level: intermediate
2137: .seealso: `MATDENSE`, `MatDenseGetArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2138: @*/
2139: PetscErrorCode MatDenseRestoreArray(Mat A, PetscScalar **array)
2140: {
2143: PetscUseMethod(A, "MatDenseRestoreArray_C", (Mat, PetscScalar **), (A, array));
2144: PetscObjectStateIncrease((PetscObject)A);
2145: #if defined(PETSC_HAVE_CUDA)
2146: A->offloadmask = PETSC_OFFLOAD_CPU;
2147: #endif
2148: return 0;
2149: }
2151: /*@C
2152: MatDenseGetArrayRead - gives read-only access to the array where the data for a `MATDENSE` matrix is stored
2154: Not Collective
2156: Input Parameter:
2157: . mat - a dense matrix
2159: Output Parameter:
2160: . array - pointer to the data
2162: Level: intermediate
2164: .seealso: `MATDENSE`, `MatDenseRestoreArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2165: @*/
2166: PetscErrorCode MatDenseGetArrayRead(Mat A, const PetscScalar **array)
2167: {
2170: PetscUseMethod(A, "MatDenseGetArrayRead_C", (Mat, const PetscScalar **), (A, array));
2171: return 0;
2172: }
2174: /*@C
2175: MatDenseRestoreArrayRead - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayRead()`
2177: Not Collective
2179: Input Parameters:
2180: + mat - a dense matrix
2181: - array - pointer to the data (may be NULL)
2183: Level: intermediate
2185: .seealso: `MATDENSE`, `MatDenseGetArrayRead()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayWrite()`, `MatDenseRestoreArrayWrite()`
2186: @*/
2187: PetscErrorCode MatDenseRestoreArrayRead(Mat A, const PetscScalar **array)
2188: {
2191: PetscUseMethod(A, "MatDenseRestoreArrayRead_C", (Mat, const PetscScalar **), (A, array));
2192: return 0;
2193: }
2195: /*@C
2196: MatDenseGetArrayWrite - gives write-only access to the array where the data for a `MATDENSE` matrix is stored
2198: Not Collective
2200: Input Parameter:
2201: . mat - a dense matrix
2203: Output Parameter:
2204: . array - pointer to the data
2206: Level: intermediate
2208: .seealso: `MATDENSE`, `MatDenseRestoreArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2209: @*/
2210: PetscErrorCode MatDenseGetArrayWrite(Mat A, PetscScalar **array)
2211: {
2214: PetscUseMethod(A, "MatDenseGetArrayWrite_C", (Mat, PetscScalar **), (A, array));
2215: return 0;
2216: }
2218: /*@C
2219: MatDenseRestoreArrayWrite - returns access to the array where the data for a `MATDENSE` matrix is stored obtained by `MatDenseGetArrayWrite()`
2221: Not Collective
2223: Input Parameters:
2224: + mat - a dense matrix
2225: - array - pointer to the data (may be NULL)
2227: Level: intermediate
2229: .seealso: `MATDENSE`, `MatDenseGetArrayWrite()`, `MatDenseGetArray()`, `MatDenseRestoreArray()`, `MatDenseGetArrayRead()`, `MatDenseRestoreArrayRead()`
2230: @*/
2231: PetscErrorCode MatDenseRestoreArrayWrite(Mat A, PetscScalar **array)
2232: {
2235: PetscUseMethod(A, "MatDenseRestoreArrayWrite_C", (Mat, PetscScalar **), (A, array));
2236: PetscObjectStateIncrease((PetscObject)A);
2237: #if defined(PETSC_HAVE_CUDA)
2238: A->offloadmask = PETSC_OFFLOAD_CPU;
2239: #endif
2240: return 0;
2241: }
2243: static PetscErrorCode MatCreateSubMatrix_SeqDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
2244: {
2245: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2246: PetscInt i, j, nrows, ncols, ldb;
2247: const PetscInt *irow, *icol;
2248: PetscScalar *av, *bv, *v = mat->v;
2249: Mat newmat;
2251: ISGetIndices(isrow, &irow);
2252: ISGetIndices(iscol, &icol);
2253: ISGetLocalSize(isrow, &nrows);
2254: ISGetLocalSize(iscol, &ncols);
2256: /* Check submatrixcall */
2257: if (scall == MAT_REUSE_MATRIX) {
2258: PetscInt n_cols, n_rows;
2259: MatGetSize(*B, &n_rows, &n_cols);
2260: if (n_rows != nrows || n_cols != ncols) {
2261: /* resize the result matrix to match number of requested rows/columns */
2262: MatSetSizes(*B, nrows, ncols, nrows, ncols);
2263: }
2264: newmat = *B;
2265: } else {
2266: /* Create and fill new matrix */
2267: MatCreate(PetscObjectComm((PetscObject)A), &newmat);
2268: MatSetSizes(newmat, nrows, ncols, nrows, ncols);
2269: MatSetType(newmat, ((PetscObject)A)->type_name);
2270: MatSeqDenseSetPreallocation(newmat, NULL);
2271: }
2273: /* Now extract the data pointers and do the copy,column at a time */
2274: MatDenseGetArray(newmat, &bv);
2275: MatDenseGetLDA(newmat, &ldb);
2276: for (i = 0; i < ncols; i++) {
2277: av = v + mat->lda * icol[i];
2278: for (j = 0; j < nrows; j++) bv[j] = av[irow[j]];
2279: bv += ldb;
2280: }
2281: MatDenseRestoreArray(newmat, &bv);
2283: /* Assemble the matrices so that the correct flags are set */
2284: MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY);
2285: MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY);
2287: /* Free work space */
2288: ISRestoreIndices(isrow, &irow);
2289: ISRestoreIndices(iscol, &icol);
2290: *B = newmat;
2291: return 0;
2292: }
2294: static PetscErrorCode MatCreateSubMatrices_SeqDense(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
2295: {
2296: PetscInt i;
2298: if (scall == MAT_INITIAL_MATRIX) PetscCalloc1(n, B);
2300: for (i = 0; i < n; i++) MatCreateSubMatrix_SeqDense(A, irow[i], icol[i], scall, &(*B)[i]);
2301: return 0;
2302: }
2304: static PetscErrorCode MatAssemblyBegin_SeqDense(Mat mat, MatAssemblyType mode)
2305: {
2306: return 0;
2307: }
2309: static PetscErrorCode MatAssemblyEnd_SeqDense(Mat mat, MatAssemblyType mode)
2310: {
2311: return 0;
2312: }
2314: PetscErrorCode MatCopy_SeqDense(Mat A, Mat B, MatStructure str)
2315: {
2316: Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data;
2317: const PetscScalar *va;
2318: PetscScalar *vb;
2319: PetscInt lda1 = a->lda, lda2 = b->lda, m = A->rmap->n, n = A->cmap->n, j;
2321: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2322: if (A->ops->copy != B->ops->copy) {
2323: MatCopy_Basic(A, B, str);
2324: return 0;
2325: }
2327: MatDenseGetArrayRead(A, &va);
2328: MatDenseGetArray(B, &vb);
2329: if (lda1 > m || lda2 > m) {
2330: for (j = 0; j < n; j++) PetscArraycpy(vb + j * lda2, va + j * lda1, m);
2331: } else {
2332: PetscArraycpy(vb, va, A->rmap->n * A->cmap->n);
2333: }
2334: MatDenseRestoreArray(B, &vb);
2335: MatDenseRestoreArrayRead(A, &va);
2336: MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY);
2337: MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY);
2338: return 0;
2339: }
2341: PetscErrorCode MatSetUp_SeqDense(Mat A)
2342: {
2343: PetscLayoutSetUp(A->rmap);
2344: PetscLayoutSetUp(A->cmap);
2345: if (!A->preallocated) MatSeqDenseSetPreallocation(A, NULL);
2346: return 0;
2347: }
2349: static PetscErrorCode MatConjugate_SeqDense(Mat A)
2350: {
2351: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2352: PetscInt i, j;
2353: PetscInt min = PetscMin(A->rmap->n, A->cmap->n);
2354: PetscScalar *aa;
2356: MatDenseGetArray(A, &aa);
2357: for (j = 0; j < A->cmap->n; j++) {
2358: for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscConj(aa[i + j * mat->lda]);
2359: }
2360: MatDenseRestoreArray(A, &aa);
2361: if (mat->tau)
2362: for (i = 0; i < min; i++) mat->tau[i] = PetscConj(mat->tau[i]);
2363: return 0;
2364: }
2366: static PetscErrorCode MatRealPart_SeqDense(Mat A)
2367: {
2368: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2369: PetscInt i, j;
2370: PetscScalar *aa;
2372: MatDenseGetArray(A, &aa);
2373: for (j = 0; j < A->cmap->n; j++) {
2374: for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscRealPart(aa[i + j * mat->lda]);
2375: }
2376: MatDenseRestoreArray(A, &aa);
2377: return 0;
2378: }
2380: static PetscErrorCode MatImaginaryPart_SeqDense(Mat A)
2381: {
2382: Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
2383: PetscInt i, j;
2384: PetscScalar *aa;
2386: MatDenseGetArray(A, &aa);
2387: for (j = 0; j < A->cmap->n; j++) {
2388: for (i = 0; i < A->rmap->n; i++) aa[i + j * mat->lda] = PetscImaginaryPart(aa[i + j * mat->lda]);
2389: }
2390: MatDenseRestoreArray(A, &aa);
2391: return 0;
2392: }
2394: /* ----------------------------------------------------------------*/
2395: PetscErrorCode MatMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2396: {
2397: PetscInt m = A->rmap->n, n = B->cmap->n;
2398: PetscBool cisdense;
2400: MatSetSizes(C, m, n, m, n);
2401: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2402: if (!cisdense) {
2403: PetscBool flg;
2405: PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2406: MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2407: }
2408: MatSetUp(C);
2409: return 0;
2410: }
2412: PetscErrorCode MatMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2413: {
2414: Mat_SeqDense *a = (Mat_SeqDense *)A->data, *b = (Mat_SeqDense *)B->data, *c = (Mat_SeqDense *)C->data;
2415: PetscBLASInt m, n, k;
2416: const PetscScalar *av, *bv;
2417: PetscScalar *cv;
2418: PetscScalar _DOne = 1.0, _DZero = 0.0;
2420: PetscBLASIntCast(C->rmap->n, &m);
2421: PetscBLASIntCast(C->cmap->n, &n);
2422: PetscBLASIntCast(A->cmap->n, &k);
2423: if (!m || !n || !k) return 0;
2424: MatDenseGetArrayRead(A, &av);
2425: MatDenseGetArrayRead(B, &bv);
2426: MatDenseGetArrayWrite(C, &cv);
2427: PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2428: PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2429: MatDenseRestoreArrayRead(A, &av);
2430: MatDenseRestoreArrayRead(B, &bv);
2431: MatDenseRestoreArrayWrite(C, &cv);
2432: return 0;
2433: }
2435: PetscErrorCode MatMatTransposeMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2436: {
2437: PetscInt m = A->rmap->n, n = B->rmap->n;
2438: PetscBool cisdense;
2440: MatSetSizes(C, m, n, m, n);
2441: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2442: if (!cisdense) {
2443: PetscBool flg;
2445: PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2446: MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2447: }
2448: MatSetUp(C);
2449: return 0;
2450: }
2452: PetscErrorCode MatMatTransposeMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2453: {
2454: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2455: Mat_SeqDense *b = (Mat_SeqDense *)B->data;
2456: Mat_SeqDense *c = (Mat_SeqDense *)C->data;
2457: const PetscScalar *av, *bv;
2458: PetscScalar *cv;
2459: PetscBLASInt m, n, k;
2460: PetscScalar _DOne = 1.0, _DZero = 0.0;
2462: PetscBLASIntCast(C->rmap->n, &m);
2463: PetscBLASIntCast(C->cmap->n, &n);
2464: PetscBLASIntCast(A->cmap->n, &k);
2465: if (!m || !n || !k) return 0;
2466: MatDenseGetArrayRead(A, &av);
2467: MatDenseGetArrayRead(B, &bv);
2468: MatDenseGetArrayWrite(C, &cv);
2469: PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2470: MatDenseRestoreArrayRead(A, &av);
2471: MatDenseRestoreArrayRead(B, &bv);
2472: MatDenseRestoreArrayWrite(C, &cv);
2473: PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2474: return 0;
2475: }
2477: PetscErrorCode MatTransposeMatMultSymbolic_SeqDense_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
2478: {
2479: PetscInt m = A->cmap->n, n = B->cmap->n;
2480: PetscBool cisdense;
2482: MatSetSizes(C, m, n, m, n);
2483: PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATSEQDENSECUDA, "");
2484: if (!cisdense) {
2485: PetscBool flg;
2487: PetscObjectTypeCompare((PetscObject)B, ((PetscObject)A)->type_name, &flg);
2488: MatSetType(C, flg ? ((PetscObject)A)->type_name : MATDENSE);
2489: }
2490: MatSetUp(C);
2491: return 0;
2492: }
2494: PetscErrorCode MatTransposeMatMultNumeric_SeqDense_SeqDense(Mat A, Mat B, Mat C)
2495: {
2496: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2497: Mat_SeqDense *b = (Mat_SeqDense *)B->data;
2498: Mat_SeqDense *c = (Mat_SeqDense *)C->data;
2499: const PetscScalar *av, *bv;
2500: PetscScalar *cv;
2501: PetscBLASInt m, n, k;
2502: PetscScalar _DOne = 1.0, _DZero = 0.0;
2504: PetscBLASIntCast(C->rmap->n, &m);
2505: PetscBLASIntCast(C->cmap->n, &n);
2506: PetscBLASIntCast(A->rmap->n, &k);
2507: if (!m || !n || !k) return 0;
2508: MatDenseGetArrayRead(A, &av);
2509: MatDenseGetArrayRead(B, &bv);
2510: MatDenseGetArrayWrite(C, &cv);
2511: PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &m, &n, &k, &_DOne, av, &a->lda, bv, &b->lda, &_DZero, cv, &c->lda));
2512: MatDenseRestoreArrayRead(A, &av);
2513: MatDenseRestoreArrayRead(B, &bv);
2514: MatDenseRestoreArrayWrite(C, &cv);
2515: PetscLogFlops(1.0 * m * n * k + 1.0 * m * n * (k - 1));
2516: return 0;
2517: }
2519: /* ----------------------------------------------- */
2520: static PetscErrorCode MatProductSetFromOptions_SeqDense_AB(Mat C)
2521: {
2522: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqDense;
2523: C->ops->productsymbolic = MatProductSymbolic_AB;
2524: return 0;
2525: }
2527: static PetscErrorCode MatProductSetFromOptions_SeqDense_AtB(Mat C)
2528: {
2529: C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqDense_SeqDense;
2530: C->ops->productsymbolic = MatProductSymbolic_AtB;
2531: return 0;
2532: }
2534: static PetscErrorCode MatProductSetFromOptions_SeqDense_ABt(Mat C)
2535: {
2536: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqDense_SeqDense;
2537: C->ops->productsymbolic = MatProductSymbolic_ABt;
2538: return 0;
2539: }
2541: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense(Mat C)
2542: {
2543: Mat_Product *product = C->product;
2545: switch (product->type) {
2546: case MATPRODUCT_AB:
2547: MatProductSetFromOptions_SeqDense_AB(C);
2548: break;
2549: case MATPRODUCT_AtB:
2550: MatProductSetFromOptions_SeqDense_AtB(C);
2551: break;
2552: case MATPRODUCT_ABt:
2553: MatProductSetFromOptions_SeqDense_ABt(C);
2554: break;
2555: default:
2556: break;
2557: }
2558: return 0;
2559: }
2560: /* ----------------------------------------------- */
2562: static PetscErrorCode MatGetRowMax_SeqDense(Mat A, Vec v, PetscInt idx[])
2563: {
2564: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2565: PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p;
2566: PetscScalar *x;
2567: const PetscScalar *aa;
2570: VecGetArray(v, &x);
2571: VecGetLocalSize(v, &p);
2572: MatDenseGetArrayRead(A, &aa);
2574: for (i = 0; i < m; i++) {
2575: x[i] = aa[i];
2576: if (idx) idx[i] = 0;
2577: for (j = 1; j < n; j++) {
2578: if (PetscRealPart(x[i]) < PetscRealPart(aa[i + a->lda * j])) {
2579: x[i] = aa[i + a->lda * j];
2580: if (idx) idx[i] = j;
2581: }
2582: }
2583: }
2584: MatDenseRestoreArrayRead(A, &aa);
2585: VecRestoreArray(v, &x);
2586: return 0;
2587: }
2589: static PetscErrorCode MatGetRowMaxAbs_SeqDense(Mat A, Vec v, PetscInt idx[])
2590: {
2591: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2592: PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p;
2593: PetscScalar *x;
2594: PetscReal atmp;
2595: const PetscScalar *aa;
2598: VecGetArray(v, &x);
2599: VecGetLocalSize(v, &p);
2600: MatDenseGetArrayRead(A, &aa);
2602: for (i = 0; i < m; i++) {
2603: x[i] = PetscAbsScalar(aa[i]);
2604: for (j = 1; j < n; j++) {
2605: atmp = PetscAbsScalar(aa[i + a->lda * j]);
2606: if (PetscAbsScalar(x[i]) < atmp) {
2607: x[i] = atmp;
2608: if (idx) idx[i] = j;
2609: }
2610: }
2611: }
2612: MatDenseRestoreArrayRead(A, &aa);
2613: VecRestoreArray(v, &x);
2614: return 0;
2615: }
2617: static PetscErrorCode MatGetRowMin_SeqDense(Mat A, Vec v, PetscInt idx[])
2618: {
2619: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2620: PetscInt i, j, m = A->rmap->n, n = A->cmap->n, p;
2621: PetscScalar *x;
2622: const PetscScalar *aa;
2625: MatDenseGetArrayRead(A, &aa);
2626: VecGetArray(v, &x);
2627: VecGetLocalSize(v, &p);
2629: for (i = 0; i < m; i++) {
2630: x[i] = aa[i];
2631: if (idx) idx[i] = 0;
2632: for (j = 1; j < n; j++) {
2633: if (PetscRealPart(x[i]) > PetscRealPart(aa[i + a->lda * j])) {
2634: x[i] = aa[i + a->lda * j];
2635: if (idx) idx[i] = j;
2636: }
2637: }
2638: }
2639: VecRestoreArray(v, &x);
2640: MatDenseRestoreArrayRead(A, &aa);
2641: return 0;
2642: }
2644: PetscErrorCode MatGetColumnVector_SeqDense(Mat A, Vec v, PetscInt col)
2645: {
2646: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2647: PetscScalar *x;
2648: const PetscScalar *aa;
2651: MatDenseGetArrayRead(A, &aa);
2652: VecGetArray(v, &x);
2653: PetscArraycpy(x, aa + col * a->lda, A->rmap->n);
2654: VecRestoreArray(v, &x);
2655: MatDenseRestoreArrayRead(A, &aa);
2656: return 0;
2657: }
2659: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat A, PetscInt type, PetscReal *reductions)
2660: {
2661: PetscInt i, j, m, n;
2662: const PetscScalar *a;
2664: MatGetSize(A, &m, &n);
2665: PetscArrayzero(reductions, n);
2666: MatDenseGetArrayRead(A, &a);
2667: if (type == NORM_2) {
2668: for (i = 0; i < n; i++) {
2669: for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j] * a[j]);
2670: a += m;
2671: }
2672: } else if (type == NORM_1) {
2673: for (i = 0; i < n; i++) {
2674: for (j = 0; j < m; j++) reductions[i] += PetscAbsScalar(a[j]);
2675: a += m;
2676: }
2677: } else if (type == NORM_INFINITY) {
2678: for (i = 0; i < n; i++) {
2679: for (j = 0; j < m; j++) reductions[i] = PetscMax(PetscAbsScalar(a[j]), reductions[i]);
2680: a += m;
2681: }
2682: } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2683: for (i = 0; i < n; i++) {
2684: for (j = 0; j < m; j++) reductions[i] += PetscRealPart(a[j]);
2685: a += m;
2686: }
2687: } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2688: for (i = 0; i < n; i++) {
2689: for (j = 0; j < m; j++) reductions[i] += PetscImaginaryPart(a[j]);
2690: a += m;
2691: }
2692: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2693: MatDenseRestoreArrayRead(A, &a);
2694: if (type == NORM_2) {
2695: for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2696: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2697: for (i = 0; i < n; i++) reductions[i] /= m;
2698: }
2699: return 0;
2700: }
2702: PetscErrorCode MatSetRandom_SeqDense(Mat x, PetscRandom rctx)
2703: {
2704: PetscScalar *a;
2705: PetscInt lda, m, n, i, j;
2707: MatGetSize(x, &m, &n);
2708: MatDenseGetLDA(x, &lda);
2709: MatDenseGetArrayWrite(x, &a);
2710: for (j = 0; j < n; j++) {
2711: for (i = 0; i < m; i++) PetscRandomGetValue(rctx, a + j * lda + i);
2712: }
2713: MatDenseRestoreArrayWrite(x, &a);
2714: return 0;
2715: }
2717: static PetscErrorCode MatMissingDiagonal_SeqDense(Mat A, PetscBool *missing, PetscInt *d)
2718: {
2719: *missing = PETSC_FALSE;
2720: return 0;
2721: }
2723: /* vals is not const */
2724: static PetscErrorCode MatDenseGetColumn_SeqDense(Mat A, PetscInt col, PetscScalar **vals)
2725: {
2726: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
2727: PetscScalar *v;
2730: MatDenseGetArray(A, &v);
2731: *vals = v + col * a->lda;
2732: MatDenseRestoreArray(A, &v);
2733: return 0;
2734: }
2736: static PetscErrorCode MatDenseRestoreColumn_SeqDense(Mat A, PetscScalar **vals)
2737: {
2738: if (vals) *vals = NULL; /* user cannot accidentally use the array later */
2739: return 0;
2740: }
2742: /* -------------------------------------------------------------------*/
2743: static struct _MatOps MatOps_Values = {MatSetValues_SeqDense,
2744: MatGetRow_SeqDense,
2745: MatRestoreRow_SeqDense,
2746: MatMult_SeqDense,
2747: /* 4*/ MatMultAdd_SeqDense,
2748: MatMultTranspose_SeqDense,
2749: MatMultTransposeAdd_SeqDense,
2750: NULL,
2751: NULL,
2752: NULL,
2753: /* 10*/ NULL,
2754: MatLUFactor_SeqDense,
2755: MatCholeskyFactor_SeqDense,
2756: MatSOR_SeqDense,
2757: MatTranspose_SeqDense,
2758: /* 15*/ MatGetInfo_SeqDense,
2759: MatEqual_SeqDense,
2760: MatGetDiagonal_SeqDense,
2761: MatDiagonalScale_SeqDense,
2762: MatNorm_SeqDense,
2763: /* 20*/ MatAssemblyBegin_SeqDense,
2764: MatAssemblyEnd_SeqDense,
2765: MatSetOption_SeqDense,
2766: MatZeroEntries_SeqDense,
2767: /* 24*/ MatZeroRows_SeqDense,
2768: NULL,
2769: NULL,
2770: NULL,
2771: NULL,
2772: /* 29*/ MatSetUp_SeqDense,
2773: NULL,
2774: NULL,
2775: NULL,
2776: NULL,
2777: /* 34*/ MatDuplicate_SeqDense,
2778: NULL,
2779: NULL,
2780: NULL,
2781: NULL,
2782: /* 39*/ MatAXPY_SeqDense,
2783: MatCreateSubMatrices_SeqDense,
2784: NULL,
2785: MatGetValues_SeqDense,
2786: MatCopy_SeqDense,
2787: /* 44*/ MatGetRowMax_SeqDense,
2788: MatScale_SeqDense,
2789: MatShift_SeqDense,
2790: NULL,
2791: MatZeroRowsColumns_SeqDense,
2792: /* 49*/ MatSetRandom_SeqDense,
2793: NULL,
2794: NULL,
2795: NULL,
2796: NULL,
2797: /* 54*/ NULL,
2798: NULL,
2799: NULL,
2800: NULL,
2801: NULL,
2802: /* 59*/ MatCreateSubMatrix_SeqDense,
2803: MatDestroy_SeqDense,
2804: MatView_SeqDense,
2805: NULL,
2806: NULL,
2807: /* 64*/ NULL,
2808: NULL,
2809: NULL,
2810: NULL,
2811: NULL,
2812: /* 69*/ MatGetRowMaxAbs_SeqDense,
2813: NULL,
2814: NULL,
2815: NULL,
2816: NULL,
2817: /* 74*/ NULL,
2818: NULL,
2819: NULL,
2820: NULL,
2821: NULL,
2822: /* 79*/ NULL,
2823: NULL,
2824: NULL,
2825: NULL,
2826: /* 83*/ MatLoad_SeqDense,
2827: MatIsSymmetric_SeqDense,
2828: MatIsHermitian_SeqDense,
2829: NULL,
2830: NULL,
2831: NULL,
2832: /* 89*/ NULL,
2833: NULL,
2834: MatMatMultNumeric_SeqDense_SeqDense,
2835: NULL,
2836: NULL,
2837: /* 94*/ NULL,
2838: NULL,
2839: NULL,
2840: MatMatTransposeMultNumeric_SeqDense_SeqDense,
2841: NULL,
2842: /* 99*/ MatProductSetFromOptions_SeqDense,
2843: NULL,
2844: NULL,
2845: MatConjugate_SeqDense,
2846: NULL,
2847: /*104*/ NULL,
2848: MatRealPart_SeqDense,
2849: MatImaginaryPart_SeqDense,
2850: NULL,
2851: NULL,
2852: /*109*/ NULL,
2853: NULL,
2854: MatGetRowMin_SeqDense,
2855: MatGetColumnVector_SeqDense,
2856: MatMissingDiagonal_SeqDense,
2857: /*114*/ NULL,
2858: NULL,
2859: NULL,
2860: NULL,
2861: NULL,
2862: /*119*/ NULL,
2863: NULL,
2864: NULL,
2865: NULL,
2866: NULL,
2867: /*124*/ NULL,
2868: MatGetColumnReductions_SeqDense,
2869: NULL,
2870: NULL,
2871: NULL,
2872: /*129*/ NULL,
2873: NULL,
2874: NULL,
2875: MatTransposeMatMultNumeric_SeqDense_SeqDense,
2876: NULL,
2877: /*134*/ NULL,
2878: NULL,
2879: NULL,
2880: NULL,
2881: NULL,
2882: /*139*/ NULL,
2883: NULL,
2884: NULL,
2885: NULL,
2886: NULL,
2887: MatCreateMPIMatConcatenateSeqMat_SeqDense,
2888: /*145*/ NULL,
2889: NULL,
2890: NULL,
2891: NULL,
2892: NULL,
2893: /*150*/ NULL};
2895: /*@C
2896: MatCreateSeqDense - Creates a `MATSEQDENSE` that
2897: is stored in column major order (the usual Fortran 77 manner). Many
2898: of the matrix operations use the BLAS and LAPACK routines.
2900: Collective
2902: Input Parameters:
2903: + comm - MPI communicator, set to `PETSC_COMM_SELF`
2904: . m - number of rows
2905: . n - number of columns
2906: - data - optional location of matrix data in column major order. Set data=NULL for PETSc
2907: to control all matrix memory allocation.
2909: Output Parameter:
2910: . A - the matrix
2912: Note:
2913: The data input variable is intended primarily for Fortran programmers
2914: who wish to allocate their own matrix memory space. Most users should
2915: set data=NULL.
2917: Level: intermediate
2919: .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
2920: @*/
2921: PetscErrorCode MatCreateSeqDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscScalar *data, Mat *A)
2922: {
2923: MatCreate(comm, A);
2924: MatSetSizes(*A, m, n, m, n);
2925: MatSetType(*A, MATSEQDENSE);
2926: MatSeqDenseSetPreallocation(*A, data);
2927: return 0;
2928: }
2930: /*@C
2931: MatSeqDenseSetPreallocation - Sets the array used for storing the matrix elements of a `MATSEQDENSE` matrix
2933: Collective
2935: Input Parameters:
2936: + B - the matrix
2937: - data - the array (or NULL)
2939: Note:
2940: The data input variable is intended primarily for Fortran programmers
2941: who wish to allocate their own matrix memory space. Most users should
2942: need not call this routine.
2944: Level: intermediate
2946: .seealso: `MATSEQDENSE`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`, `MatDenseSetLDA()`
2947: @*/
2948: PetscErrorCode MatSeqDenseSetPreallocation(Mat B, PetscScalar data[])
2949: {
2951: PetscTryMethod(B, "MatSeqDenseSetPreallocation_C", (Mat, PetscScalar[]), (B, data));
2952: return 0;
2953: }
2955: PetscErrorCode MatSeqDenseSetPreallocation_SeqDense(Mat B, PetscScalar *data)
2956: {
2957: Mat_SeqDense *b = (Mat_SeqDense *)B->data;
2960: B->preallocated = PETSC_TRUE;
2962: PetscLayoutSetUp(B->rmap);
2963: PetscLayoutSetUp(B->cmap);
2965: if (b->lda <= 0) b->lda = B->rmap->n;
2967: if (!data) { /* petsc-allocated storage */
2968: if (!b->user_alloc) PetscFree(b->v);
2969: PetscCalloc1((size_t)b->lda * B->cmap->n, &b->v);
2971: b->user_alloc = PETSC_FALSE;
2972: } else { /* user-allocated storage */
2973: if (!b->user_alloc) PetscFree(b->v);
2974: b->v = data;
2975: b->user_alloc = PETSC_TRUE;
2976: }
2977: B->assembled = PETSC_TRUE;
2978: return 0;
2979: }
2981: #if defined(PETSC_HAVE_ELEMENTAL)
2982: PETSC_INTERN PetscErrorCode MatConvert_SeqDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2983: {
2984: Mat mat_elemental;
2985: const PetscScalar *array;
2986: PetscScalar *v_colwise;
2987: PetscInt M = A->rmap->N, N = A->cmap->N, i, j, k, *rows, *cols;
2989: PetscMalloc3(M * N, &v_colwise, M, &rows, N, &cols);
2990: MatDenseGetArrayRead(A, &array);
2991: /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */
2992: k = 0;
2993: for (j = 0; j < N; j++) {
2994: cols[j] = j;
2995: for (i = 0; i < M; i++) v_colwise[j * M + i] = array[k++];
2996: }
2997: for (i = 0; i < M; i++) rows[i] = i;
2998: MatDenseRestoreArrayRead(A, &array);
3000: MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
3001: MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N);
3002: MatSetType(mat_elemental, MATELEMENTAL);
3003: MatSetUp(mat_elemental);
3005: /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
3006: MatSetValues(mat_elemental, M, rows, N, cols, v_colwise, ADD_VALUES);
3007: MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
3008: MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
3009: PetscFree3(v_colwise, rows, cols);
3011: if (reuse == MAT_INPLACE_MATRIX) {
3012: MatHeaderReplace(A, &mat_elemental);
3013: } else {
3014: *newmat = mat_elemental;
3015: }
3016: return 0;
3017: }
3018: #endif
3020: PetscErrorCode MatDenseSetLDA_SeqDense(Mat B, PetscInt lda)
3021: {
3022: Mat_SeqDense *b = (Mat_SeqDense *)B->data;
3023: PetscBool data;
3025: data = (PetscBool)((B->rmap->n > 0 && B->cmap->n > 0) ? (b->v ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE);
3028: b->lda = lda;
3029: return 0;
3030: }
3032: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3033: {
3034: MatCreateMPIMatConcatenateSeqMat_MPIDense(comm, inmat, n, scall, outmat);
3035: return 0;
3036: }
3038: PetscErrorCode MatDenseGetColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3039: {
3040: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3044: if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3045: a->vecinuse = col + 1;
3046: MatDenseGetArray(A, (PetscScalar **)&a->ptrinuse);
3047: VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3048: *v = a->cvec;
3049: return 0;
3050: }
3052: PetscErrorCode MatDenseRestoreColumnVec_SeqDense(Mat A, PetscInt col, Vec *v)
3053: {
3054: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3058: a->vecinuse = 0;
3059: MatDenseRestoreArray(A, (PetscScalar **)&a->ptrinuse);
3060: VecResetArray(a->cvec);
3061: if (v) *v = NULL;
3062: return 0;
3063: }
3065: PetscErrorCode MatDenseGetColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3066: {
3067: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3071: if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3072: a->vecinuse = col + 1;
3073: MatDenseGetArrayRead(A, &a->ptrinuse);
3074: VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3075: VecLockReadPush(a->cvec);
3076: *v = a->cvec;
3077: return 0;
3078: }
3080: PetscErrorCode MatDenseRestoreColumnVecRead_SeqDense(Mat A, PetscInt col, Vec *v)
3081: {
3082: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3086: a->vecinuse = 0;
3087: MatDenseRestoreArrayRead(A, &a->ptrinuse);
3088: VecLockReadPop(a->cvec);
3089: VecResetArray(a->cvec);
3090: if (v) *v = NULL;
3091: return 0;
3092: }
3094: PetscErrorCode MatDenseGetColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3095: {
3096: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3100: if (!a->cvec) { VecCreateSeqWithArray(PetscObjectComm((PetscObject)A), A->rmap->bs, A->rmap->n, NULL, &a->cvec); }
3101: a->vecinuse = col + 1;
3102: MatDenseGetArrayWrite(A, (PetscScalar **)&a->ptrinuse);
3103: VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)a->lda);
3104: *v = a->cvec;
3105: return 0;
3106: }
3108: PetscErrorCode MatDenseRestoreColumnVecWrite_SeqDense(Mat A, PetscInt col, Vec *v)
3109: {
3110: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3114: a->vecinuse = 0;
3115: MatDenseRestoreArrayWrite(A, (PetscScalar **)&a->ptrinuse);
3116: VecResetArray(a->cvec);
3117: if (v) *v = NULL;
3118: return 0;
3119: }
3121: PetscErrorCode MatDenseGetSubMatrix_SeqDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3122: {
3123: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3127: if (a->cmat && (cend - cbegin != a->cmat->cmap->N || rend - rbegin != a->cmat->rmap->N)) MatDestroy(&a->cmat);
3128: if (!a->cmat) {
3129: MatCreateDense(PetscObjectComm((PetscObject)A), rend - rbegin, PETSC_DECIDE, rend - rbegin, cend - cbegin, a->v + rbegin + (size_t)cbegin * a->lda, &a->cmat);
3130: } else {
3131: MatDensePlaceArray(a->cmat, a->v + rbegin + (size_t)cbegin * a->lda);
3132: }
3133: MatDenseSetLDA(a->cmat, a->lda);
3134: a->matinuse = cbegin + 1;
3135: *v = a->cmat;
3136: #if defined(PETSC_HAVE_CUDA)
3137: A->offloadmask = PETSC_OFFLOAD_CPU;
3138: #endif
3139: return 0;
3140: }
3142: PetscErrorCode MatDenseRestoreSubMatrix_SeqDense(Mat A, Mat *v)
3143: {
3144: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3149: a->matinuse = 0;
3150: MatDenseResetArray(a->cmat);
3151: if (v) *v = NULL;
3152: #if defined(PETSC_HAVE_CUDA)
3153: A->offloadmask = PETSC_OFFLOAD_CPU;
3154: #endif
3155: return 0;
3156: }
3158: /*MC
3159: MATSEQDENSE - MATSEQDENSE = "seqdense" - A matrix type to be used for sequential dense matrices.
3161: Options Database Keys:
3162: . -mat_type seqdense - sets the matrix type to `MATSEQDENSE` during a call to `MatSetFromOptions()`
3164: Level: beginner
3166: .seealso: `MATSEQDENSE`, `MatCreateSeqDense()`
3167: M*/
3168: PetscErrorCode MatCreate_SeqDense(Mat B)
3169: {
3170: Mat_SeqDense *b;
3171: PetscMPIInt size;
3173: MPI_Comm_size(PetscObjectComm((PetscObject)B), &size);
3176: PetscNew(&b);
3177: PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps));
3178: B->data = (void *)b;
3180: b->roworiented = PETSC_TRUE;
3182: PetscObjectComposeFunction((PetscObject)B, "MatQRFactor_C", MatQRFactor_SeqDense);
3183: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetLDA_C", MatDenseGetLDA_SeqDense);
3184: PetscObjectComposeFunction((PetscObject)B, "MatDenseSetLDA_C", MatDenseSetLDA_SeqDense);
3185: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArray_C", MatDenseGetArray_SeqDense);
3186: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArray_C", MatDenseRestoreArray_SeqDense);
3187: PetscObjectComposeFunction((PetscObject)B, "MatDensePlaceArray_C", MatDensePlaceArray_SeqDense);
3188: PetscObjectComposeFunction((PetscObject)B, "MatDenseResetArray_C", MatDenseResetArray_SeqDense);
3189: PetscObjectComposeFunction((PetscObject)B, "MatDenseReplaceArray_C", MatDenseReplaceArray_SeqDense);
3190: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayRead_C", MatDenseGetArray_SeqDense);
3191: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayRead_C", MatDenseRestoreArray_SeqDense);
3192: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetArrayWrite_C", MatDenseGetArray_SeqDense);
3193: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArray_SeqDense);
3194: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqaij_C", MatConvert_SeqDense_SeqAIJ);
3195: #if defined(PETSC_HAVE_ELEMENTAL)
3196: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_elemental_C", MatConvert_SeqDense_Elemental);
3197: #endif
3198: #if defined(PETSC_HAVE_SCALAPACK)
3199: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_scalapack_C", MatConvert_Dense_ScaLAPACK);
3200: #endif
3201: #if defined(PETSC_HAVE_CUDA)
3202: PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqdense_seqdensecuda_C", MatConvert_SeqDense_SeqDenseCUDA);
3203: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdensecuda_C", MatProductSetFromOptions_SeqDense);
3204: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdensecuda_seqdense_C", MatProductSetFromOptions_SeqDense);
3205: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdensecuda_C", MatProductSetFromOptions_SeqDense);
3206: #endif
3207: PetscObjectComposeFunction((PetscObject)B, "MatSeqDenseSetPreallocation_C", MatSeqDenseSetPreallocation_SeqDense);
3208: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense);
3209: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqdense_seqdense_C", MatProductSetFromOptions_SeqDense);
3210: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense);
3211: PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqsbaij_seqdense_C", MatProductSetFromOptions_SeqXBAIJ_SeqDense);
3213: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumn_C", MatDenseGetColumn_SeqDense);
3214: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_SeqDense);
3215: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_SeqDense);
3216: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_SeqDense);
3217: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_SeqDense);
3218: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_SeqDense);
3219: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_SeqDense);
3220: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_SeqDense);
3221: PetscObjectComposeFunction((PetscObject)B, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_SeqDense);
3222: PetscObjectComposeFunction((PetscObject)B, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_SeqDense);
3223: PetscObjectChangeTypeName((PetscObject)B, MATSEQDENSE);
3224: return 0;
3225: }
3227: /*@C
3228: MatDenseGetColumn - gives access to a column of a dense matrix. This is only the local part of the column. You MUST call `MatDenseRestoreColumn()` to avoid memory bleeding.
3230: Not Collective
3232: Input Parameters:
3233: + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3234: - col - column index
3236: Output Parameter:
3237: . vals - pointer to the data
3239: Level: intermediate
3241: Note:
3242: Use `MatDenseGetColumnVec()` to get access to a column of a `MATDENSE` treated as a `Vec`
3244: .seealso: `MATDENSE`, `MatDenseRestoreColumn()`, `MatDenseGetColumnVec()`
3245: @*/
3246: PetscErrorCode MatDenseGetColumn(Mat A, PetscInt col, PetscScalar **vals)
3247: {
3251: PetscUseMethod(A, "MatDenseGetColumn_C", (Mat, PetscInt, PetscScalar **), (A, col, vals));
3252: return 0;
3253: }
3255: /*@C
3256: MatDenseRestoreColumn - returns access to a column of a `MATDENSE` matrix which is returned by `MatDenseGetColumn()`.
3258: Not Collective
3260: Input Parameters:
3261: + mat - a `MATSEQDENSE` or `MATMPIDENSE` matrix
3262: - vals - pointer to the data (may be NULL)
3264: Level: intermediate
3266: .seealso: `MATDENSE`, `MatDenseGetColumn()`
3267: @*/
3268: PetscErrorCode MatDenseRestoreColumn(Mat A, PetscScalar **vals)
3269: {
3272: PetscUseMethod(A, "MatDenseRestoreColumn_C", (Mat, PetscScalar **), (A, vals));
3273: return 0;
3274: }
3276: /*@
3277: MatDenseGetColumnVec - Gives read-write access to a column of a `MATDENSE` matrix, represented as a `Vec`.
3279: Collective
3281: Input Parameters:
3282: + mat - the `Mat` object
3283: - col - the column index
3285: Output Parameter:
3286: . v - the vector
3288: Notes:
3289: The vector is owned by PETSc. Users need to call `MatDenseRestoreColumnVec()` when the vector is no longer needed.
3291: Use `MatDenseGetColumnVecRead()` to obtain read-only access or `MatDenseGetColumnVecWrite()` for write-only access.
3293: Level: intermediate
3295: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`, `MatDenseGetColumn()`
3296: @*/
3297: PetscErrorCode MatDenseGetColumnVec(Mat A, PetscInt col, Vec *v)
3298: {
3305: PetscUseMethod(A, "MatDenseGetColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3306: return 0;
3307: }
3309: /*@
3310: MatDenseRestoreColumnVec - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVec().
3312: Collective
3314: Input Parameters:
3315: + mat - the Mat object
3316: . col - the column index
3317: - v - the Vec object (may be NULL)
3319: Level: intermediate
3321: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3322: @*/
3323: PetscErrorCode MatDenseRestoreColumnVec(Mat A, PetscInt col, Vec *v)
3324: {
3330: PetscUseMethod(A, "MatDenseRestoreColumnVec_C", (Mat, PetscInt, Vec *), (A, col, v));
3331: return 0;
3332: }
3334: /*@
3335: MatDenseGetColumnVecRead - Gives read-only access to a column of a dense matrix, represented as a Vec.
3337: Collective
3339: Input Parameters:
3340: + mat - the Mat object
3341: - col - the column index
3343: Output Parameter:
3344: . v - the vector
3346: Notes:
3347: The vector is owned by PETSc and users cannot modify it.
3349: Users need to call MatDenseRestoreColumnVecRead() when the vector is no longer needed.
3351: Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecWrite() for write-only access.
3353: Level: intermediate
3355: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3356: @*/
3357: PetscErrorCode MatDenseGetColumnVecRead(Mat A, PetscInt col, Vec *v)
3358: {
3365: PetscUseMethod(A, "MatDenseGetColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3366: return 0;
3367: }
3369: /*@
3370: MatDenseRestoreColumnVecRead - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecRead().
3372: Collective
3374: Input Parameters:
3375: + mat - the Mat object
3376: . col - the column index
3377: - v - the Vec object (may be NULL)
3379: Level: intermediate
3381: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecWrite()`
3382: @*/
3383: PetscErrorCode MatDenseRestoreColumnVecRead(Mat A, PetscInt col, Vec *v)
3384: {
3390: PetscUseMethod(A, "MatDenseRestoreColumnVecRead_C", (Mat, PetscInt, Vec *), (A, col, v));
3391: return 0;
3392: }
3394: /*@
3395: MatDenseGetColumnVecWrite - Gives write-only access to a column of a dense matrix, represented as a Vec.
3397: Collective
3399: Input Parameters:
3400: + mat - the Mat object
3401: - col - the column index
3403: Output Parameter:
3404: . v - the vector
3406: Notes:
3407: The vector is owned by PETSc. Users need to call MatDenseRestoreColumnVecWrite() when the vector is no longer needed.
3409: Use MatDenseGetColumnVec() to obtain read-write access or MatDenseGetColumnVecRead() for read-only access.
3411: Level: intermediate
3413: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`, `MatDenseRestoreColumnVecWrite()`
3414: @*/
3415: PetscErrorCode MatDenseGetColumnVecWrite(Mat A, PetscInt col, Vec *v)
3416: {
3423: PetscUseMethod(A, "MatDenseGetColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3424: return 0;
3425: }
3427: /*@
3428: MatDenseRestoreColumnVecWrite - Returns access to a column of a dense matrix obtained from MatDenseGetColumnVecWrite().
3430: Collective
3432: Input Parameters:
3433: + mat - the Mat object
3434: . col - the column index
3435: - v - the Vec object (may be NULL)
3437: Level: intermediate
3439: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseGetColumnVecRead()`, `MatDenseGetColumnVecWrite()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreColumnVecRead()`
3440: @*/
3441: PetscErrorCode MatDenseRestoreColumnVecWrite(Mat A, PetscInt col, Vec *v)
3442: {
3448: PetscUseMethod(A, "MatDenseRestoreColumnVecWrite_C", (Mat, PetscInt, Vec *), (A, col, v));
3449: return 0;
3450: }
3452: /*@
3453: MatDenseGetSubMatrix - Gives access to a block of rows and columns of a dense matrix, represented as a Mat.
3455: Collective
3457: Input Parameters:
3458: + mat - the Mat object
3459: . rbegin - the first global row index in the block (if PETSC_DECIDE, is 0)
3460: . rend - the global row index past the last one in the block (if PETSC_DECIDE, is M)
3461: . cbegin - the first global column index in the block (if PETSC_DECIDE, is 0)
3462: - cend - the global column index past the last one in the block (if PETSC_DECIDE, is N)
3464: Output Parameter:
3465: . v - the matrix
3467: Notes:
3468: The matrix is owned by PETSc. Users need to call MatDenseRestoreSubMatrix() when the matrix is no longer needed.
3470: The output matrix is not redistributed by PETSc, so depending on the values of rbegin and rend, some processes may have no local rows.
3472: Level: intermediate
3474: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseRestoreSubMatrix()`
3475: @*/
3476: PetscErrorCode MatDenseGetSubMatrix(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
3477: {
3485: if (rbegin == PETSC_DECIDE) rbegin = 0;
3486: if (rend == PETSC_DECIDE) rend = A->rmap->N;
3487: if (cbegin == PETSC_DECIDE) cbegin = 0;
3488: if (cend == PETSC_DECIDE) cend = A->cmap->N;
3494: PetscUseMethod(A, "MatDenseGetSubMatrix_C", (Mat, PetscInt, PetscInt, PetscInt, PetscInt, Mat *), (A, rbegin, rend, cbegin, cend, v));
3495: return 0;
3496: }
3498: /*@
3499: MatDenseRestoreSubMatrix - Returns access to a block of columns of a dense matrix obtained from MatDenseGetSubMatrix().
3501: Collective
3503: Input Parameters:
3504: + mat - the Mat object
3505: - v - the Mat object (may be NULL)
3507: Level: intermediate
3509: .seealso: `MATDENSE`, `MATDENSECUDA`, `MatDenseGetColumnVec()`, `MatDenseRestoreColumnVec()`, `MatDenseGetSubMatrix()`
3510: @*/
3511: PetscErrorCode MatDenseRestoreSubMatrix(Mat A, Mat *v)
3512: {
3516: PetscUseMethod(A, "MatDenseRestoreSubMatrix_C", (Mat, Mat *), (A, v));
3517: return 0;
3518: }
3520: #include <petscblaslapack.h>
3521: #include <petsc/private/kernels/blockinvert.h>
3523: PetscErrorCode MatSeqDenseInvert(Mat A)
3524: {
3525: Mat_SeqDense *a = (Mat_SeqDense *)A->data;
3526: PetscInt bs = A->rmap->n;
3527: MatScalar *values = a->v;
3528: const PetscReal shift = 0.0;
3529: PetscBool allowzeropivot = PetscNot(A->erroriffailure), zeropivotdetected = PETSC_FALSE;
3531: /* factor and invert each block */
3532: switch (bs) {
3533: case 1:
3534: values[0] = (PetscScalar)1.0 / (values[0] + shift);
3535: break;
3536: case 2:
3537: PetscKernel_A_gets_inverse_A_2(values, shift, allowzeropivot, &zeropivotdetected);
3538: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3539: break;
3540: case 3:
3541: PetscKernel_A_gets_inverse_A_3(values, shift, allowzeropivot, &zeropivotdetected);
3542: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3543: break;
3544: case 4:
3545: PetscKernel_A_gets_inverse_A_4(values, shift, allowzeropivot, &zeropivotdetected);
3546: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3547: break;
3548: case 5: {
3549: PetscScalar work[25];
3550: PetscInt ipvt[5];
3552: PetscKernel_A_gets_inverse_A_5(values, ipvt, work, shift, allowzeropivot, &zeropivotdetected);
3553: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3554: } break;
3555: case 6:
3556: PetscKernel_A_gets_inverse_A_6(values, shift, allowzeropivot, &zeropivotdetected);
3557: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3558: break;
3559: case 7:
3560: PetscKernel_A_gets_inverse_A_7(values, shift, allowzeropivot, &zeropivotdetected);
3561: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3562: break;
3563: default: {
3564: PetscInt *v_pivots, *IJ, j;
3565: PetscScalar *v_work;
3567: PetscMalloc3(bs, &v_work, bs, &v_pivots, bs, &IJ);
3568: for (j = 0; j < bs; j++) IJ[j] = j;
3569: PetscKernel_A_gets_inverse_A(bs, values, v_pivots, v_work, allowzeropivot, &zeropivotdetected);
3570: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3571: PetscFree3(v_work, v_pivots, IJ);
3572: }
3573: }
3574: return 0;
3575: }