Actual source code: bvec1.c
2: /*
3: Defines the BLAS based vector operations. Code shared by parallel
4: and sequential vectors.
5: */
7: #include <../src/vec/vec/impls/dvecimpl.h>
8: #include <petscblaslapack.h>
10: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, PetscScalar (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
11: {
12: const PetscInt n = xin->map->n;
13: const PetscBLASInt one = 1;
14: const PetscScalar *ya, *xa;
15: PetscBLASInt bn;
17: PetscFunctionBegin;
18: PetscCall(PetscBLASIntCast(n, &bn));
19: if (n > 0) PetscCall(PetscLogFlops(2.0 * n - 1));
20: PetscCall(VecGetArrayRead(xin, &xa));
21: PetscCall(VecGetArrayRead(yin, &ya));
22: /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc
23: the second */
24: PetscCallBLAS("BLASdot", *z = BLASfn(&bn, ya, &one, xa, &one));
25: PetscCall(VecRestoreArrayRead(xin, &xa));
26: PetscCall(VecRestoreArrayRead(yin, &ya));
27: PetscFunctionReturn(PETSC_SUCCESS);
28: }
30: PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
31: {
32: PetscFunctionBegin;
33: PetscCall(VecXDot_Seq_Private(xin, yin, z, BLASdot_));
34: PetscFunctionReturn(PETSC_SUCCESS);
35: }
37: PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
38: {
39: PetscFunctionBegin;
40: /*
41: pay close attention!!! xin and yin are SWAPPED here so that the eventual BLAS call is
42: dot(&bn, xa, &one, ya, &one)
43: */
44: PetscCall(VecXDot_Seq_Private(yin, xin, z, BLASdotu_));
45: PetscFunctionReturn(PETSC_SUCCESS);
46: }
48: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
49: {
50: PetscFunctionBegin;
51: if (alpha == (PetscScalar)0.0) {
52: PetscCall(VecSet_Seq(xin, alpha));
53: } else if (alpha != (PetscScalar)1.0) {
54: const PetscBLASInt one = 1;
55: PetscBLASInt bn;
56: PetscScalar *xarray;
58: PetscCall(PetscBLASIntCast(xin->map->n, &bn));
59: PetscCall(PetscLogFlops(bn));
60: PetscCall(VecGetArray(xin, &xarray));
61: PetscCallBLAS("BLASscal", BLASscal_(&bn, &alpha, xarray, &one));
62: PetscCall(VecRestoreArray(xin, &xarray));
63: }
64: PetscFunctionReturn(PETSC_SUCCESS);
65: }
67: PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
68: {
69: PetscFunctionBegin;
70: /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
71: if (alpha != (PetscScalar)0.0) {
72: const PetscScalar *xarray;
73: PetscScalar *yarray;
74: const PetscBLASInt one = 1;
75: PetscBLASInt bn;
77: PetscCall(PetscBLASIntCast(yin->map->n, &bn));
78: PetscCall(PetscLogFlops(2.0 * bn));
79: PetscCall(VecGetArrayRead(xin, &xarray));
80: PetscCall(VecGetArray(yin, &yarray));
81: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
82: PetscCall(VecRestoreArrayRead(xin, &xarray));
83: PetscCall(VecRestoreArray(yin, &yarray));
84: }
85: PetscFunctionReturn(PETSC_SUCCESS);
86: }
88: PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
89: {
90: PetscFunctionBegin;
91: if (a == (PetscScalar)0.0) {
92: PetscCall(VecScale_Seq(yin, b));
93: } else if (b == (PetscScalar)1.0) {
94: PetscCall(VecAXPY_Seq(yin, a, xin));
95: } else if (a == (PetscScalar)1.0) {
96: PetscCall(VecAYPX_Seq(yin, b, xin));
97: } else {
98: const PetscInt n = yin->map->n;
99: const PetscScalar *xx;
100: PetscInt flops;
101: PetscScalar *yy;
103: PetscCall(VecGetArrayRead(xin, &xx));
104: PetscCall(VecGetArray(yin, &yy));
105: if (b == (PetscScalar)0.0) {
106: flops = n;
107: for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i];
108: } else {
109: flops = 3 * n;
110: for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i] + b * yy[i];
111: }
112: PetscCall(VecRestoreArrayRead(xin, &xx));
113: PetscCall(VecRestoreArray(yin, &yy));
114: PetscCall(PetscLogFlops(flops));
115: }
116: PetscFunctionReturn(PETSC_SUCCESS);
117: }
119: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
120: {
121: const PetscInt n = zin->map->n;
122: const PetscScalar *yy, *xx;
123: PetscInt flops = 4 * n; // common case
124: PetscScalar *zz;
126: PetscFunctionBegin;
127: PetscCall(VecGetArrayRead(xin, &xx));
128: PetscCall(VecGetArrayRead(yin, &yy));
129: PetscCall(VecGetArray(zin, &zz));
130: if (alpha == (PetscScalar)1.0) {
131: for (PetscInt i = 0; i < n; ++i) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
132: } else if (gamma == (PetscScalar)1.0) {
133: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
134: } else if (gamma == (PetscScalar)0.0) {
135: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i];
136: flops -= n;
137: } else {
138: for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
139: flops += n;
140: }
141: PetscCall(VecRestoreArrayRead(xin, &xx));
142: PetscCall(VecRestoreArrayRead(yin, &yy));
143: PetscCall(VecRestoreArray(zin, &zz));
144: PetscCall(PetscLogFlops(flops));
145: PetscFunctionReturn(PETSC_SUCCESS);
146: }