Actual source code: bvec1.c


  2: /*
  3:    Defines the BLAS based vector operations. Code shared by parallel
  4:   and sequential vectors.
  5: */

  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <petscblaslapack.h>

 10: static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, PetscScalar (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
 11: {
 12:   const PetscInt     n   = xin->map->n;
 13:   const PetscBLASInt one = 1;
 14:   const PetscScalar *ya, *xa;
 15:   PetscBLASInt       bn;

 17:   PetscFunctionBegin;
 18:   PetscCall(PetscBLASIntCast(n, &bn));
 19:   if (n > 0) PetscCall(PetscLogFlops(2.0 * n - 1));
 20:   PetscCall(VecGetArrayRead(xin, &xa));
 21:   PetscCall(VecGetArrayRead(yin, &ya));
 22:   /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc
 23:      the second */
 24:   PetscCallBLAS("BLASdot", *z = BLASfn(&bn, ya, &one, xa, &one));
 25:   PetscCall(VecRestoreArrayRead(xin, &xa));
 26:   PetscCall(VecRestoreArrayRead(yin, &ya));
 27:   PetscFunctionReturn(PETSC_SUCCESS);
 28: }

 30: PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
 31: {
 32:   PetscFunctionBegin;
 33:   PetscCall(VecXDot_Seq_Private(xin, yin, z, BLASdot_));
 34:   PetscFunctionReturn(PETSC_SUCCESS);
 35: }

 37: PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
 38: {
 39:   PetscFunctionBegin;
 40:   /*
 41:     pay close attention!!! xin and yin are SWAPPED here so that the eventual BLAS call is
 42:     dot(&bn, xa, &one, ya, &one)
 43:   */
 44:   PetscCall(VecXDot_Seq_Private(yin, xin, z, BLASdotu_));
 45:   PetscFunctionReturn(PETSC_SUCCESS);
 46: }

 48: PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
 49: {
 50:   PetscFunctionBegin;
 51:   if (alpha == (PetscScalar)0.0) {
 52:     PetscCall(VecSet_Seq(xin, alpha));
 53:   } else if (alpha != (PetscScalar)1.0) {
 54:     const PetscBLASInt one = 1;
 55:     PetscBLASInt       bn;
 56:     PetscScalar       *xarray;

 58:     PetscCall(PetscBLASIntCast(xin->map->n, &bn));
 59:     PetscCall(PetscLogFlops(bn));
 60:     PetscCall(VecGetArray(xin, &xarray));
 61:     PetscCallBLAS("BLASscal", BLASscal_(&bn, &alpha, xarray, &one));
 62:     PetscCall(VecRestoreArray(xin, &xarray));
 63:   }
 64:   PetscFunctionReturn(PETSC_SUCCESS);
 65: }

 67: PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
 68: {
 69:   PetscFunctionBegin;
 70:   /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
 71:   if (alpha != (PetscScalar)0.0) {
 72:     const PetscScalar *xarray;
 73:     PetscScalar       *yarray;
 74:     const PetscBLASInt one = 1;
 75:     PetscBLASInt       bn;

 77:     PetscCall(PetscBLASIntCast(yin->map->n, &bn));
 78:     PetscCall(PetscLogFlops(2.0 * bn));
 79:     PetscCall(VecGetArrayRead(xin, &xarray));
 80:     PetscCall(VecGetArray(yin, &yarray));
 81:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
 82:     PetscCall(VecRestoreArrayRead(xin, &xarray));
 83:     PetscCall(VecRestoreArray(yin, &yarray));
 84:   }
 85:   PetscFunctionReturn(PETSC_SUCCESS);
 86: }

 88: PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
 89: {
 90:   PetscFunctionBegin;
 91:   if (a == (PetscScalar)0.0) {
 92:     PetscCall(VecScale_Seq(yin, b));
 93:   } else if (b == (PetscScalar)1.0) {
 94:     PetscCall(VecAXPY_Seq(yin, a, xin));
 95:   } else if (a == (PetscScalar)1.0) {
 96:     PetscCall(VecAYPX_Seq(yin, b, xin));
 97:   } else {
 98:     const PetscInt     n = yin->map->n;
 99:     const PetscScalar *xx;
100:     PetscInt           flops;
101:     PetscScalar       *yy;

103:     PetscCall(VecGetArrayRead(xin, &xx));
104:     PetscCall(VecGetArray(yin, &yy));
105:     if (b == (PetscScalar)0.0) {
106:       flops = n;
107:       for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i];
108:     } else {
109:       flops = 3 * n;
110:       for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i] + b * yy[i];
111:     }
112:     PetscCall(VecRestoreArrayRead(xin, &xx));
113:     PetscCall(VecRestoreArray(yin, &yy));
114:     PetscCall(PetscLogFlops(flops));
115:   }
116:   PetscFunctionReturn(PETSC_SUCCESS);
117: }

119: PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
120: {
121:   const PetscInt     n = zin->map->n;
122:   const PetscScalar *yy, *xx;
123:   PetscInt           flops = 4 * n; // common case
124:   PetscScalar       *zz;

126:   PetscFunctionBegin;
127:   PetscCall(VecGetArrayRead(xin, &xx));
128:   PetscCall(VecGetArrayRead(yin, &yy));
129:   PetscCall(VecGetArray(zin, &zz));
130:   if (alpha == (PetscScalar)1.0) {
131:     for (PetscInt i = 0; i < n; ++i) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
132:   } else if (gamma == (PetscScalar)1.0) {
133:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
134:   } else if (gamma == (PetscScalar)0.0) {
135:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i];
136:     flops -= n;
137:   } else {
138:     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
139:     flops += n;
140:   }
141:   PetscCall(VecRestoreArrayRead(xin, &xx));
142:   PetscCall(VecRestoreArrayRead(yin, &yy));
143:   PetscCall(VecRestoreArray(zin, &zz));
144:   PetscCall(PetscLogFlops(flops));
145:   PetscFunctionReturn(PETSC_SUCCESS);
146: }