Actual source code: dot.h

  1: /* $Id: dot.h,v 1.19 2000/05/10 16:38:34 bsmith Exp $ */

  3: #ifndef DOT
 4:  #include petsc.h

  6: EXTERN_C_BEGIN

  8: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
  9: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 10: #define fortranmdot4_      FORTRANMDOT4
 11: #define fortranmdot3_      FORTRANMDOT3
 12: #define fortranmdot2_      FORTRANMDOT2
 13: #define fortranmdot1_      FORTRANMDOT1
 14: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 15: #define fortranmdot4_      fortranmdot4
 16: #define fortranmdot3_      fortranmdot3
 17: #define fortranmdot2_      fortranmdot2
 18: #define fortranmdot1_      fortranmdot1
 19: #endif
 20: EXTERN void fortranmdot4_(void *,void *,void *,void *,void *,int *,
 21:                            void *,void *,void *,void *);
 22: EXTERN void fortranmdot3_(void *,void *,void *,void *,int *,
 23:                            void *,void *,void *);
 24: EXTERN void fortranmdot2_(void *,void *,void *,int *,
 25:                            void *,void *);
 26: EXTERN void fortranmdot1_(void *,void *,int *,
 27:                            void *);
 28: #endif

 30: #if defined(PETSC_USE_FORTRAN_KERNEL_NORMSQR)
 31: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 32: #define fortrannormsqr_    FORTRANNORMSQR
 33: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 34: #define fortrannormsqr_    fortrannormsqr
 35: #endif
 36: EXTERN void fortrannormsqr_(void *,int *,void *);
 37: #endif

 39: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJ)
 40: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 41: #define fortranmultaij_    FORTRANMULTAIJ
 42: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 43: #define fortranmultaij_    fortranmultaij
 44: #endif
 45: EXTERN void fortranmultaij_(int *,void*,int *,int *,void *,void*);
 46: #endif

 48: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJ)
 49: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 50: #define fortranmultaddaij_ FORTRANMULTADDAIJ
 51: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 52: #define fortranmultaddaij_ fortranmultaddaij
 53: #endif
 54: EXTERN void fortranmultaddaij_(int *,void*,int *,int *,void *,void*,void*);
 55: #endif

 57: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
 58: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 59: #define fortransolveaij_   FORTRANSOLVEAIJ
 60: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 61: #define fortransolveaij_   fortransolveaij
 62: #endif
 63: EXTERN void fortransolveaij_(int *,void*,int *,int *,int*,void *,void*);
 64: #endif

 66: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ)
 67: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 68: #define fortransolvebaij4_         FORTRANSOLVEBAIJ4
 69: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 70: #define fortransolvebaij4_          fortransolvebaij4
 71: #endif
 72: EXTERN void fortransolvebaij4_(int *,void*,int *,int *,int*,void *,void*,void *);
 73: #endif

 75: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL)
 76: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 77: #define fortransolvebaij4unroll_   FORTRANSOLVEBAIJ4UNROLL
 78: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 79: #define fortransolvebaij4unroll_    fortransolvebaij4unroll
 80: #endif
 81: EXTERN void fortransolvebaij4unroll_(int *,void*,int *,int *,int*,void *,void*);
 82: #endif

 84: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS)
 85: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 86: #define fortransolvebaij4blas_     FORTRANSOLVEBAIJ4BLAS
 87: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 88: #define fortransolvebaij4blas_      fortransolvebaij4blas
 89: #endif
 90: EXTERN void fortransolvebaij4blas_(int *,void*,int *,int *,int*,void *,void*,void *);
 91: #endif

 93: #if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
 94: #ifdef PETSC_HAVE_FORTRAN_CAPS
 95: #define fortranxtimesy_ FORTRANXTIMESY
 96: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 97: #define fortranxtimesy_ fortranxtimesy
 98: #endif
 99: EXTERN void fortranxtimesy_(void *,void *,void *,int *);
100: #endif

102: EXTERN_C_END

104: /* ------------------------------------------------------------------- */


107: #if !defined(PETSC_USE_COMPLEX)

109: #ifdef PETSC_USE_UNROLL_KERNELS
/*
   DOT (unrolled, real) - adds to sum the products x[i]*y[i] for the first n
   entries, four terms per loop pass.

   The switch consumes the n%4 leftover entries first (deliberate fall-through)
   and then subtracts 4 from n so that the loop below runs n/4 times.
   x, y and n are modified, so they must be modifiable lvalues.
*/
#define DOT(sum,x,y,n) {\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x)++ * *(y)++; /* fall through */\
  case 2: (sum) += *(x)++ * *(y)++; /* fall through */\
  case 1: (sum) += *(x)++ * *(y)++;\
          (n) -= 4;\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*(y)[0]+(x)[1]*(y)[1]+(x)[2]*(y)[2]+(x)[3]*(y)[3];\
    (x) += 4; (y) += 4;\
    (n) -= 4;\
  }\
}
/*
   DOT2 (unrolled, real) - adds x.y1 to sum1 and x.y2 to sum2 over the first n
   entries, two entries per loop pass.

   An odd leading entry is handled first. x, y1, y2 and n are modified, so
   they must be modifiable lvalues.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  if ((n) & 0x1) {\
    (sum1) += *(x) * *(y1)++;\
    (sum2) += *(x)++ * *(y2)++;\
    (n)--;\
  }\
  while ((n) > 0) {\
    (sum1) += (x)[0]*(y1)[0]+(x)[1]*(y1)[1];\
    (sum2) += (x)[0]*(y2)[0]+(x)[1]*(y2)[1];\
    (x) += 2; (y1) += 2; (y2) += 2;\
    (n) -= 2;\
  }\
}
/*
   SQR (unrolled, real) - adds to sum the squares x[i]*x[i] of the first n
   entries, four terms per loop pass.

   The switch consumes the n%4 leftover entries first (deliberate fall-through)
   and then subtracts 4 from n so that the loop below runs n/4 times.
   x and n are modified, so they must be modifiable lvalues.
*/
#define SQR(sum,x,n) {\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x) * *(x); (x)++; /* fall through */\
  case 2: (sum) += *(x) * *(x); (x)++; /* fall through */\
  case 1: (sum) += *(x) * *(x); (x)++;\
          (n) -= 4;\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*(x)[0]+(x)[1]*(x)[1]+(x)[2]*(x)[2]+(x)[3]*(x)[3];\
    (x) += 4;\
    (n) -= 4;\
  }\
}

131: #elif defined(PETSC_USE_WHILE_KERNELS)
/*
   DOT (while-loop, real) - adds to sum the products x[i]*y[i] for the first n
   entries. x, y and n are modified (n counts down), so they must be
   modifiable lvalues. A non-positive n performs no work.
*/
#define DOT(sum,x,y,n) {\
  while ((n)-- > 0) (sum) += *(x)++ * *(y)++;\
}
/*
   DOT2 (while-loop, real) - adds x.y1 to sum1 and x.y2 to sum2 over the first
   n entries. x, y1, y2 and n are modified (n counts down), so they must be
   modifiable lvalues. A non-positive n performs no work.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while ((n)-- > 0) {\
    (sum1) += *(x) * *(y1)++;\
    (sum2) += *(x)++ * *(y2)++;\
  }\
}
/*
   SQR (while-loop, real) - adds to sum the squares x[i]*x[i] of the first n
   entries. x and n are modified (n counts down), so they must be modifiable
   lvalues. A non-positive n performs no work.
*/
#define SQR(sum,x,n) {\
  while ((n)-- > 0) { (sum) += *(x) * *(x); (x)++; }\
}

139: #elif defined(PETSC_USE_BLAS_KERNELS)
140: EXTERN double ddot_();
141: #define DOT(sum,x,y,n) {int one=1;
142: sum=ddot_(&n,x,&one,y,&one);}
/*
   DOT2 (BLAS build, real) - adds x.y1 to sum1 and x.y2 to sum2 over the first
   n entries with a plain indexed loop; x, y1, y2 and n are not modified.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {int __i;\
  for (__i=0; __i<(n); __i++) {\
    (sum1) += (x)[__i]*(y1)[__i];\
    (sum2) += (x)[__i]*(y2)[__i];\
  }\
}
/*
   SQR (BLAS, real) - adds to sum the value returned by ddot_ of x with itself
   over the first n entries, stride 1.

   The result is accumulated (+=) so that this kernel agrees with the other
   SQR variants in this file. n must be an int lvalue because its address is
   passed to ddot_, which is declared alongside the BLAS DOT macro.
*/
#define SQR(sum,x,n) {int __one=1;\
  (sum) += ddot_(&(n),x,&__one,x,&__one);}

148: #else
/*
   DOT (default, real) - adds to sum the products x[i]*y[i] for i in [0,n)
   with a plain indexed loop; x, y and n are not modified.
*/
#define DOT(sum,x,y,n) {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*(y)[__i];\
}
/*
   DOT2 (default, real) - adds x.y1 to sum1 and x.y2 to sum2 over i in [0,n)
   with a plain indexed loop; x, y1, y2 and n are not modified.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {int __i;\
  for (__i=0; __i<(n); __i++) {\
    (sum1) += (x)[__i]*(y1)[__i];\
    (sum2) += (x)[__i]*(y2)[__i];\
  }\
}
/*
   SQR (default, real) - adds to sum the squares x[i]*x[i] for i in [0,n)
   with a plain indexed loop; x and n are not modified.
*/
#define SQR(sum,x,n) {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*(x)[__i];\
}
155: #endif

157: #else

159: #ifdef PETSC_USE_UNROLL_KERNELS
/*
   DOT (unrolled, complex) - adds to sum the products x[i]*conj(y[i]) for the
   first n entries, four terms per loop pass.

   The switch consumes the n%4 leftover entries first (deliberate fall-through)
   and then subtracts 4 from n so that the loop below runs n/4 times.
   x, y and n are modified, so they must be modifiable lvalues.
*/
#define DOT(sum,x,y,n) {\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x) * conj(*(y)); (x)++; (y)++; /* fall through */\
  case 2: (sum) += *(x) * conj(*(y)); (x)++; (y)++; /* fall through */\
  case 1: (sum) += *(x) * conj(*(y)); (x)++; (y)++;\
          (n) -= 4;\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*conj((y)[0])+(x)[1]*conj((y)[1])+(x)[2]*conj((y)[2])+(x)[3]*conj((y)[3]);\
    (x) += 4; (y) += 4;\
    (n) -= 4;\
  }\
}
/*
   DOT2 (unrolled, complex) - adds x[i]*conj(y1[i]) to sum1 and
   x[i]*conj(y2[i]) to sum2 over the first n entries, two entries per pass.

   An odd leading entry is handled first. The pointers are advanced in
   separate statements: applying ++ to the result of conj() is not valid,
   since a function result is not an lvalue.
   x, y1, y2 and n are modified, so they must be modifiable lvalues.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  if ((n) & 0x1) {\
    (sum1) += *(x) * conj(*(y1));\
    (sum2) += *(x) * conj(*(y2));\
    (x)++; (y1)++; (y2)++;\
    (n)--;\
  }\
  while ((n) > 0) {\
    (sum1) += (x)[0]*conj((y1)[0])+(x)[1]*conj((y1)[1]);\
    (sum2) += (x)[0]*conj((y2)[0])+(x)[1]*conj((y2)[1]);\
    (x) += 2; (y1) += 2; (y2) += 2;\
    (n) -= 2;\
  }\
}
/*
   SQR (unrolled, complex) - adds to sum the products x[i]*conj(x[i]) of the
   first n entries, four terms per loop pass.

   The switch consumes the n%4 leftover entries first (deliberate fall-through)
   and then subtracts 4 from n so that the loop below runs n/4 times.
   x and n are modified, so they must be modifiable lvalues.
*/
#define SQR(sum,x,n) {\
  switch ((n) & 0x3) {\
  case 3: (sum) += *(x) * conj(*(x)); (x)++; /* fall through */\
  case 2: (sum) += *(x) * conj(*(x)); (x)++; /* fall through */\
  case 1: (sum) += *(x) * conj(*(x)); (x)++;\
          (n) -= 4;\
  case 0: break;\
  }\
  while ((n) > 0) {\
    (sum) += (x)[0]*conj((x)[0])+(x)[1]*conj((x)[1])+(x)[2]*conj((x)[2])+(x)[3]*conj((x)[3]);\
    (x) += 4;\
    (n) -= 4;\
  }\
}

181: #elif defined(PETSC_USE_WHILE_KERNELS)
/*
   DOT (while-loop, complex) - adds to sum the products x[i]*conj(y[i]) for
   the first n entries. x, y and n are modified (n counts down), so they must
   be modifiable lvalues. A non-positive n performs no work.
*/
#define DOT(sum,x,y,n) {\
  while ((n)-- > 0) (sum) += *(x)++ * conj(*(y)++);\
}
/*
   DOT2 (while-loop, complex) - adds x[i]*conj(y1[i]) to sum1 and
   x[i]*conj(y2[i]) to sum2 over the first n entries. x, y1, y2 and n are
   modified (n counts down), so they must be modifiable lvalues.
   A non-positive n performs no work.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while ((n)-- > 0) {\
    (sum1) += *(x) * conj(*(y1));\
    (sum2) += *(x) * conj(*(y2));\
    (x)++; (y1)++; (y2)++;\
  }\
}
/*
   SQR (while-loop, complex) - adds to sum the products x[i]*conj(x[i]) of the
   first n entries. x and n are modified (n counts down), so they must be
   modifiable lvalues. A non-positive n performs no work.
*/
#define SQR(sum,x,n) {\
  while ((n)-- > 0) { (sum) += *(x) * conj(*(x)); (x)++; }\
}

189: #else
/*
   DOT (default, complex) - adds to sum the products x[i]*conj(y[i]) for i in
   [0,n) with a plain indexed loop; x, y and n are not modified.
*/
#define DOT(sum,x,y,n) {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*conj((y)[__i]);\
}
/*
   DOT2 (default, complex) - adds x[i]*conj(y1[i]) to sum1 and
   x[i]*conj(y2[i]) to sum2 for i in [0,n) with a plain indexed loop;
   x, y1, y2 and n are not modified.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {int __i;\
  for (__i=0; __i<(n); __i++) {\
    (sum1) += (x)[__i]*conj((y1)[__i]);\
    (sum2) += (x)[__i]*conj((y2)[__i]);\
  }\
}
/*
   SQR (default, complex) - adds to sum the products x[i]*conj(x[i]) for i in
   [0,n) with a plain indexed loop; x and n are not modified.
*/
#define SQR(sum,x,n) {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*conj((x)[__i]);\
}
196: #endif

198: #endif

200: #endif