Actual source code: pvec2.c

/*$Id: pvec2.c,v 1.52 2001/03/23 23:21:26 balay Exp $*/

/*
     Code for some of the parallel vector primitives.
*/
#include "src/vec/impls/mpi/pvecimpl.h"
#include "src/inline/dot.h"

#define do_not_use_ethernet
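/*
   Ethernet_Allreduce - Hand-rolled summation of an array of doubles over a chain
   of processes: all but the first rank receive the partial sum from rank-1 and
   add it into their own in[] (overwriting it), then forward in[] up the chain;
   the last rank holds the complete sum, which is passed back down the chain into
   out[] on every rank.  The do_not_use_ethernet flag above suggests this
   experimental path is not normally used; the routines below call
   MPI_Allreduce() directly.
*/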
int Ethernet_Allreduce(PetscReal *in,PetscReal *out,int n,MPI_Datatype type,MPI_Op op,MPI_Comm comm)
{
  int        i,rank,size,ierr;
  MPI_Status status;


  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  if (rank) {
    MPI_Recv(out,n,MPI_DOUBLE,rank-1,837,comm,&status);
    for (i=0; i<n; i++) in[i] += out[i];
  }
  if (rank != size - 1) {
    MPI_Send(in,n,MPI_DOUBLE,rank+1,837,comm);
  }
  if (rank == size-1) {
    for (i=0; i<n; i++) out[i] = in[i];
  } else {
    MPI_Recv(out,n,MPI_DOUBLE,rank+1,838,comm,&status);
  }
  if (rank) {
    MPI_Send(out,n,MPI_DOUBLE,rank-1,838,comm);
  }
  return 0;
}
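
/*
   Usage sketch (illustrative only, not part of the routines below):
   Ethernet_Allreduce() mirrors the MPI_Allreduce() calling sequence, e.g.

       PetscReal local[2],global[2];
       ...
       Ethernet_Allreduce(local,global,2,MPI_DOUBLE,MPI_SUM,comm);

   Note that the type and op arguments are accepted only for signature
   compatibility; the body always sums MPI_DOUBLE values, and local[] is
   overwritten on all but the first rank.
*/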


int VecMDot_MPI(int nv,Vec xin,const Vec y[],Scalar *z)
{
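  /*
     Each process computes its nv local dot products with VecMDot_Seq() and the
     partial results are summed across the communicator with a single
     MPI_Allreduce(); the 128-entry stack buffer avoids a heap allocation for
     the common case of few vectors.
  */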
  Scalar awork[128],*work = awork;
  int    ierr;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(Scalar),&work);
  }
  VecMDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}

int VecMTDot_MPI(int nv,Vec xin,const Vec y[],Scalar *z)
{
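  /*
     Same pattern as VecMDot_MPI() above, but built on VecMTDot_Seq(), the
     "transpose" (unconjugated) local dot product kernel.
  */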
  Scalar awork[128],*work = awork;
  int    ierr;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(Scalar),&work);
  }
  VecMTDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}

int VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
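  /*
     Each norm is computed from the local piece followed by one MPI_Allreduce():
     squared 2-norms and 1-norms are summed, infinity norms are maximized, and
     NORM_1_AND_2 combines both reductions into a single message.
  */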
  Vec_MPI      *x = (Vec_MPI*)xin->data;
  PetscReal    sum,work = 0.0;
  Scalar       *xx = x->array;
  int          n = xin->n,ierr;

  if (type == NORM_2) {

#if defined(PETSC_USE_FORTRAN_KERNEL_NORMSQR)
    fortrannormsqr_(xx,&n,&work);
#else
    /* int i; for (i=0; i<n; i++) work += xx[i]*xx[i];   */
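    /*
       Unrolled version of the loop above: the switch disposes of the n mod 4
       leftover entries, then the while loop accumulates four entries per pass.
       n may end up negative, which is harmless since only n > 0 is tested.
    */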
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                        xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
    /*
         On the IBM Power2 Super with four memory cards unrolling to 4
         worked better than unrolling to 8.
    */
    /*
    switch (n & 0x7) {
      case 7: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 6: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 5: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 4: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 8;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                        xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3])+
                        xx[4]*PetscConj(xx[4])+xx[5]*PetscConj(xx[5])+
                        xx[6]*PetscConj(xx[6])+xx[7]*PetscConj(xx[7]));
      xx += 8; n -= 8;
    }
    */
#endif
    MPI_Allreduce(&work,&sum,1,MPI_DOUBLE,MPI_SUM,xin->comm);
    *z = sqrt(sum);
    PetscLogFlops(2*xin->n);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_Seq(xin,NORM_1,&work);
    /* Find the global sum */
    MPI_Allreduce(&work,z,1,MPI_DOUBLE,MPI_SUM,xin->comm);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_Seq(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPI_Allreduce(&work,z,1,MPI_DOUBLE,MPI_MAX,xin->comm);
  } else if (type == NORM_1_AND_2) {
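    /* Compute both local norms, then reduce the 1-norm and the squared 2-norm
       together so only one MPI_Allreduce() is needed */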
    PetscReal temp[2];
    VecNorm_Seq(xin,NORM_1,temp);
    VecNorm_Seq(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPI_Allreduce(temp,z,2,MPI_DOUBLE,MPI_SUM,xin->comm);
    z[1] = sqrt(z[1]);
  }
  return(0);
}

/*
       These two functions are the MPI reduction operations used for max and min with index.
   The calls below to MPI_Op_create() convert the functions Vec[Max,Min]_Local() to the
   MPI operators Vec[Max,Min]_Local_Op.
*/
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;
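
/*
   Both reduction functions operate on pairs of PetscReals laid out as
   (value, index): slot 0 carries the extreme value and slot 1 carries the
   global index of that entry, so the winning index travels with its value
   through the reduction.  Only MPI_DOUBLE data is supported.
*/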

EXTERN_C_BEGIN
void VecMax_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPI_DOUBLE) {
    (*PetscErrorPrintf)("Can only handle MPI_DOUBLE data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] > xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscStackPop;
  return; /* cannot return a value */
}
EXTERN_C_END

EXTERN_C_BEGIN
void VecMin_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPI_DOUBLE) {
    (*PetscErrorPrintf)("Can only handle MPI_DOUBLE data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] < xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscStackPop;
  return;
}
EXTERN_C_END

int VecMax_MPI(Vec xin,int *idx,PetscReal *z)
{
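  /*
     Without an index request a plain MPI_MAX reduction suffices; with one, the
     (value, global index) pair is reduced with the user-defined VecMax_Local_Op,
     created on first use, so the location of the maximum is returned as well.
  */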
  int       ierr;
  PetscReal work;

  /* Find the local max */
  VecMax_Seq(xin,idx,&work);

  /* Find the global max */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPI_DOUBLE,MPI_MAX,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    int       rstart;

    if (!VecMax_Local_Op) {
      MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);
    }

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPI_DOUBLE,VecMax_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (int)z2[1];
  }
  return(0);
}

int VecMin_MPI(Vec xin,int *idx,PetscReal *z)
{
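  /*
     Mirror image of VecMax_MPI() above: MPI_MIN for the value-only case,
     otherwise the (value, global index) pair is reduced with VecMin_Local_Op.
  */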
  int       ierr;
  PetscReal work;

  /* Find the local min */
  VecMin_Seq(xin,idx,&work);

  /* Find the global min */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPI_DOUBLE,MPI_MIN,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    int       rstart;

    if (!VecMin_Local_Op) {
      MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);
    }

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPI_DOUBLE,VecMin_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (int)z2[1];
  }
  return(0);
}