Actual source code: pvec2.c
/*$Id: pvec2.c,v 1.52 2001/03/23 23:21:26 balay Exp $*/

/*
     Code for some of the parallel vector primitives.
*/
#include "src/vec/impls/mpi/pvecimpl.h"
#include "src/inline/dot.h"

#define do_not_use_ethernet
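
/*
   Ethernet_Allreduce() below appears to be a chain-based substitute for a
   summing MPI_Allreduce(): partial sums travel up the ranks with message tag
   837 (each rank adds its local values before forwarding), and the completed
   result is relayed back down with tag 838.  Note that the type and op
   arguments are ignored; the body assumes MPI_DOUBLE data and summation.
*/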
int Ethernet_Allreduce(PetscReal *in,PetscReal *out,int n,MPI_Datatype type,MPI_Op op,MPI_Comm comm)
{
  int        i,rank,size,ierr;
  MPI_Status status;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  if (rank) {
    MPI_Recv(out,n,MPI_DOUBLE,rank-1,837,comm,&status);
    for (i=0; i<n; i++) in[i] += out[i];
  }
  if (rank != size - 1) {
    MPI_Send(in,n,MPI_DOUBLE,rank+1,837,comm);
  }
  if (rank == size-1) {
    for (i=0; i<n; i++) out[i] = in[i];
  } else {
    MPI_Recv(out,n,MPI_DOUBLE,rank+1,838,comm,&status);
  }
  if (rank) {
    MPI_Send(out,n,MPI_DOUBLE,rank-1,838,comm);
  }
  return 0;
}
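
/*
   VecMDot_MPI() and VecMTDot_MPI() compute the nv dot products locally with
   VecMDot_Seq()/VecMTDot_Seq() and then combine the partial results across
   all processes with a single MPI_Allreduce().  A 128-entry work array on
   the stack avoids calling PetscMalloc() for small nv.
*/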
int VecMDot_MPI(int nv,Vec xin,const Vec y[],Scalar *z)
{
  Scalar awork[128],*work = awork;
  int    ierr;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(Scalar),&work);
  }
  VecMDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}

int VecMTDot_MPI(int nv,Vec xin,const Vec y[],Scalar *z)
{
  Scalar awork[128],*work = awork;
  int    ierr;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(Scalar),&work);
  }
  VecMTDot_Seq(nv,xin,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,PetscSum_Op,xin->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}
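
/*
   VecNorm_MPI() builds the requested norm from locally computed pieces: the
   local sum of squares (NORM_2), sum of absolute values (NORM_1), or local
   maximum (NORM_INFINITY) is combined across processes with MPI_Allreduce().
   For NORM_1_AND_2 the two local quantities are reduced together in a single
   call and the square root of the summed squares is taken afterwards.
*/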
int VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI   *x = (Vec_MPI*)xin->data;
  PetscReal sum,work = 0.0;
  Scalar    *xx = x->array;
  int       n = xin->n,ierr;

  if (type == NORM_2) {

#if defined(PETSC_USE_FORTRAN_KERNEL_NORMSQR)
    fortrannormsqr_(xx,&n,&work);
#else
    /* int i; for (i=0; i<n; i++) work += xx[i]*xx[i]; */
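
    /*
       Hand-unrolled sum of squares: the switch below handles the n mod 4
       leftover elements by falling through the cases, and the loop that
       follows then processes four elements per pass.  The single "n -= 4"
       in case 1 may over-decrement n by up to three, but because the loop
       advances xx by four and subtracts four from n each pass while testing
       n > 0, every element is still visited exactly once.
    */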
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                            xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
    /*
       On the IBM Power2 Super with four memory cards unrolling to 4
       worked better than unrolling to 8.
    */
    /*
    switch (n & 0x7) {
      case 7: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 6: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 5: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 4: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 8;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                            xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3])+
                            xx[4]*PetscConj(xx[4])+xx[5]*PetscConj(xx[5])+
                            xx[6]*PetscConj(xx[6])+xx[7]*PetscConj(xx[7]));
      xx += 8; n -= 8;
    }
    */
#endif
    MPI_Allreduce(&work,&sum,1,MPI_DOUBLE,MPI_SUM,xin->comm);
    *z = sqrt(sum);
    PetscLogFlops(2*xin->n);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_Seq(xin,NORM_1,&work);
    /* Find the global sum */
    MPI_Allreduce(&work,z,1,MPI_DOUBLE,MPI_SUM,xin->comm);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_Seq(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPI_Allreduce(&work,z,1,MPI_DOUBLE,MPI_MAX,xin->comm);
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_Seq(xin,NORM_1,temp);
    VecNorm_Seq(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPI_Allreduce(temp,z,2,MPI_DOUBLE,MPI_SUM,xin->comm);
    z[1] = sqrt(z[1]);
  }
  return(0);
}

/*
   These two functions are the MPI reduction operations used for max and min
   with index.  The calls below to MPI_Op_create() convert the functions
   Vec[Max,Min]_Local() into the MPI operators Vec[Max,Min]_Local_Op.
*/
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;
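
/*
   Each process contributes a pair of MPI_DOUBLEs, {value, global index}, and
   the operators keep whichever pair carries the larger (respectively smaller)
   value.  A minimal sketch of registering and applying such an operator; the
   names max_op, pair, result, local_value, global_index and comm are purely
   illustrative:

     MPI_Op    max_op;
     PetscReal pair[2],result[2];
     MPI_Op_create(VecMax_Local,1,&max_op);
     pair[0] = local_value; pair[1] = (PetscReal)global_index;
     MPI_Allreduce(pair,result,2,MPI_DOUBLE,max_op,comm);
*/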

EXTERN_C_BEGIN
void VecMax_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPI_DOUBLE) {
    (*PetscErrorPrintf)("Can only handle MPI_DOUBLE data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] > xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscStackPop;
  return; /* cannot return a value */
}
EXTERN_C_END

EXTERN_C_BEGIN
void VecMin_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  PetscFunctionBegin;
  if (*datatype != MPI_DOUBLE) {
    (*PetscErrorPrintf)("Can only handle MPI_DOUBLE data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] < xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  }
  PetscStackPop;
  return;
}
EXTERN_C_END

int VecMax_MPI(Vec xin,int *idx,PetscReal *z)
{
  int       ierr;
  PetscReal work;

  /* Find the local max */
  VecMax_Seq(xin,idx,&work);

  /* Find the global max */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPI_DOUBLE,MPI_MAX,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    int       rstart;

    if (!VecMax_Local_Op) {
      MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);
    }

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPI_DOUBLE,VecMax_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (int)z2[1];
  }
  return(0);
}

int VecMin_MPI(Vec xin,int *idx,PetscReal *z)
{
  int       ierr;
  PetscReal work;

  /* Find the local Min */
  VecMin_Seq(xin,idx,&work);

  /* Find the global Min */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPI_DOUBLE,MPI_MIN,xin->comm);
  } else {
    PetscReal work2[2],z2[2];
    int       rstart;

    if (!VecMin_Local_Op) {
      MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);
    }

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPI_DOUBLE,VecMin_Local_Op,xin->comm);
    *z   = z2[0];
    *idx = (int)z2[1];
  }
  return(0);
}