Actual source code: dvec2.c
petsc-3.5.4 2015-05-23
2: /*
3: Defines some vector operation functions that are shared by
4: sequential and parallel vectors.
5: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <petsc-private/kernels/petscaxpy.h>
8: #include <petscthreadcomm.h>
10: #if defined(PETSC_THREADCOMM_ACTIVE)
11: PetscErrorCode VecMDot_kernel(PetscInt thread_id,Vec xin,PetscInt *nvp,Vec *yvec,PetscThreadCommReduction red)
12: {
14: PetscInt *trstarts=xin->map->trstarts;
15: PetscInt start,end,nv=*nvp,i,j;
16: PetscScalar *xx,*yy;
17: PetscScalar sum;
19: start = trstarts[thread_id];
20: end = trstarts[thread_id+1];
21: VecGetArray(xin,&xx);
22: for (i=0; i<nv; i++) {
23: sum = 0.;
24: VecGetArray(yvec[i],&yy);
25: for (j=start; j<end; j++) sum += xx[j]*PetscConj(yy[j]);
26: PetscThreadReductionKernelPost(thread_id,red,&sum);
27: VecRestoreArray(yvec[i],&yy);
28: }
29: VecRestoreArray(xin,&xx);
31: return 0;
32: }
36: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
37: {
38: PetscErrorCode ierr;
39: PetscThreadCommReduction red;
40: PetscInt *nvp,nreds,i,j;
43: PetscThreadCommGetInts(PetscObjectComm((PetscObject)xin),&nvp,NULL,NULL);
44: for (i=0; i<nv; i+=nreds) {
45: nreds = PetscMin(nv-i,PETSC_REDUCTIONS_MAX);
46: PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_SUM,PETSC_SCALAR,nreds,&red);
47: *nvp = nreds;
48: PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMDot_kernel,xin,nvp,(Vec*)yin+i,red);
49: for (j=0; j<nreds; j++) {
50: PetscThreadReductionEnd(red,&z[i+j]);
51: }
52: }
53: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
54: return(0);
55: }
57: #else
58: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
59: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
62: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
63: {
64: PetscErrorCode ierr;
65: PetscInt i,nv_rem,n = xin->map->n;
66: PetscScalar sum0,sum1,sum2,sum3;
67: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
68: Vec *yy;
71: sum0 = 0.0;
72: sum1 = 0.0;
73: sum2 = 0.0;
75: i = nv;
76: nv_rem = nv&0x3;
77: yy = (Vec*)yin;
78: VecGetArrayRead(xin,&x);
80: switch (nv_rem) {
81: case 3:
82: VecGetArrayRead(yy[0],&yy0);
83: VecGetArrayRead(yy[1],&yy1);
84: VecGetArrayRead(yy[2],&yy2);
85: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
86: VecRestoreArrayRead(yy[0],&yy0);
87: VecRestoreArrayRead(yy[1],&yy1);
88: VecRestoreArrayRead(yy[2],&yy2);
89: z[0] = sum0;
90: z[1] = sum1;
91: z[2] = sum2;
92: break;
93: case 2:
94: VecGetArrayRead(yy[0],&yy0);
95: VecGetArrayRead(yy[1],&yy1);
96: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
97: VecRestoreArrayRead(yy[0],&yy0);
98: VecRestoreArrayRead(yy[1],&yy1);
99: z[0] = sum0;
100: z[1] = sum1;
101: break;
102: case 1:
103: VecGetArrayRead(yy[0],&yy0);
104: fortranmdot1_(x,yy0,&n,&sum0);
105: VecRestoreArrayRead(yy[0],&yy0);
106: z[0] = sum0;
107: break;
108: case 0:
109: break;
110: }
111: z += nv_rem;
112: i -= nv_rem;
113: yy += nv_rem;
115: while (i >0) {
116: sum0 = 0.;
117: sum1 = 0.;
118: sum2 = 0.;
119: sum3 = 0.;
120: VecGetArrayRead(yy[0],&yy0);
121: VecGetArrayRead(yy[1],&yy1);
122: VecGetArrayRead(yy[2],&yy2);
123: VecGetArrayRead(yy[3],&yy3);
124: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
125: VecRestoreArrayRead(yy[0],&yy0);
126: VecRestoreArrayRead(yy[1],&yy1);
127: VecRestoreArrayRead(yy[2],&yy2);
128: VecRestoreArrayRead(yy[3],&yy3);
129: yy += 4;
130: z[0] = sum0;
131: z[1] = sum1;
132: z[2] = sum2;
133: z[3] = sum3;
134: z += 4;
135: i -= 4;
136: }
137: VecRestoreArrayRead(xin,&x);
138: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
139: return(0);
140: }
142: #else
145: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
146: {
147: PetscErrorCode ierr;
148: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
149: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
150: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
151: Vec *yy;
154: sum0 = 0.;
155: sum1 = 0.;
156: sum2 = 0.;
158: i = nv;
159: nv_rem = nv&0x3;
160: yy = (Vec*)yin;
161: j = n;
162: VecGetArrayRead(xin,&xbase);
163: x = xbase;
165: switch (nv_rem) {
166: case 3:
167: VecGetArrayRead(yy[0],&yy0);
168: VecGetArrayRead(yy[1],&yy1);
169: VecGetArrayRead(yy[2],&yy2);
170: switch (j_rem=j&0x3) {
171: case 3:
172: x2 = x[2];
173: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
174: sum2 += x2*PetscConj(yy2[2]);
175: case 2:
176: x1 = x[1];
177: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
178: sum2 += x1*PetscConj(yy2[1]);
179: case 1:
180: x0 = x[0];
181: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
182: sum2 += x0*PetscConj(yy2[0]);
183: case 0:
184: x += j_rem;
185: yy0 += j_rem;
186: yy1 += j_rem;
187: yy2 += j_rem;
188: j -= j_rem;
189: break;
190: }
191: while (j>0) {
192: x0 = x[0];
193: x1 = x[1];
194: x2 = x[2];
195: x3 = x[3];
196: x += 4;
198: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
199: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
200: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
201: j -= 4;
202: }
203: z[0] = sum0;
204: z[1] = sum1;
205: z[2] = sum2;
206: VecRestoreArrayRead(yy[0],&yy0);
207: VecRestoreArrayRead(yy[1],&yy1);
208: VecRestoreArrayRead(yy[2],&yy2);
209: break;
210: case 2:
211: VecGetArrayRead(yy[0],&yy0);
212: VecGetArrayRead(yy[1],&yy1);
213: switch (j_rem=j&0x3) {
214: case 3:
215: x2 = x[2];
216: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
217: case 2:
218: x1 = x[1];
219: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
220: case 1:
221: x0 = x[0];
222: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
223: case 0:
224: x += j_rem;
225: yy0 += j_rem;
226: yy1 += j_rem;
227: j -= j_rem;
228: break;
229: }
230: while (j>0) {
231: x0 = x[0];
232: x1 = x[1];
233: x2 = x[2];
234: x3 = x[3];
235: x += 4;
237: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
238: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
239: j -= 4;
240: }
241: z[0] = sum0;
242: z[1] = sum1;
244: VecRestoreArrayRead(yy[0],&yy0);
245: VecRestoreArrayRead(yy[1],&yy1);
246: break;
247: case 1:
248: VecGetArrayRead(yy[0],&yy0);
249: switch (j_rem=j&0x3) {
250: case 3:
251: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
252: case 2:
253: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
254: case 1:
255: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
256: case 0:
257: x += j_rem;
258: yy0 += j_rem;
259: j -= j_rem;
260: break;
261: }
262: while (j>0) {
263: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
264: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
265: yy0 +=4;
266: j -= 4; x+=4;
267: }
268: z[0] = sum0;
270: VecRestoreArrayRead(yy[0],&yy0);
271: break;
272: case 0:
273: break;
274: }
275: z += nv_rem;
276: i -= nv_rem;
277: yy += nv_rem;
279: while (i >0) {
280: sum0 = 0.;
281: sum1 = 0.;
282: sum2 = 0.;
283: sum3 = 0.;
284: VecGetArrayRead(yy[0],&yy0);
285: VecGetArrayRead(yy[1],&yy1);
286: VecGetArrayRead(yy[2],&yy2);
287: VecGetArrayRead(yy[3],&yy3);
289: j = n;
290: x = xbase;
291: switch (j_rem=j&0x3) {
292: case 3:
293: x2 = x[2];
294: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
295: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
296: case 2:
297: x1 = x[1];
298: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
299: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
300: case 1:
301: x0 = x[0];
302: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
303: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
304: case 0:
305: x += j_rem;
306: yy0 += j_rem;
307: yy1 += j_rem;
308: yy2 += j_rem;
309: yy3 += j_rem;
310: j -= j_rem;
311: break;
312: }
313: while (j>0) {
314: x0 = x[0];
315: x1 = x[1];
316: x2 = x[2];
317: x3 = x[3];
318: x += 4;
320: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
321: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
322: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
323: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
324: j -= 4;
325: }
326: z[0] = sum0;
327: z[1] = sum1;
328: z[2] = sum2;
329: z[3] = sum3;
330: z += 4;
331: i -= 4;
332: VecRestoreArrayRead(yy[0],&yy0);
333: VecRestoreArrayRead(yy[1],&yy1);
334: VecRestoreArrayRead(yy[2],&yy2);
335: VecRestoreArrayRead(yy[3],&yy3);
336: yy += 4;
337: }
338: VecRestoreArrayRead(xin,&xbase);
339: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
340: return(0);
341: }
342: #endif
343: #endif
344: /* ----------------------------------------------------------------------------*/
347: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
348: {
349: PetscErrorCode ierr;
350: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
351: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
352: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
353: Vec *yy;
356: sum0 = 0.;
357: sum1 = 0.;
358: sum2 = 0.;
360: i = nv;
361: nv_rem = nv&0x3;
362: yy = (Vec*)yin;
363: j = n;
364: VecGetArrayRead(xin,&xbase);
365: x = xbase;
367: switch (nv_rem) {
368: case 3:
369: VecGetArrayRead(yy[0],&yy0);
370: VecGetArrayRead(yy[1],&yy1);
371: VecGetArrayRead(yy[2],&yy2);
372: switch (j_rem=j&0x3) {
373: case 3:
374: x2 = x[2];
375: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
376: sum2 += x2*yy2[2];
377: case 2:
378: x1 = x[1];
379: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
380: sum2 += x1*yy2[1];
381: case 1:
382: x0 = x[0];
383: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
384: sum2 += x0*yy2[0];
385: case 0:
386: x += j_rem;
387: yy0 += j_rem;
388: yy1 += j_rem;
389: yy2 += j_rem;
390: j -= j_rem;
391: break;
392: }
393: while (j>0) {
394: x0 = x[0];
395: x1 = x[1];
396: x2 = x[2];
397: x3 = x[3];
398: x += 4;
400: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
401: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
402: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
403: j -= 4;
404: }
405: z[0] = sum0;
406: z[1] = sum1;
407: z[2] = sum2;
408: VecRestoreArrayRead(yy[0],&yy0);
409: VecRestoreArrayRead(yy[1],&yy1);
410: VecRestoreArrayRead(yy[2],&yy2);
411: break;
412: case 2:
413: VecGetArrayRead(yy[0],&yy0);
414: VecGetArrayRead(yy[1],&yy1);
415: switch (j_rem=j&0x3) {
416: case 3:
417: x2 = x[2];
418: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
419: case 2:
420: x1 = x[1];
421: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
422: case 1:
423: x0 = x[0];
424: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
425: case 0:
426: x += j_rem;
427: yy0 += j_rem;
428: yy1 += j_rem;
429: j -= j_rem;
430: break;
431: }
432: while (j>0) {
433: x0 = x[0];
434: x1 = x[1];
435: x2 = x[2];
436: x3 = x[3];
437: x += 4;
439: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
440: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
441: j -= 4;
442: }
443: z[0] = sum0;
444: z[1] = sum1;
446: VecRestoreArrayRead(yy[0],&yy0);
447: VecRestoreArrayRead(yy[1],&yy1);
448: break;
449: case 1:
450: VecGetArrayRead(yy[0],&yy0);
451: switch (j_rem=j&0x3) {
452: case 3:
453: x2 = x[2]; sum0 += x2*yy0[2];
454: case 2:
455: x1 = x[1]; sum0 += x1*yy0[1];
456: case 1:
457: x0 = x[0]; sum0 += x0*yy0[0];
458: case 0:
459: x += j_rem;
460: yy0 += j_rem;
461: j -= j_rem;
462: break;
463: }
464: while (j>0) {
465: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
466: j -= 4; x+=4;
467: }
468: z[0] = sum0;
470: VecRestoreArrayRead(yy[0],&yy0);
471: break;
472: case 0:
473: break;
474: }
475: z += nv_rem;
476: i -= nv_rem;
477: yy += nv_rem;
479: while (i >0) {
480: sum0 = 0.;
481: sum1 = 0.;
482: sum2 = 0.;
483: sum3 = 0.;
484: VecGetArrayRead(yy[0],&yy0);
485: VecGetArrayRead(yy[1],&yy1);
486: VecGetArrayRead(yy[2],&yy2);
487: VecGetArrayRead(yy[3],&yy3);
488: x = xbase;
490: j = n;
491: switch (j_rem=j&0x3) {
492: case 3:
493: x2 = x[2];
494: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
495: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
496: case 2:
497: x1 = x[1];
498: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
499: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
500: case 1:
501: x0 = x[0];
502: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
503: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
504: case 0:
505: x += j_rem;
506: yy0 += j_rem;
507: yy1 += j_rem;
508: yy2 += j_rem;
509: yy3 += j_rem;
510: j -= j_rem;
511: break;
512: }
513: while (j>0) {
514: x0 = x[0];
515: x1 = x[1];
516: x2 = x[2];
517: x3 = x[3];
518: x += 4;
520: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
521: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
522: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
523: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
524: j -= 4;
525: }
526: z[0] = sum0;
527: z[1] = sum1;
528: z[2] = sum2;
529: z[3] = sum3;
530: z += 4;
531: i -= 4;
532: VecRestoreArrayRead(yy[0],&yy0);
533: VecRestoreArrayRead(yy[1],&yy1);
534: VecRestoreArrayRead(yy[2],&yy2);
535: VecRestoreArrayRead(yy[3],&yy3);
536: yy += 4;
537: }
538: VecRestoreArrayRead(xin,&xbase);
539: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
540: return(0);
541: }
543: #if defined(PETSC_THREADCOMM_ACTIVE)
544: PetscErrorCode VecMax_kernel(PetscInt thread_id,Vec xin,PetscThreadCommReduction red)
545: {
546: PetscErrorCode ierr;
547: PetscInt *trstarts=xin->map->trstarts;
548: PetscInt start,end,i;
549: const PetscScalar *xx;
550: PetscReal lred[2],tmp;
552: start = trstarts[thread_id];
553: end = trstarts[thread_id+1];
554: VecGetArrayRead(xin,&xx);
555: lred[0] = xx[start]; lred[1] = start;
556: for (i=start+1; i<end; i++) {
557: if ((tmp = PetscRealPart(xx[i])) > lred[0]) { lred[0] = tmp; lred[1] = i;}
558: }
559: VecRestoreArrayRead(xin,&xx);
560: PetscThreadReductionKernelPost(thread_id,red,lred);
561: return 0;
562: }
566: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
567: {
568: PetscErrorCode ierr;
569: PetscInt n=xin->map->n;
570: PetscThreadCommReduction red;
571: PetscReal out[2];
574: if (!n) {
575: *z = PETSC_MIN_REAL;
576: *idx = -1;
577: } else {
578: PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_MAXLOC,PETSC_REAL,1,&red);
579: PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMax_kernel,xin,red);
580: PetscThreadReductionEnd(red,out);
581: *z = out[0];
582: if (idx) *idx = (PetscInt)out[1];
583: }
584: return(0);
585: }
586: #else
589: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
590: {
591: PetscInt i,j=0,n = xin->map->n;
592: PetscReal max,tmp;
593: const PetscScalar *xx;
594: PetscErrorCode ierr;
597: VecGetArrayRead(xin,&xx);
598: if (!n) {
599: max = PETSC_MIN_REAL;
600: j = -1;
601: } else {
602: max = PetscRealPart(*xx++); j = 0;
603: for (i=1; i<n; i++) {
604: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
605: }
606: }
607: *z = max;
608: if (idx) *idx = j;
609: VecRestoreArrayRead(xin,&xx);
610: return(0);
611: }
612: #endif
614: #if defined(PETSC_THREADCOMM_ACTIVE)
615: PetscErrorCode VecMin_kernel(PetscInt thread_id,Vec xin,PetscThreadCommReduction red)
616: {
617: PetscErrorCode ierr;
618: PetscInt *trstarts=xin->map->trstarts;
619: PetscInt start,end,i;
620: const PetscScalar *xx;
621: PetscReal lred[2],tmp;
623: start = trstarts[thread_id];
624: end = trstarts[thread_id+1];
625: VecGetArrayRead(xin,&xx);
626: lred[0] = xx[start]; lred[1] = start;
627: for (i=start+1;i<end;i++) {
628: if ((tmp = PetscRealPart(xx[i])) < lred[0]) { lred[0] = tmp; lred[1] = i;}
629: }
630: VecRestoreArrayRead(xin,&xx);
631: PetscThreadReductionKernelPost(thread_id,red,lred);
632: return 0;
633: }
637: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
638: {
639: PetscErrorCode ierr;
640: PetscInt n=xin->map->n;
641: PetscThreadCommReduction red;
642: PetscReal out[2];
645: if (!n) {
646: *z = PETSC_MAX_REAL;
647: *idx = -1;
648: } else {
649: PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_MINLOC,PETSC_REAL,1,&red);
650: PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMin_kernel,xin,red);
651: PetscThreadReductionEnd(red,out);
652: *z = out[0];
653: if (idx) *idx = (PetscInt)out[1];
654: }
655: return(0);
656: }
657: #else
660: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
661: {
662: PetscInt i,j=0,n = xin->map->n;
663: PetscReal min,tmp;
664: const PetscScalar *xx;
665: PetscErrorCode ierr;
668: VecGetArrayRead(xin,&xx);
669: if (!n) {
670: min = PETSC_MAX_REAL;
671: j = -1;
672: } else {
673: min = PetscRealPart(*xx++); j = 0;
674: for (i=1; i<n; i++) {
675: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
676: }
677: }
678: *z = min;
679: if (idx) *idx = j;
680: VecGetArrayRead(xin,&xx);
681: return(0);
682: }
683: #endif
685: #if defined(PETSC_THREADCOMM_ACTIVE)
686: PetscErrorCode VecSet_kernel(PetscInt thread_id,Vec xin,PetscScalar *alpha_p)
687: {
689: PetscScalar *xx;
690: PetscInt i,start,end;
691: PetscInt *trstarts=xin->map->trstarts;
692: PetscScalar alpha = *alpha_p;
694: start = trstarts[thread_id];
695: end = trstarts[thread_id+1];
696: VecGetArray(xin,&xx);
697: if (alpha == (PetscScalar)0.0) {
698: PetscMemzero(xx+start,(end-start)*sizeof(PetscScalar));
699: } else {
700: for (i=start; i<end; i++) xx[i] = alpha;
701: }
702: VecRestoreArray(xin,&xx);
703: return 0;
704: }
708: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
709: {
711: PetscScalar *scalar;
714: PetscThreadCommGetScalars(PetscObjectComm((PetscObject)xin),&scalar,NULL,NULL);
715: *scalar = alpha;
716: PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecSet_kernel,xin,scalar);
717: return(0);
718: }
719: #else
722: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
723: {
724: PetscInt i,n = xin->map->n;
725: PetscScalar *xx;
729: VecGetArray(xin,&xx);
730: if (alpha == (PetscScalar)0.0) {
731: PetscMemzero(xx,n*sizeof(PetscScalar));
732: } else {
733: for (i=0; i<n; i++) xx[i] = alpha;
734: }
735: VecRestoreArray(xin,&xx);
736: return(0);
737: }
738: #endif
740: #if defined(PETSC_THREADCOMM_ACTIVE)
741: PetscErrorCode VecMAXPY_kernel(PetscInt thread_id,Vec xin,PetscInt *nv_p,const PetscScalar *alpha,Vec *y)
742: {
743: PetscErrorCode ierr;
744: PetscInt *trstarts=xin->map->trstarts,j,j_rem,nv=*nv_p;
745: PetscInt start,end,n;
746: const PetscScalar *yy0,*yy1,*yy2,*yy3;
747: PetscScalar *xx,*x,alpha0,alpha1,alpha2,alpha3;
748: const PetscScalar *y0,*y1,*y2,*y3;
751: start = trstarts[thread_id];
752: end = trstarts[thread_id+1];
753: n = end-start;
754: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
755: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
756: #endif
758: VecGetArray(xin,&xx);
759: x = xx+start;
760: switch (j_rem=nv&0x3) {
761: case 3:
762: VecGetArrayRead(y[0],&yy0);
763: VecGetArrayRead(y[1],&yy1);
764: VecGetArrayRead(y[2],&yy2);
765: alpha0 = alpha[0];
766: alpha1 = alpha[1];
767: alpha2 = alpha[2];
768: alpha += 3;
769: y0 = yy0 + start; y1 = yy1+start; y2 = yy2+start;
770: PetscKernelAXPY3(x,alpha0,alpha1,alpha2,y0,y1,y2,n);
771: VecRestoreArrayRead(y[0],&yy0);
772: VecRestoreArrayRead(y[1],&yy1);
773: VecRestoreArrayRead(y[2],&yy2);
774: y += 3;
775: break;
776: case 2:
777: VecGetArrayRead(y[0],&yy0);
778: VecGetArrayRead(y[1],&yy1);
779: alpha0 = alpha[0];
780: alpha1 = alpha[1];
781: alpha +=2;
782: y0 = yy0+start; y1=yy1+start;
783: PetscKernelAXPY2(x,alpha0,alpha1,y0,y1,n);
784: VecRestoreArrayRead(y[0],&yy0);
785: VecRestoreArrayRead(y[1],&yy1);
786: y +=2;
787: break;
788: case 1:
789: VecGetArrayRead(y[0],&yy0);
790: alpha0 = *alpha++;
791: y0 = yy0 + start;
792: PetscKernelAXPY(x,alpha0,y0,n);
793: VecRestoreArrayRead(y[0],&yy0);
794: y +=1;
795: break;
796: }
797: for (j=j_rem; j<nv; j+=4) {
798: VecGetArrayRead(y[0],&yy0);
799: VecGetArrayRead(y[1],&yy1);
800: VecGetArrayRead(y[2],&yy2);
801: VecGetArrayRead(y[3],&yy3);
802: alpha0 = alpha[0];
803: alpha1 = alpha[1];
804: alpha2 = alpha[2];
805: alpha3 = alpha[3];
806: alpha += 4;
808: y0 = yy0+start; y1=yy1+start; y2=yy2+start; y3=yy3+start;
809: PetscKernelAXPY4(x,alpha0,alpha1,alpha2,alpha3,y0,y1,y2,y3,n);
810: VecRestoreArrayRead(y[0],&yy0);
811: VecRestoreArrayRead(y[1],&yy1);
812: VecRestoreArrayRead(y[2],&yy2);
813: VecRestoreArrayRead(y[3],&yy3);
814: y += 4;
815: }
816: VecRestoreArray(xin,&xx);
817: return 0;
818: }
822: PetscErrorCode VecMAXPY_Seq(Vec xin,PetscInt nv,const PetscScalar *alpha,Vec *y)
823: {
825: PetscInt *int1;
828: PetscLogFlops(nv*2.0*xin->map->n);
829: PetscThreadCommGetInts(PetscObjectComm((PetscObject)xin),&int1,NULL,NULL);
830: *int1 = nv;
831: PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMAXPY_kernel,xin,int1,(void*)alpha,y);
832: return(0);
833: }
834: #else
837: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
838: {
839: PetscErrorCode ierr;
840: PetscInt n = xin->map->n,j,j_rem;
841: const PetscScalar *yy0,*yy1,*yy2,*yy3;
842: PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
844: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
845: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
846: #endif
849: PetscLogFlops(nv*2.0*n);
850: VecGetArray(xin,&xx);
851: switch (j_rem=nv&0x3) {
852: case 3:
853: VecGetArrayRead(y[0],&yy0);
854: VecGetArrayRead(y[1],&yy1);
855: VecGetArrayRead(y[2],&yy2);
856: alpha0 = alpha[0];
857: alpha1 = alpha[1];
858: alpha2 = alpha[2];
859: alpha += 3;
860: PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
861: VecRestoreArrayRead(y[0],&yy0);
862: VecRestoreArrayRead(y[1],&yy1);
863: VecRestoreArrayRead(y[2],&yy2);
864: y += 3;
865: break;
866: case 2:
867: VecGetArrayRead(y[0],&yy0);
868: VecGetArrayRead(y[1],&yy1);
869: alpha0 = alpha[0];
870: alpha1 = alpha[1];
871: alpha +=2;
872: PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
873: VecRestoreArrayRead(y[0],&yy0);
874: VecRestoreArrayRead(y[1],&yy1);
875: y +=2;
876: break;
877: case 1:
878: VecGetArrayRead(y[0],&yy0);
879: alpha0 = *alpha++;
880: PetscKernelAXPY(xx,alpha0,yy0,n);
881: VecRestoreArrayRead(y[0],&yy0);
882: y +=1;
883: break;
884: }
885: for (j=j_rem; j<nv; j+=4) {
886: VecGetArrayRead(y[0],&yy0);
887: VecGetArrayRead(y[1],&yy1);
888: VecGetArrayRead(y[2],&yy2);
889: VecGetArrayRead(y[3],&yy3);
890: alpha0 = alpha[0];
891: alpha1 = alpha[1];
892: alpha2 = alpha[2];
893: alpha3 = alpha[3];
894: alpha += 4;
896: PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
897: VecRestoreArrayRead(y[0],&yy0);
898: VecRestoreArrayRead(y[1],&yy1);
899: VecRestoreArrayRead(y[2],&yy2);
900: VecRestoreArrayRead(y[3],&yy3);
901: y += 4;
902: }
903: VecRestoreArray(xin,&xx);
904: return(0);
905: }
906: #endif
908: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
909: #if defined(PETSC_THREADCOMM_ACTIVE)
910: PetscErrorCode VecAYPX_kernel(PetscInt thread_id,Vec yin,PetscScalar *alpha_p,Vec xin)
911: {
912: PetscErrorCode ierr;
913: PetscScalar *yy;
914: const PetscScalar *xx;
915: PetscInt *trstarts=yin->map->trstarts,i;
916: PetscScalar alpha = *alpha_p;
918: VecGetArrayRead(xin,&xx);
919: VecGetArray(yin,&yy);
921: if (alpha == (PetscScalar)-1.0) {
922: for (i=trstarts[thread_id]; i < trstarts[thread_id+1]; i++) {
923: yy[i] = xx[i] - yy[i];
924: }
925: } else {
926: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
927: PetscInt start=trstarts[thread_id],end=trstarts[thread_id+1],n;
928: PetscScalar oalpha = alpha;
929: n = end - start;
930: fortranaypx_(&n,&oalpha,xx+start,yy+start);
931: }
932: #else
933: for (i=trstarts[thread_id]; i < trstarts[thread_id+1];i++) {
934: yy[i] = xx[i] + alpha*yy[i];
935: }
936: }
937: #endif
938: VecRestoreArrayRead(xin,&xx);
939: VecRestoreArray(yin,&yy);
940: return 0;
941: }
944: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
945: {
946: PetscErrorCode ierr;
947: PetscInt n=yin->map->n;
950: if (alpha == (PetscScalar)0.0) {
951: VecCopy(xin,yin);
952: } else if (alpha == (PetscScalar)1.0) {
953: VecAXPY_Seq(yin,alpha,xin);
954: } else {
955: PetscScalar *scal1;
956: PetscThreadCommGetScalars(PetscObjectComm((PetscObject)yin),&scal1,NULL,NULL);
957: *scal1 = alpha;
958: PetscThreadCommRunKernel3(PetscObjectComm((PetscObject)yin),(PetscThreadKernel)VecAYPX_kernel,yin,scal1,xin);
959: if (alpha == (PetscScalar)-1.0) {
960: PetscLogFlops(1.0*n);
961: } else {
962: PetscLogFlops(2.0*n);
963: }
964: }
965: return(0);
966: }
967: #else
970: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
971: {
972: PetscErrorCode ierr;
973: PetscInt n = yin->map->n;
974: PetscScalar *yy;
975: const PetscScalar *xx;
978: if (alpha == (PetscScalar)0.0) {
979: VecCopy(xin,yin);
980: } else if (alpha == (PetscScalar)1.0) {
981: VecAXPY_Seq(yin,alpha,xin);
982: } else if (alpha == (PetscScalar)-1.0) {
983: PetscInt i;
984: VecGetArrayRead(xin,&xx);
985: VecGetArray(yin,&yy);
987: for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];
989: VecRestoreArrayRead(xin,&xx);
990: VecRestoreArray(yin,&yy);
991: PetscLogFlops(1.0*n);
992: } else {
993: VecGetArrayRead(xin,&xx);
994: VecGetArray(yin,&yy);
995: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
996: {
997: PetscScalar oalpha = alpha;
998: fortranaypx_(&n,&oalpha,xx,yy);
999: }
1000: #else
1001: {
1002: PetscInt i;
1004: for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
1005: }
1006: #endif
1007: VecRestoreArrayRead(xin,&xx);
1008: VecRestoreArray(yin,&yy);
1009: PetscLogFlops(2.0*n);
1010: }
1011: return(0);
1012: }
1013: #endif
1015: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
1016: /*
1017: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
1018: to be slower than a regular C loop. Hence,we do not include it.
1019: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
1020: */
1021: #if defined(PETSC_THREADCOMM_ACTIVE)
1022: PetscErrorCode VecWAXPY_kernel(PetscInt thread_id,Vec win,PetscScalar *alpha_p,Vec xin,Vec yin)
1023: {
1024: PetscErrorCode ierr;
1025: PetscScalar alpha=*alpha_p;
1026: PetscInt *trstarts=win->map->trstarts,start,end,i;
1027: const PetscScalar *xx,*yy;
1028: PetscScalar *ww;
1030: start = trstarts[thread_id];
1031: end = trstarts[thread_id+1];
1033: VecGetArrayRead(xin,&xx);
1034: VecGetArrayRead(yin,&yy);
1035: VecGetArray(win,&ww);
1036: if (alpha == (PetscScalar)1.0) {
1037: for (i=start; i < end; i++) ww[i] = yy[i] + xx[i];
1038: } else if (alpha == (PetscScalar)-1.0) {
1039: for (i=start; i < end; i++) ww[i] = yy[i] - xx[i];
1040: } else if (alpha == (PetscScalar)0.0) {
1041: PetscMemcpy(ww+start,yy+start,(end-start)*sizeof(PetscScalar));
1042: } else {
1043: PetscScalar oalpha = alpha;
1044: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
1045: fortranwaxpy_(&n,&oalpha,xx+start,yy+start,ww+start);
1046: #else
1047: for (i=start; i < end; i++) ww[i] = yy[i] + oalpha*xx[i];
1048: #endif
1049: }
1050: VecRestoreArrayRead(xin,&xx);
1051: VecRestoreArrayRead(yin,&yy);
1052: VecRestoreArray(win,&ww);
1053: return 0;
1054: }
1058: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
1059: {
1060: PetscErrorCode ierr;
1061: PetscInt n = win->map->n;
1062: PetscScalar *scal1;
1065: PetscThreadCommGetScalars(PetscObjectComm((PetscObject)win),&scal1,NULL,NULL);
1066: *scal1 = alpha;
1067: PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)win),(PetscThreadKernel)VecWAXPY_kernel,win,scal1,xin,yin);
1068: if (alpha == (PetscScalar)1.0) {
1069: PetscLogFlops(n);
1070: } else if (alpha == (PetscScalar)-1.0) {
1071: PetscLogFlops(n);
1072: } else {
1073: PetscLogFlops(2.0*n);
1074: }
1075: return(0);
1076: }
1077: #else
1080: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
1081: {
1082: PetscErrorCode ierr;
1083: PetscInt i,n = win->map->n;
1084: PetscScalar *ww;
1085: const PetscScalar *yy,*xx;
1088: VecGetArrayRead(xin,&xx);
1089: VecGetArrayRead(yin,&yy);
1090: VecGetArray(win,&ww);
1091: if (alpha == (PetscScalar)1.0) {
1092: PetscLogFlops(n);
1093: /* could call BLAS axpy after call to memcopy, but may be slower */
1094: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
1095: } else if (alpha == (PetscScalar)-1.0) {
1096: PetscLogFlops(n);
1097: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
1098: } else if (alpha == (PetscScalar)0.0) {
1099: PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
1100: } else {
1101: PetscScalar oalpha = alpha;
1102: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
1103: fortranwaxpy_(&n,&oalpha,xx,yy,ww);
1104: #else
1105: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
1106: #endif
1107: PetscLogFlops(2.0*n);
1108: }
1109: VecRestoreArrayRead(xin,&xx);
1110: VecRestoreArrayRead(yin,&yy);
1111: VecRestoreArray(win,&ww);
1112: return(0);
1113: }
1114: #endif
1118: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
1119: {
1120: PetscErrorCode ierr;
1121: PetscInt n = xin->map->n,i;
1122: const PetscScalar *xx,*yy;
1123: PetscReal m = 0.0;
1126: VecGetArrayRead(xin,&xx);
1127: VecGetArrayRead(yin,&yy);
1128: for (i = 0; i < n; i++) {
1129: if (yy[i] != (PetscScalar)0.0) {
1130: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
1131: } else {
1132: m = PetscMax(PetscAbsScalar(xx[i]), m);
1133: }
1134: }
1135: VecRestoreArrayRead(xin,&xx);
1136: VecRestoreArrayRead(yin,&yy);
1137: MPI_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
1138: PetscLogFlops(n);
1139: return(0);
1140: }
1144: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
1145: {
1146: Vec_Seq *v = (Vec_Seq*)vin->data;
1149: if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
1150: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
1151: v->array = (PetscScalar*)a;
1152: return(0);
1153: }
1157: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
1158: {
1159: Vec_Seq *v = (Vec_Seq*)vin->data;
1163: PetscFree(v->array_allocated);
1164: v->array_allocated = v->array = (PetscScalar*)a;
1165: return(0);
1166: }