Actual source code: dvec2.c

petsc-3.5.4 2015-05-23
Report Typos and Errors
  2: /*
  3:    Defines some vector operation functions that are shared by
  4:   sequential and parallel vectors.
  5: */
  6: #include <../src/vec/vec/impls/dvecimpl.h>
  7: #include <petsc-private/kernels/petscaxpy.h>
  8: #include <petscthreadcomm.h>

 10: #if defined(PETSC_THREADCOMM_ACTIVE)
 11: PetscErrorCode VecMDot_kernel(PetscInt thread_id,Vec xin,PetscInt *nvp,Vec *yvec,PetscThreadCommReduction red)
 12: {
 14:   PetscInt       *trstarts=xin->map->trstarts;
 15:   PetscInt       start,end,nv=*nvp,i,j;
 16:   PetscScalar    *xx,*yy;
 17:   PetscScalar    sum;

 19:   start = trstarts[thread_id];
 20:   end   = trstarts[thread_id+1];
 21:   VecGetArray(xin,&xx);
 22:   for (i=0; i<nv; i++) {
 23:     sum  = 0.;
 24:     VecGetArray(yvec[i],&yy);
 25:     for (j=start; j<end; j++) sum += xx[j]*PetscConj(yy[j]);
 26:     PetscThreadReductionKernelPost(thread_id,red,&sum);
 27:     VecRestoreArray(yvec[i],&yy);
 28:   }
 29:   VecRestoreArray(xin,&xx);

 31:   return 0;
 32: }

 36: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 37: {
 38:   PetscErrorCode           ierr;
 39:   PetscThreadCommReduction red;
 40:   PetscInt                 *nvp,nreds,i,j;

 43:   PetscThreadCommGetInts(PetscObjectComm((PetscObject)xin),&nvp,NULL,NULL);
 44:   for (i=0; i<nv; i+=nreds) {
 45:     nreds = PetscMin(nv-i,PETSC_REDUCTIONS_MAX);
 46:     PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_SUM,PETSC_SCALAR,nreds,&red);
 47:     *nvp  = nreds;
 48:     PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMDot_kernel,xin,nvp,(Vec*)yin+i,red);
 49:     for (j=0; j<nreds; j++) {
 50:       PetscThreadReductionEnd(red,&z[i+j]);
 51:     }
 52:   }
 53:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
 54:   return(0);
 55: }

 57: #else
 58: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 59: #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 62: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 63: {
 64:   PetscErrorCode    ierr;
 65:   PetscInt          i,nv_rem,n = xin->map->n;
 66:   PetscScalar       sum0,sum1,sum2,sum3;
 67:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
 68:   Vec               *yy;

 71:   sum0 = 0.0;
 72:   sum1 = 0.0;
 73:   sum2 = 0.0;

 75:   i      = nv;
 76:   nv_rem = nv&0x3;
 77:   yy     = (Vec*)yin;
 78:   VecGetArrayRead(xin,&x);

 80:   switch (nv_rem) {
 81:   case 3:
 82:     VecGetArrayRead(yy[0],&yy0);
 83:     VecGetArrayRead(yy[1],&yy1);
 84:     VecGetArrayRead(yy[2],&yy2);
 85:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 86:     VecRestoreArrayRead(yy[0],&yy0);
 87:     VecRestoreArrayRead(yy[1],&yy1);
 88:     VecRestoreArrayRead(yy[2],&yy2);
 89:     z[0] = sum0;
 90:     z[1] = sum1;
 91:     z[2] = sum2;
 92:     break;
 93:   case 2:
 94:     VecGetArrayRead(yy[0],&yy0);
 95:     VecGetArrayRead(yy[1],&yy1);
 96:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 97:     VecRestoreArrayRead(yy[0],&yy0);
 98:     VecRestoreArrayRead(yy[1],&yy1);
 99:     z[0] = sum0;
100:     z[1] = sum1;
101:     break;
102:   case 1:
103:     VecGetArrayRead(yy[0],&yy0);
104:     fortranmdot1_(x,yy0,&n,&sum0);
105:     VecRestoreArrayRead(yy[0],&yy0);
106:     z[0] = sum0;
107:     break;
108:   case 0:
109:     break;
110:   }
111:   z  += nv_rem;
112:   i  -= nv_rem;
113:   yy += nv_rem;

115:   while (i >0) {
116:     sum0 = 0.;
117:     sum1 = 0.;
118:     sum2 = 0.;
119:     sum3 = 0.;
120:     VecGetArrayRead(yy[0],&yy0);
121:     VecGetArrayRead(yy[1],&yy1);
122:     VecGetArrayRead(yy[2],&yy2);
123:     VecGetArrayRead(yy[3],&yy3);
124:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
125:     VecRestoreArrayRead(yy[0],&yy0);
126:     VecRestoreArrayRead(yy[1],&yy1);
127:     VecRestoreArrayRead(yy[2],&yy2);
128:     VecRestoreArrayRead(yy[3],&yy3);
129:     yy  += 4;
130:     z[0] = sum0;
131:     z[1] = sum1;
132:     z[2] = sum2;
133:     z[3] = sum3;
134:     z   += 4;
135:     i   -= 4;
136:   }
137:   VecRestoreArrayRead(xin,&x);
138:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
139:   return(0);
140: }

142: #else
145: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
146: {
147:   PetscErrorCode    ierr;
148:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
149:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
150:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
151:   Vec               *yy;

154:   sum0 = 0.;
155:   sum1 = 0.;
156:   sum2 = 0.;

158:   i      = nv;
159:   nv_rem = nv&0x3;
160:   yy     = (Vec*)yin;
161:   j      = n;
162:   VecGetArrayRead(xin,&xbase);
163:   x      = xbase;

165:   switch (nv_rem) {
166:   case 3:
167:     VecGetArrayRead(yy[0],&yy0);
168:     VecGetArrayRead(yy[1],&yy1);
169:     VecGetArrayRead(yy[2],&yy2);
170:     switch (j_rem=j&0x3) {
171:     case 3:
172:       x2    = x[2];
173:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
174:       sum2 += x2*PetscConj(yy2[2]);
175:     case 2:
176:       x1    = x[1];
177:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
178:       sum2 += x1*PetscConj(yy2[1]);
179:     case 1:
180:       x0    = x[0];
181:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
182:       sum2 += x0*PetscConj(yy2[0]);
183:     case 0:
184:       x   += j_rem;
185:       yy0 += j_rem;
186:       yy1 += j_rem;
187:       yy2 += j_rem;
188:       j   -= j_rem;
189:       break;
190:     }
191:     while (j>0) {
192:       x0 = x[0];
193:       x1 = x[1];
194:       x2 = x[2];
195:       x3 = x[3];
196:       x += 4;

198:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
199:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
200:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
201:       j    -= 4;
202:     }
203:     z[0] = sum0;
204:     z[1] = sum1;
205:     z[2] = sum2;
206:     VecRestoreArrayRead(yy[0],&yy0);
207:     VecRestoreArrayRead(yy[1],&yy1);
208:     VecRestoreArrayRead(yy[2],&yy2);
209:     break;
210:   case 2:
211:     VecGetArrayRead(yy[0],&yy0);
212:     VecGetArrayRead(yy[1],&yy1);
213:     switch (j_rem=j&0x3) {
214:     case 3:
215:       x2    = x[2];
216:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
217:     case 2:
218:       x1    = x[1];
219:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
220:     case 1:
221:       x0    = x[0];
222:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
223:     case 0:
224:       x   += j_rem;
225:       yy0 += j_rem;
226:       yy1 += j_rem;
227:       j   -= j_rem;
228:       break;
229:     }
230:     while (j>0) {
231:       x0 = x[0];
232:       x1 = x[1];
233:       x2 = x[2];
234:       x3 = x[3];
235:       x += 4;

237:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
238:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
239:       j    -= 4;
240:     }
241:     z[0] = sum0;
242:     z[1] = sum1;

244:     VecRestoreArrayRead(yy[0],&yy0);
245:     VecRestoreArrayRead(yy[1],&yy1);
246:     break;
247:   case 1:
248:     VecGetArrayRead(yy[0],&yy0);
249:     switch (j_rem=j&0x3) {
250:     case 3:
251:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
252:     case 2:
253:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
254:     case 1:
255:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
256:     case 0:
257:       x   += j_rem;
258:       yy0 += j_rem;
259:       j   -= j_rem;
260:       break;
261:     }
262:     while (j>0) {
263:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
264:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
265:       yy0  +=4;
266:       j    -= 4; x+=4;
267:     }
268:     z[0] = sum0;

270:     VecRestoreArrayRead(yy[0],&yy0);
271:     break;
272:   case 0:
273:     break;
274:   }
275:   z  += nv_rem;
276:   i  -= nv_rem;
277:   yy += nv_rem;

279:   while (i >0) {
280:     sum0 = 0.;
281:     sum1 = 0.;
282:     sum2 = 0.;
283:     sum3 = 0.;
284:     VecGetArrayRead(yy[0],&yy0);
285:     VecGetArrayRead(yy[1],&yy1);
286:     VecGetArrayRead(yy[2],&yy2);
287:     VecGetArrayRead(yy[3],&yy3);

289:     j = n;
290:     x = xbase;
291:     switch (j_rem=j&0x3) {
292:     case 3:
293:       x2    = x[2];
294:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
295:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
296:     case 2:
297:       x1    = x[1];
298:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
299:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
300:     case 1:
301:       x0    = x[0];
302:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
303:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
304:     case 0:
305:       x   += j_rem;
306:       yy0 += j_rem;
307:       yy1 += j_rem;
308:       yy2 += j_rem;
309:       yy3 += j_rem;
310:       j   -= j_rem;
311:       break;
312:     }
313:     while (j>0) {
314:       x0 = x[0];
315:       x1 = x[1];
316:       x2 = x[2];
317:       x3 = x[3];
318:       x += 4;

320:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
321:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
322:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
323:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
324:       j    -= 4;
325:     }
326:     z[0] = sum0;
327:     z[1] = sum1;
328:     z[2] = sum2;
329:     z[3] = sum3;
330:     z   += 4;
331:     i   -= 4;
332:     VecRestoreArrayRead(yy[0],&yy0);
333:     VecRestoreArrayRead(yy[1],&yy1);
334:     VecRestoreArrayRead(yy[2],&yy2);
335:     VecRestoreArrayRead(yy[3],&yy3);
336:     yy  += 4;
337:   }
338:   VecRestoreArrayRead(xin,&xbase);
339:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
340:   return(0);
341: }
342: #endif
343: #endif
344: /* ----------------------------------------------------------------------------*/
347: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
348: {
349:   PetscErrorCode    ierr;
350:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
351:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
352:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
353:   Vec               *yy;

356:   sum0 = 0.;
357:   sum1 = 0.;
358:   sum2 = 0.;

360:   i      = nv;
361:   nv_rem = nv&0x3;
362:   yy     = (Vec*)yin;
363:   j      = n;
364:   VecGetArrayRead(xin,&xbase);
365:   x      = xbase;

367:   switch (nv_rem) {
368:   case 3:
369:     VecGetArrayRead(yy[0],&yy0);
370:     VecGetArrayRead(yy[1],&yy1);
371:     VecGetArrayRead(yy[2],&yy2);
372:     switch (j_rem=j&0x3) {
373:     case 3:
374:       x2    = x[2];
375:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
376:       sum2 += x2*yy2[2];
377:     case 2:
378:       x1    = x[1];
379:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
380:       sum2 += x1*yy2[1];
381:     case 1:
382:       x0    = x[0];
383:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
384:       sum2 += x0*yy2[0];
385:     case 0:
386:       x   += j_rem;
387:       yy0 += j_rem;
388:       yy1 += j_rem;
389:       yy2 += j_rem;
390:       j   -= j_rem;
391:       break;
392:     }
393:     while (j>0) {
394:       x0 = x[0];
395:       x1 = x[1];
396:       x2 = x[2];
397:       x3 = x[3];
398:       x += 4;

400:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
401:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
402:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
403:       j    -= 4;
404:     }
405:     z[0] = sum0;
406:     z[1] = sum1;
407:     z[2] = sum2;
408:     VecRestoreArrayRead(yy[0],&yy0);
409:     VecRestoreArrayRead(yy[1],&yy1);
410:     VecRestoreArrayRead(yy[2],&yy2);
411:     break;
412:   case 2:
413:     VecGetArrayRead(yy[0],&yy0);
414:     VecGetArrayRead(yy[1],&yy1);
415:     switch (j_rem=j&0x3) {
416:     case 3:
417:       x2    = x[2];
418:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
419:     case 2:
420:       x1    = x[1];
421:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
422:     case 1:
423:       x0    = x[0];
424:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
425:     case 0:
426:       x   += j_rem;
427:       yy0 += j_rem;
428:       yy1 += j_rem;
429:       j   -= j_rem;
430:       break;
431:     }
432:     while (j>0) {
433:       x0 = x[0];
434:       x1 = x[1];
435:       x2 = x[2];
436:       x3 = x[3];
437:       x += 4;

439:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
440:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
441:       j    -= 4;
442:     }
443:     z[0] = sum0;
444:     z[1] = sum1;

446:     VecRestoreArrayRead(yy[0],&yy0);
447:     VecRestoreArrayRead(yy[1],&yy1);
448:     break;
449:   case 1:
450:     VecGetArrayRead(yy[0],&yy0);
451:     switch (j_rem=j&0x3) {
452:     case 3:
453:       x2 = x[2]; sum0 += x2*yy0[2];
454:     case 2:
455:       x1 = x[1]; sum0 += x1*yy0[1];
456:     case 1:
457:       x0 = x[0]; sum0 += x0*yy0[0];
458:     case 0:
459:       x   += j_rem;
460:       yy0 += j_rem;
461:       j   -= j_rem;
462:       break;
463:     }
464:     while (j>0) {
465:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
466:       j    -= 4; x+=4;
467:     }
468:     z[0] = sum0;

470:     VecRestoreArrayRead(yy[0],&yy0);
471:     break;
472:   case 0:
473:     break;
474:   }
475:   z  += nv_rem;
476:   i  -= nv_rem;
477:   yy += nv_rem;

479:   while (i >0) {
480:     sum0 = 0.;
481:     sum1 = 0.;
482:     sum2 = 0.;
483:     sum3 = 0.;
484:     VecGetArrayRead(yy[0],&yy0);
485:     VecGetArrayRead(yy[1],&yy1);
486:     VecGetArrayRead(yy[2],&yy2);
487:     VecGetArrayRead(yy[3],&yy3);
488:     x    = xbase;

490:     j = n;
491:     switch (j_rem=j&0x3) {
492:     case 3:
493:       x2    = x[2];
494:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
495:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
496:     case 2:
497:       x1    = x[1];
498:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
499:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
500:     case 1:
501:       x0    = x[0];
502:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
503:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
504:     case 0:
505:       x   += j_rem;
506:       yy0 += j_rem;
507:       yy1 += j_rem;
508:       yy2 += j_rem;
509:       yy3 += j_rem;
510:       j   -= j_rem;
511:       break;
512:     }
513:     while (j>0) {
514:       x0 = x[0];
515:       x1 = x[1];
516:       x2 = x[2];
517:       x3 = x[3];
518:       x += 4;

520:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
521:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
522:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
523:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
524:       j    -= 4;
525:     }
526:     z[0] = sum0;
527:     z[1] = sum1;
528:     z[2] = sum2;
529:     z[3] = sum3;
530:     z   += 4;
531:     i   -= 4;
532:     VecRestoreArrayRead(yy[0],&yy0);
533:     VecRestoreArrayRead(yy[1],&yy1);
534:     VecRestoreArrayRead(yy[2],&yy2);
535:     VecRestoreArrayRead(yy[3],&yy3);
536:     yy  += 4;
537:   }
538:   VecRestoreArrayRead(xin,&xbase);
539:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
540:   return(0);
541: }

543: #if defined(PETSC_THREADCOMM_ACTIVE)
544: PetscErrorCode VecMax_kernel(PetscInt thread_id,Vec xin,PetscThreadCommReduction red)
545: {
546:   PetscErrorCode    ierr;
547:   PetscInt          *trstarts=xin->map->trstarts;
548:   PetscInt          start,end,i;
549:   const PetscScalar *xx;
550:   PetscReal         lred[2],tmp;

552:   start   = trstarts[thread_id];
553:   end     = trstarts[thread_id+1];
554:   VecGetArrayRead(xin,&xx);
555:   lred[0] = xx[start]; lred[1] = start;
556:   for (i=start+1; i<end; i++) {
557:     if ((tmp = PetscRealPart(xx[i])) > lred[0]) { lred[0] = tmp; lred[1] = i;}
558:   }
559:   VecRestoreArrayRead(xin,&xx);
560:   PetscThreadReductionKernelPost(thread_id,red,lred);
561:   return 0;
562: }

566: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
567: {
568:   PetscErrorCode           ierr;
569:   PetscInt                 n=xin->map->n;
570:   PetscThreadCommReduction red;
571:   PetscReal                out[2];

574:   if (!n) {
575:     *z   = PETSC_MIN_REAL;
576:     *idx = -1;
577:   } else {
578:     PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_MAXLOC,PETSC_REAL,1,&red);
579:     PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMax_kernel,xin,red);
580:     PetscThreadReductionEnd(red,out);
581:     *z   = out[0];
582:     if (idx) *idx = (PetscInt)out[1];
583:   }
584:   return(0);
585: }
586: #else
589: PetscErrorCode VecMax_Seq(Vec xin,PetscInt *idx,PetscReal *z)
590: {
591:   PetscInt          i,j=0,n = xin->map->n;
592:   PetscReal         max,tmp;
593:   const PetscScalar *xx;
594:   PetscErrorCode    ierr;

597:   VecGetArrayRead(xin,&xx);
598:   if (!n) {
599:     max = PETSC_MIN_REAL;
600:     j   = -1;
601:   } else {
602:     max = PetscRealPart(*xx++); j = 0;
603:     for (i=1; i<n; i++) {
604:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
605:     }
606:   }
607:   *z = max;
608:   if (idx) *idx = j;
609:   VecRestoreArrayRead(xin,&xx);
610:   return(0);
611: }
612: #endif

614: #if defined(PETSC_THREADCOMM_ACTIVE)
615: PetscErrorCode VecMin_kernel(PetscInt thread_id,Vec xin,PetscThreadCommReduction red)
616: {
617:   PetscErrorCode    ierr;
618:   PetscInt          *trstarts=xin->map->trstarts;
619:   PetscInt          start,end,i;
620:   const PetscScalar *xx;
621:   PetscReal         lred[2],tmp;

623:   start   = trstarts[thread_id];
624:   end     = trstarts[thread_id+1];
625:   VecGetArrayRead(xin,&xx);
626:   lred[0] = xx[start]; lred[1] = start;
627:   for (i=start+1;i<end;i++) {
628:     if ((tmp = PetscRealPart(xx[i])) < lred[0]) { lred[0] = tmp; lred[1] = i;}
629:   }
630:   VecRestoreArrayRead(xin,&xx);
631:   PetscThreadReductionKernelPost(thread_id,red,lred);
632:   return 0;
633: }

637: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
638: {
639:   PetscErrorCode           ierr;
640:   PetscInt                 n=xin->map->n;
641:   PetscThreadCommReduction red;
642:   PetscReal                out[2];

645:   if (!n) {
646:     *z   = PETSC_MAX_REAL;
647:     *idx = -1;
648:   } else {
649:     PetscThreadReductionBegin(PetscObjectComm((PetscObject)xin),THREADCOMM_MINLOC,PETSC_REAL,1,&red);
650:     PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMin_kernel,xin,red);
651:     PetscThreadReductionEnd(red,out);
652:     *z   = out[0];
653:     if (idx) *idx = (PetscInt)out[1];
654:   }
655:   return(0);
656: }
657: #else
660: PetscErrorCode VecMin_Seq(Vec xin,PetscInt *idx,PetscReal *z)
661: {
662:   PetscInt          i,j=0,n = xin->map->n;
663:   PetscReal         min,tmp;
664:   const PetscScalar *xx;
665:   PetscErrorCode    ierr;

668:   VecGetArrayRead(xin,&xx);
669:   if (!n) {
670:     min = PETSC_MAX_REAL;
671:     j   = -1;
672:   } else {
673:     min = PetscRealPart(*xx++); j = 0;
674:     for (i=1; i<n; i++) {
675:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
676:     }
677:   }
678:   *z = min;
679:   if (idx) *idx = j;
680:   VecGetArrayRead(xin,&xx);
681:   return(0);
682: }
683: #endif

685: #if defined(PETSC_THREADCOMM_ACTIVE)
686: PetscErrorCode VecSet_kernel(PetscInt thread_id,Vec xin,PetscScalar *alpha_p)
687: {
689:   PetscScalar    *xx;
690:   PetscInt       i,start,end;
691:   PetscInt       *trstarts=xin->map->trstarts;
692:   PetscScalar    alpha    = *alpha_p;

694:   start = trstarts[thread_id];
695:   end   = trstarts[thread_id+1];
696:   VecGetArray(xin,&xx);
697:   if (alpha == (PetscScalar)0.0) {
698:     PetscMemzero(xx+start,(end-start)*sizeof(PetscScalar));
699:   } else {
700:     for (i=start; i<end; i++) xx[i] = alpha;
701:   }
702:   VecRestoreArray(xin,&xx);
703:   return 0;
704: }

708: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
709: {
711:   PetscScalar    *scalar;

714:   PetscThreadCommGetScalars(PetscObjectComm((PetscObject)xin),&scalar,NULL,NULL);
715:   *scalar = alpha;
716:   PetscThreadCommRunKernel2(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecSet_kernel,xin,scalar);
717:   return(0);
718: }
719: #else
722: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
723: {
724:   PetscInt       i,n = xin->map->n;
725:   PetscScalar    *xx;

729:   VecGetArray(xin,&xx);
730:   if (alpha == (PetscScalar)0.0) {
731:     PetscMemzero(xx,n*sizeof(PetscScalar));
732:   } else {
733:     for (i=0; i<n; i++) xx[i] = alpha;
734:   }
735:   VecRestoreArray(xin,&xx);
736:   return(0);
737: }
738: #endif

740: #if defined(PETSC_THREADCOMM_ACTIVE)
741: PetscErrorCode VecMAXPY_kernel(PetscInt thread_id,Vec xin,PetscInt *nv_p,const PetscScalar *alpha,Vec *y)
742: {
743:   PetscErrorCode    ierr;
744:   PetscInt          *trstarts=xin->map->trstarts,j,j_rem,nv=*nv_p;
745:   PetscInt          start,end,n;
746:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
747:   PetscScalar       *xx,*x,alpha0,alpha1,alpha2,alpha3;
748:   const PetscScalar *y0,*y1,*y2,*y3;


751:   start = trstarts[thread_id];
752:   end   = trstarts[thread_id+1];
753:   n     = end-start;
754: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
755: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
756: #endif

758:   VecGetArray(xin,&xx);
759:   x    = xx+start;
760:   switch (j_rem=nv&0x3) {
761:   case 3:
762:     VecGetArrayRead(y[0],&yy0);
763:     VecGetArrayRead(y[1],&yy1);
764:     VecGetArrayRead(y[2],&yy2);
765:     alpha0 = alpha[0];
766:     alpha1 = alpha[1];
767:     alpha2 = alpha[2];
768:     alpha += 3;
769:     y0 = yy0 + start; y1 = yy1+start; y2 = yy2+start;
770:     PetscKernelAXPY3(x,alpha0,alpha1,alpha2,y0,y1,y2,n);
771:     VecRestoreArrayRead(y[0],&yy0);
772:     VecRestoreArrayRead(y[1],&yy1);
773:     VecRestoreArrayRead(y[2],&yy2);
774:     y   += 3;
775:     break;
776:   case 2:
777:     VecGetArrayRead(y[0],&yy0);
778:     VecGetArrayRead(y[1],&yy1);
779:     alpha0 = alpha[0];
780:     alpha1 = alpha[1];
781:     alpha +=2;
782:     y0 = yy0+start; y1=yy1+start;
783:     PetscKernelAXPY2(x,alpha0,alpha1,y0,y1,n);
784:     VecRestoreArrayRead(y[0],&yy0);
785:     VecRestoreArrayRead(y[1],&yy1);
786:     y   +=2;
787:     break;
788:   case 1:
789:     VecGetArrayRead(y[0],&yy0);
790:     alpha0 = *alpha++;
791:     y0     = yy0 + start;
792:     PetscKernelAXPY(x,alpha0,y0,n);
793:     VecRestoreArrayRead(y[0],&yy0);
794:     y   +=1;
795:     break;
796:   }
797:   for (j=j_rem; j<nv; j+=4) {
798:     VecGetArrayRead(y[0],&yy0);
799:     VecGetArrayRead(y[1],&yy1);
800:     VecGetArrayRead(y[2],&yy2);
801:     VecGetArrayRead(y[3],&yy3);
802:     alpha0 = alpha[0];
803:     alpha1 = alpha[1];
804:     alpha2 = alpha[2];
805:     alpha3 = alpha[3];
806:     alpha += 4;

808:     y0 = yy0+start; y1=yy1+start; y2=yy2+start; y3=yy3+start;
809:     PetscKernelAXPY4(x,alpha0,alpha1,alpha2,alpha3,y0,y1,y2,y3,n);
810:     VecRestoreArrayRead(y[0],&yy0);
811:     VecRestoreArrayRead(y[1],&yy1);
812:     VecRestoreArrayRead(y[2],&yy2);
813:     VecRestoreArrayRead(y[3],&yy3);
814:     y   += 4;
815:   }
816:   VecRestoreArray(xin,&xx);
817:   return 0;
818: }

822: PetscErrorCode VecMAXPY_Seq(Vec xin,PetscInt nv,const PetscScalar *alpha,Vec *y)
823: {
825:   PetscInt       *int1;

828:   PetscLogFlops(nv*2.0*xin->map->n);
829:   PetscThreadCommGetInts(PetscObjectComm((PetscObject)xin),&int1,NULL,NULL);
830:   *int1 = nv;
831:   PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)xin),(PetscThreadKernel)VecMAXPY_kernel,xin,int1,(void*)alpha,y);
832:   return(0);
833: }
834: #else
837: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
838: {
839:   PetscErrorCode    ierr;
840:   PetscInt          n = xin->map->n,j,j_rem;
841:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
842:   PetscScalar       *xx,alpha0,alpha1,alpha2,alpha3;

844: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
845: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
846: #endif

849:   PetscLogFlops(nv*2.0*n);
850:   VecGetArray(xin,&xx);
851:   switch (j_rem=nv&0x3) {
852:   case 3:
853:     VecGetArrayRead(y[0],&yy0);
854:     VecGetArrayRead(y[1],&yy1);
855:     VecGetArrayRead(y[2],&yy2);
856:     alpha0 = alpha[0];
857:     alpha1 = alpha[1];
858:     alpha2 = alpha[2];
859:     alpha += 3;
860:     PetscKernelAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
861:     VecRestoreArrayRead(y[0],&yy0);
862:     VecRestoreArrayRead(y[1],&yy1);
863:     VecRestoreArrayRead(y[2],&yy2);
864:     y   += 3;
865:     break;
866:   case 2:
867:     VecGetArrayRead(y[0],&yy0);
868:     VecGetArrayRead(y[1],&yy1);
869:     alpha0 = alpha[0];
870:     alpha1 = alpha[1];
871:     alpha +=2;
872:     PetscKernelAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
873:     VecRestoreArrayRead(y[0],&yy0);
874:     VecRestoreArrayRead(y[1],&yy1);
875:     y   +=2;
876:     break;
877:   case 1:
878:     VecGetArrayRead(y[0],&yy0);
879:     alpha0 = *alpha++;
880:     PetscKernelAXPY(xx,alpha0,yy0,n);
881:     VecRestoreArrayRead(y[0],&yy0);
882:     y   +=1;
883:     break;
884:   }
885:   for (j=j_rem; j<nv; j+=4) {
886:     VecGetArrayRead(y[0],&yy0);
887:     VecGetArrayRead(y[1],&yy1);
888:     VecGetArrayRead(y[2],&yy2);
889:     VecGetArrayRead(y[3],&yy3);
890:     alpha0 = alpha[0];
891:     alpha1 = alpha[1];
892:     alpha2 = alpha[2];
893:     alpha3 = alpha[3];
894:     alpha += 4;

896:     PetscKernelAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
897:     VecRestoreArrayRead(y[0],&yy0);
898:     VecRestoreArrayRead(y[1],&yy1);
899:     VecRestoreArrayRead(y[2],&yy2);
900:     VecRestoreArrayRead(y[3],&yy3);
901:     y   += 4;
902:   }
903:   VecRestoreArray(xin,&xx);
904:   return(0);
905: }
906: #endif

908: #include <../src/vec/vec/impls/seq/ftn-kernels/faypx.h>
909: #if defined(PETSC_THREADCOMM_ACTIVE)
910: PetscErrorCode VecAYPX_kernel(PetscInt thread_id,Vec yin,PetscScalar *alpha_p,Vec xin)
911: {
912:   PetscErrorCode    ierr;
913:   PetscScalar       *yy;
914:   const PetscScalar *xx;
915:   PetscInt          *trstarts=yin->map->trstarts,i;
916:   PetscScalar       alpha = *alpha_p;

918:   VecGetArrayRead(xin,&xx);
919:   VecGetArray(yin,&yy);

921:   if (alpha == (PetscScalar)-1.0) {
922:     for (i=trstarts[thread_id]; i < trstarts[thread_id+1]; i++) {
923:       yy[i] = xx[i] - yy[i];
924:     }
925:   } else {
926: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
927:       PetscInt start=trstarts[thread_id],end=trstarts[thread_id+1],n;
928:       PetscScalar oalpha = alpha;
929:       n = end - start;
930:       fortranaypx_(&n,&oalpha,xx+start,yy+start);
931:   }
932: #else
933:     for (i=trstarts[thread_id]; i < trstarts[thread_id+1];i++) {
934:       yy[i] = xx[i] + alpha*yy[i];
935:     }
936:   }
937: #endif
938:   VecRestoreArrayRead(xin,&xx);
939:   VecRestoreArray(yin,&yy);
940:   return 0;
941: }
944: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
945: {
946:   PetscErrorCode    ierr;
947:   PetscInt          n=yin->map->n;

950:   if (alpha == (PetscScalar)0.0) {
951:     VecCopy(xin,yin);
952:   } else if (alpha == (PetscScalar)1.0) {
953:     VecAXPY_Seq(yin,alpha,xin);
954:   } else {
955:     PetscScalar *scal1;
956:     PetscThreadCommGetScalars(PetscObjectComm((PetscObject)yin),&scal1,NULL,NULL);
957:     *scal1 = alpha;
958:     PetscThreadCommRunKernel3(PetscObjectComm((PetscObject)yin),(PetscThreadKernel)VecAYPX_kernel,yin,scal1,xin);
959:     if (alpha == (PetscScalar)-1.0) {
960:       PetscLogFlops(1.0*n);
961:     } else {
962:       PetscLogFlops(2.0*n);
963:     }
964:   }
965:   return(0);
966: }
967: #else
970: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
971: {
972:   PetscErrorCode    ierr;
973:   PetscInt          n = yin->map->n;
974:   PetscScalar       *yy;
975:   const PetscScalar *xx;

978:   if (alpha == (PetscScalar)0.0) {
979:     VecCopy(xin,yin);
980:   } else if (alpha == (PetscScalar)1.0) {
981:     VecAXPY_Seq(yin,alpha,xin);
982:   } else if (alpha == (PetscScalar)-1.0) {
983:     PetscInt i;
984:     VecGetArrayRead(xin,&xx);
985:     VecGetArray(yin,&yy);

987:     for (i=0; i<n; i++) yy[i] = xx[i] - yy[i];

989:     VecRestoreArrayRead(xin,&xx);
990:     VecRestoreArray(yin,&yy);
991:     PetscLogFlops(1.0*n);
992:   } else {
993:     VecGetArrayRead(xin,&xx);
994:     VecGetArray(yin,&yy);
995: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
996:     {
997:       PetscScalar oalpha = alpha;
998:       fortranaypx_(&n,&oalpha,xx,yy);
999:     }
1000: #else
1001:     {
1002:       PetscInt i;

1004:       for (i=0; i<n; i++) yy[i] = xx[i] + alpha*yy[i];
1005:     }
1006: #endif
1007:     VecRestoreArrayRead(xin,&xx);
1008:     VecRestoreArray(yin,&yy);
1009:     PetscLogFlops(2.0*n);
1010:   }
1011:   return(0);
1012: }
1013: #endif

1015: #include <../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h>
1016: /*
1017:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
1018:   to be slower than a regular C loop.  Hence,we do not include it.
1019:   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
1020: */
1021: #if defined(PETSC_THREADCOMM_ACTIVE)
1022: PetscErrorCode VecWAXPY_kernel(PetscInt thread_id,Vec win,PetscScalar *alpha_p,Vec xin,Vec yin)
1023: {
1024:   PetscErrorCode    ierr;
1025:   PetscScalar       alpha=*alpha_p;
1026:   PetscInt          *trstarts=win->map->trstarts,start,end,i;
1027:   const PetscScalar *xx,*yy;
1028:   PetscScalar       *ww;

1030:   start = trstarts[thread_id];
1031:   end   = trstarts[thread_id+1];

1033:   VecGetArrayRead(xin,&xx);
1034:   VecGetArrayRead(yin,&yy);
1035:   VecGetArray(win,&ww);
1036:   if (alpha == (PetscScalar)1.0) {
1037:     for (i=start; i < end; i++) ww[i] = yy[i] + xx[i];
1038:   } else if (alpha == (PetscScalar)-1.0) {
1039:     for (i=start; i < end; i++) ww[i] = yy[i] - xx[i];
1040:   } else if (alpha == (PetscScalar)0.0) {
1041:     PetscMemcpy(ww+start,yy+start,(end-start)*sizeof(PetscScalar));
1042:   } else {
1043:     PetscScalar oalpha = alpha;
1044: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
1045:     fortranwaxpy_(&n,&oalpha,xx+start,yy+start,ww+start);
1046: #else
1047:     for (i=start; i < end; i++) ww[i] = yy[i] + oalpha*xx[i];
1048: #endif
1049:   }
1050:   VecRestoreArrayRead(xin,&xx);
1051:   VecRestoreArrayRead(yin,&yy);
1052:   VecRestoreArray(win,&ww);
1053:   return 0;
1054: }

1058: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
1059: {
1060:   PetscErrorCode     ierr;
1061:   PetscInt           n = win->map->n;
1062:   PetscScalar        *scal1;

1065:   PetscThreadCommGetScalars(PetscObjectComm((PetscObject)win),&scal1,NULL,NULL);
1066:   *scal1 = alpha;
1067:   PetscThreadCommRunKernel4(PetscObjectComm((PetscObject)win),(PetscThreadKernel)VecWAXPY_kernel,win,scal1,xin,yin);
1068:   if (alpha == (PetscScalar)1.0) {
1069:     PetscLogFlops(n);
1070:   } else if (alpha == (PetscScalar)-1.0) {
1071:     PetscLogFlops(n);
1072:   } else {
1073:     PetscLogFlops(2.0*n);
1074:   }
1075:   return(0);
1076: }
1077: #else
1080: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
1081: {
1082:   PetscErrorCode     ierr;
1083:   PetscInt           i,n = win->map->n;
1084:   PetscScalar        *ww;
1085:   const PetscScalar  *yy,*xx;

1088:   VecGetArrayRead(xin,&xx);
1089:   VecGetArrayRead(yin,&yy);
1090:   VecGetArray(win,&ww);
1091:   if (alpha == (PetscScalar)1.0) {
1092:     PetscLogFlops(n);
1093:     /* could call BLAS axpy after call to memcopy, but may be slower */
1094:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
1095:   } else if (alpha == (PetscScalar)-1.0) {
1096:     PetscLogFlops(n);
1097:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
1098:   } else if (alpha == (PetscScalar)0.0) {
1099:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
1100:   } else {
1101:     PetscScalar oalpha = alpha;
1102: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
1103:     fortranwaxpy_(&n,&oalpha,xx,yy,ww);
1104: #else
1105:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
1106: #endif
1107:     PetscLogFlops(2.0*n);
1108:   }
1109:   VecRestoreArrayRead(xin,&xx);
1110:   VecRestoreArrayRead(yin,&yy);
1111:   VecRestoreArray(win,&ww);
1112:   return(0);
1113: }
1114: #endif

1118: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
1119: {
1120:   PetscErrorCode    ierr;
1121:   PetscInt          n = xin->map->n,i;
1122:   const PetscScalar *xx,*yy;
1123:   PetscReal         m = 0.0;

1126:   VecGetArrayRead(xin,&xx);
1127:   VecGetArrayRead(yin,&yy);
1128:   for (i = 0; i < n; i++) {
1129:     if (yy[i] != (PetscScalar)0.0) {
1130:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
1131:     } else {
1132:       m = PetscMax(PetscAbsScalar(xx[i]), m);
1133:     }
1134:   }
1135:   VecRestoreArrayRead(xin,&xx);
1136:   VecRestoreArrayRead(yin,&yy);
1137:   MPI_Allreduce(&m,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
1138:   PetscLogFlops(n);
1139:   return(0);
1140: }

1144: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
1145: {
1146:   Vec_Seq *v = (Vec_Seq*)vin->data;

1149:   if (v->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
1150:   v->unplacedarray = v->array;  /* save previous array so reset can bring it back */
1151:   v->array         = (PetscScalar*)a;
1152:   return(0);
1153: }

1157: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
1158: {
1159:   Vec_Seq        *v = (Vec_Seq*)vin->data;

1163:   PetscFree(v->array_allocated);
1164:   v->array_allocated = v->array = (PetscScalar*)a;
1165:   return(0);
1166: }