Actual source code: dvec2.c

  2: /* 
  3:    Defines some vector operation functions that are shared by 
  4:   sequential and parallel vectors.
  5: */
 6:  #include src/vec/impls/dvecimpl.h
 7:  #include src/inline/dot.h
 8:  #include src/inline/setval.h
 9:  #include src/inline/axpy.h

 11: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 14: PetscErrorCode VecMDot_Seq(PetscInt nv,Vec xin,const Vec yin[],PetscScalar *z)
 15: {
 16:   Vec_Seq        *xv = (Vec_Seq *)xin->data;
 18:   PetscInt       i,nv_rem,n = xin->n;
 19:   PetscScalar    sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,*x;
 20:   Vec            *yy;

 23:   sum0 = 0;
 24:   sum1 = 0;
 25:   sum2 = 0;

 27:   i      = nv;
 28:   nv_rem = nv&0x3;
 29:   yy     = (Vec*)yin;
 30:   x      = xv->array;

 32:   switch (nv_rem) {
 33:   case 3:
 34:     VecGetArray(yy[0],&yy0);
 35:     VecGetArray(yy[1],&yy1);
 36:     VecGetArray(yy[2],&yy2);
 37:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 38:     VecRestoreArray(yy[0],&yy0);
 39:     VecRestoreArray(yy[1],&yy1);
 40:     VecRestoreArray(yy[2],&yy2);
 41:     z[0] = sum0;
 42:     z[1] = sum1;
 43:     z[2] = sum2;
 44:     break;
 45:   case 2:
 46:     VecGetArray(yy[0],&yy0);
 47:     VecGetArray(yy[1],&yy1);
 48:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 49:     VecRestoreArray(yy[0],&yy0);
 50:     VecRestoreArray(yy[1],&yy1);
 51:     z[0] = sum0;
 52:     z[1] = sum1;
 53:     break;
 54:   case 1:
 55:     VecGetArray(yy[0],&yy0);
 56:     fortranmdot1_(x,yy0,&n,&sum0);
 57:     VecRestoreArray(yy[0],&yy0);
 58:     z[0] = sum0;
 59:     break;
 60:   case 0:
 61:     break;
 62:   }
 63:   z  += nv_rem;
 64:   i  -= nv_rem;
 65:   yy += nv_rem;

 67:   while (i >0) {
 68:     sum0 = 0;
 69:     sum1 = 0;
 70:     sum2 = 0;
 71:     sum3 = 0;
 72:     VecGetArray(yy[0],&yy0);
 73:     VecGetArray(yy[1],&yy1);
 74:     VecGetArray(yy[2],&yy2);
 75:     VecGetArray(yy[3],&yy3);
 76:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 77:     VecRestoreArray(yy[0],&yy0);
 78:     VecRestoreArray(yy[1],&yy1);
 79:     VecRestoreArray(yy[2],&yy2);
 80:     VecRestoreArray(yy[3],&yy3);
 81:     yy  += 4;
 82:     z[0] = sum0;
 83:     z[1] = sum1;
 84:     z[2] = sum2;
 85:     z[3] = sum3;
 86:     z   += 4;
 87:     i   -= 4;
 88:   }
 89:   PetscLogFlops(nv*(2*xin->n-1));
 90:   return(0);
 91: }

 93: #else
 96: PetscErrorCode VecMDot_Seq(PetscInt nv,Vec xin,const Vec yin[],PetscScalar * restrict z)
 97: {
 98:   Vec_Seq        *xv = (Vec_Seq *)xin->data;
100:   PetscInt       n = xin->n,i,j,nv_rem,j_rem;
101:   PetscScalar    sum0,sum1,sum2,sum3,x0,x1,x2,x3,* restrict x;
102:   PetscScalar    * restrict yy0,* restrict yy1,* restrict yy2,*restrict yy3;
103:   Vec            *yy;

106:   sum0 = 0;
107:   sum1 = 0;
108:   sum2 = 0;

110:   i      = nv;
111:   nv_rem = nv&0x3;
112:   yy     = (Vec *)yin;
113:   j      = n;
114:   x      = xv->array;

116:   switch (nv_rem) {
117:   case 3:
118:     VecGetArray(yy[0],&yy0);
119:     VecGetArray(yy[1],&yy1);
120:     VecGetArray(yy[2],&yy2);
121:     switch (j_rem=j&0x3) {
122:     case 3:
123:       x2 = x[2];
124:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
125:       sum2 += x2*PetscConj(yy2[2]);
126:     case 2:
127:       x1 = x[1];
128:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
129:       sum2 += x1*PetscConj(yy2[1]);
130:     case 1:
131:       x0 = x[0];
132:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
133:       sum2 += x0*PetscConj(yy2[0]);
134:     case 0:
135:       x   += j_rem;
136:       yy0 += j_rem;
137:       yy1 += j_rem;
138:       yy2 += j_rem;
139:       j   -= j_rem;
140:       break;
141:     }
142:     while (j>0) {
143:       x0 = x[0];
144:       x1 = x[1];
145:       x2 = x[2];
146:       x3 = x[3];
147:       x += 4;
148: 
149:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
150:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
151:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
152:       j -= 4;
153:     }
154:     z[0] = sum0;
155:     z[1] = sum1;
156:     z[2] = sum2;
157:     VecRestoreArray(yy[0],&yy0);
158:     VecRestoreArray(yy[1],&yy1);
159:     VecRestoreArray(yy[2],&yy2);
160:     break;
161:   case 2:
162:     VecGetArray(yy[0],&yy0);
163:     VecGetArray(yy[1],&yy1);
164:     switch (j_rem=j&0x3) {
165:     case 3:
166:       x2 = x[2];
167:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
168:     case 2:
169:       x1 = x[1];
170:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
171:     case 1:
172:       x0 = x[0];
173:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
174:     case 0:
175:       x   += j_rem;
176:       yy0 += j_rem;
177:       yy1 += j_rem;
178:       j   -= j_rem;
179:       break;
180:     }
181:     while (j>0) {
182:       x0 = x[0];
183:       x1 = x[1];
184:       x2 = x[2];
185:       x3 = x[3];
186:       x += 4;
187: 
188:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
189:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
190:       j -= 4;
191:     }
192:     z[0] = sum0;
193:     z[1] = sum1;
194: 
195:     VecRestoreArray(yy[0],&yy0);
196:     VecRestoreArray(yy[1],&yy1);
197:     break;
198:   case 1:
199:     VecGetArray(yy[0],&yy0);
200:     switch (j_rem=j&0x3) {
201:     case 3:
202:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
203:     case 2:
204:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
205:     case 1:
206:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
207:     case 0:
208:       x   += j_rem;
209:       yy0 += j_rem;
210:       j   -= j_rem;
211:       break;
212:     }
213:     while (j>0) {
214:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
215:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
216:       yy0+=4;
217:       j -= 4; x+=4;
218:     }
219:     z[0] = sum0;

221:     VecRestoreArray(yy[0],&yy0);
222:     break;
223:   case 0:
224:     break;
225:   }
226:   z  += nv_rem;
227:   i  -= nv_rem;
228:   yy += nv_rem;

230:   while (i >0) {
231:     sum0 = 0;
232:     sum1 = 0;
233:     sum2 = 0;
234:     sum3 = 0;
235:     VecGetArray(yy[0],&yy0);
236:     VecGetArray(yy[1],&yy1);
237:     VecGetArray(yy[2],&yy2);
238:     VecGetArray(yy[3],&yy3);

240:     j = n;
241:     x = xv->array;
242:     switch (j_rem=j&0x3) {
243:     case 3:
244:       x2 = x[2];
245:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
246:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
247:     case 2:
248:       x1 = x[1];
249:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
250:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
251:     case 1:
252:       x0 = x[0];
253:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
254:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
255:     case 0:
256:       x   += j_rem;
257:       yy0 += j_rem;
258:       yy1 += j_rem;
259:       yy2 += j_rem;
260:       yy3 += j_rem;
261:       j   -= j_rem;
262:       break;
263:     }
264:     while (j>0) {
265:       x0 = x[0];
266:       x1 = x[1];
267:       x2 = x[2];
268:       x3 = x[3];
269:       x += 4;
270: 
271:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
272:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
273:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
274:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
275:       j -= 4;
276:     }
277:     z[0] = sum0;
278:     z[1] = sum1;
279:     z[2] = sum2;
280:     z[3] = sum3;
281:     z   += 4;
282:     i   -= 4;
283:     VecRestoreArray(yy[0],&yy0);
284:     VecRestoreArray(yy[1],&yy1);
285:     VecRestoreArray(yy[2],&yy2);
286:     VecRestoreArray(yy[3],&yy3);
287:     yy  += 4;
288:   }
289:   PetscLogFlops(nv*(2*xin->n-1));
290:   return(0);
291: }
292: #endif

294: /* ----------------------------------------------------------------------------*/
297: PetscErrorCode VecMTDot_Seq(PetscInt nv,Vec xin,const Vec yin[],PetscScalar *z)
298: {
299:   Vec_Seq        *xv = (Vec_Seq *)xin->data;
301:   PetscInt       n = xin->n,i,j,nv_rem,j_rem;
302:   PetscScalar    sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,x0,x1,x2,x3,*x;
303:   Vec            *yy;
304: 

307:   sum0 = 0;
308:   sum1 = 0;
309:   sum2 = 0;

311:   i      = nv;
312:   nv_rem = nv&0x3;
313:   yy     = (Vec*)yin;
314:   j    = n;
315:   x    = xv->array;

317:   switch (nv_rem) {
318:   case 3:
319:     VecGetArray(yy[0],&yy0);
320:     VecGetArray(yy[1],&yy1);
321:     VecGetArray(yy[2],&yy2);
322:     switch (j_rem=j&0x3) {
323:     case 3:
324:       x2 = x[2];
325:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
326:       sum2 += x2*yy2[2];
327:     case 2:
328:       x1 = x[1];
329:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
330:       sum2 += x1*yy2[1];
331:     case 1:
332:       x0 = x[0];
333:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
334:       sum2 += x0*yy2[0];
335:     case 0:
336:       x  += j_rem;
337:       yy0 += j_rem;
338:       yy1 += j_rem;
339:       yy2 += j_rem;
340:       j  -= j_rem;
341:       break;
342:     }
343:     while (j>0) {
344:       x0 = x[0];
345:       x1 = x[1];
346:       x2 = x[2];
347:       x3 = x[3];
348:       x += 4;
349: 
350:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
351:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
352:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
353:       j -= 4;
354:     }
355:     z[0] = sum0;
356:     z[1] = sum1;
357:     z[2] = sum2;
358:     VecRestoreArray(yy[0],&yy0);
359:     VecRestoreArray(yy[1],&yy1);
360:     VecRestoreArray(yy[2],&yy2);
361:     break;
362:   case 2:
363:     VecGetArray(yy[0],&yy0);
364:     VecGetArray(yy[1],&yy1);
365:     switch (j_rem=j&0x3) {
366:     case 3:
367:       x2 = x[2];
368:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
369:     case 2:
370:       x1 = x[1];
371:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
372:     case 1:
373:       x0 = x[0];
374:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
375:     case 0:
376:       x  += j_rem;
377:       yy0 += j_rem;
378:       yy1 += j_rem;
379:       j  -= j_rem;
380:       break;
381:     }
382:     while (j>0) {
383:       x0 = x[0];
384:       x1 = x[1];
385:       x2 = x[2];
386:       x3 = x[3];
387:       x += 4;
388: 
389:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
390:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
391:       j -= 4;
392:     }
393:     z[0] = sum0;
394:     z[1] = sum1;
395: 
396:     VecRestoreArray(yy[0],&yy0);
397:     VecRestoreArray(yy[1],&yy1);
398:     break;
399:   case 1:
400:     VecGetArray(yy[0],&yy0);
401:     switch (j_rem=j&0x3) {
402:     case 3:
403:       x2 = x[2]; sum0 += x2*yy0[2];
404:     case 2:
405:       x1 = x[1]; sum0 += x1*yy0[1];
406:     case 1:
407:       x0 = x[0]; sum0 += x0*yy0[0];
408:     case 0:
409:       x  += j_rem;
410:       yy0 += j_rem;
411:       j  -= j_rem;
412:       break;
413:     }
414:     while (j>0) {
415:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
416:       j -= 4; x+=4;
417:     }
418:     z[0] = sum0;

420:     VecRestoreArray(yy[0],&yy0);
421:     break;
422:   case 0:
423:     break;
424:   }
425:   z  += nv_rem;
426:   i  -= nv_rem;
427:   yy += nv_rem;

429:   while (i >0) {
430:     sum0 = 0;
431:     sum1 = 0;
432:     sum2 = 0;
433:     sum3 = 0;
434:     VecGetArray(yy[0],&yy0);
435:     VecGetArray(yy[1],&yy1);
436:     VecGetArray(yy[2],&yy2);
437:     VecGetArray(yy[3],&yy3);

439:     j = n;
440:     x = xv->array;
441:     switch (j_rem=j&0x3) {
442:     case 3:
443:       x2 = x[2];
444:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
445:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
446:     case 2:
447:       x1 = x[1];
448:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
449:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
450:     case 1:
451:       x0 = x[0];
452:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
453:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
454:     case 0:
455:       x  += j_rem;
456:       yy0 += j_rem;
457:       yy1 += j_rem;
458:       yy2 += j_rem;
459:       yy3 += j_rem;
460:       j  -= j_rem;
461:       break;
462:     }
463:     while (j>0) {
464:       x0 = x[0];
465:       x1 = x[1];
466:       x2 = x[2];
467:       x3 = x[3];
468:       x += 4;
469: 
470:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
471:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
472:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
473:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
474:       j -= 4;
475:     }
476:     z[0] = sum0;
477:     z[1] = sum1;
478:     z[2] = sum2;
479:     z[3] = sum3;
480:     z   += 4;
481:     i   -= 4;
482:     VecRestoreArray(yy[0],&yy0);
483:     VecRestoreArray(yy[1],&yy1);
484:     VecRestoreArray(yy[2],&yy2);
485:     VecRestoreArray(yy[3],&yy3);
486:     yy  += 4;
487:   }
488:   PetscLogFlops(nv*(2*xin->n-1));
489:   return(0);
490: }
491: 

495: PetscErrorCode VecMax_Seq(Vec xin,PetscInt* idx,PetscReal * z)
496: {
497:   Vec_Seq      *x = (Vec_Seq*)xin->data;
498:   PetscInt     i,j=0,n = xin->n;
499:   PetscReal    max,tmp;
500:   PetscScalar  *xx = x->array;

503:   if (!n) {
504:     max = PETSC_MIN;
505:     j   = -1;
506:   } else {
507: #if defined(PETSC_USE_COMPLEX)
508:       max = PetscRealPart(*xx++); j = 0;
509: #else
510:       max = *xx++; j = 0;
511: #endif
512:     for (i=1; i<n; i++) {
513: #if defined(PETSC_USE_COMPLEX)
514:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
515: #else
516:       if ((tmp = *xx++) > max) { j = i; max = tmp; }
517: #endif
518:     }
519:   }
520:   *z   = max;
521:   if (idx) *idx = j;
522:   return(0);
523: }

527: PetscErrorCode VecMin_Seq(Vec xin,PetscInt* idx,PetscReal * z)
528: {
529:   Vec_Seq      *x = (Vec_Seq*)xin->data;
530:   PetscInt     i,j=0,n = xin->n;
531:   PetscReal    min,tmp;
532:   PetscScalar  *xx = x->array;

535:   if (!n) {
536:     min = PETSC_MAX;
537:     j   = -1;
538:   } else {
539: #if defined(PETSC_USE_COMPLEX)
540:     min = PetscRealPart(*xx++); j = 0;
541: #else
542:     min = *xx++; j = 0;
543: #endif
544:     for (i=1; i<n; i++) {
545: #if defined(PETSC_USE_COMPLEX)
546:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
547: #else
548:       if ((tmp = *xx++) < min) { j = i; min = tmp; }
549: #endif
550:     }
551:   }
552:   *z   = min;
553:   if (idx) *idx = j;
554:   return(0);
555: }

559: PetscErrorCode VecSet_Seq(const PetscScalar* alpha,Vec xin)
560: {
561:   Vec_Seq        *x = (Vec_Seq *)xin->data;
563:   PetscInt       n = xin->n;
564:   PetscScalar    *xx = x->array,oalpha = *alpha;

567:   if (oalpha == 0.0) {
568:     PetscMemzero(xx,n*sizeof(PetscScalar));
569:   }
570:   else {
571:     SET(xx,n,oalpha);
572:   }
573:   return(0);
574: }

578: PetscErrorCode VecSetRandom_Seq(PetscRandom r,Vec xin)
579: {
581:   PetscInt       n = xin->n,i;
582:   PetscScalar    *xx;

585:   VecGetArray(xin,&xx);
586:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
587:   VecRestoreArray(xin,&xx);
588:   return(0);
589: }

593: PetscErrorCode VecMAXPY_Seq(PetscInt nv,const PetscScalar *alpha,Vec xin,Vec *y)
594: {
595:   Vec_Seq        *xdata = (Vec_Seq*)xin->data;
597:   PetscInt       n = xin->n,j,j_rem;
598:   PetscScalar    *xx,*yy0,*yy1,*yy2,*yy3,alpha0,alpha1,alpha2,alpha3;

600: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
601: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
602: #endif

605:   PetscLogFlops(nv*2*n);

607:   xx = xdata->array;
608:   switch (j_rem=nv&0x3) {
609:   case 3:
610:     VecGetArray(y[0],&yy0);
611:     VecGetArray(y[1],&yy1);
612:     VecGetArray(y[2],&yy2);
613:     alpha0 = alpha[0];
614:     alpha1 = alpha[1];
615:     alpha2 = alpha[2];
616:     alpha += 3;
617:     APXY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
618:     VecRestoreArray(y[0],&yy0);
619:     VecRestoreArray(y[1],&yy1);
620:     VecRestoreArray(y[2],&yy2);
621:     y     += 3;
622:     break;
623:   case 2:
624:     VecGetArray(y[0],&yy0);
625:     VecGetArray(y[1],&yy1);
626:     alpha0 = alpha[0];
627:     alpha1 = alpha[1];
628:     alpha +=2;
629:     APXY2(xx,alpha0,alpha1,yy0,yy1,n);
630:     VecRestoreArray(y[0],&yy0);
631:     VecRestoreArray(y[1],&yy1);
632:     y     +=2;
633:     break;
634:   case 1:
635:     VecGetArray(y[0],&yy0);
636:     alpha0 = *alpha++; APXY(xx,alpha0,yy0,n);
637:     VecRestoreArray(y[0],&yy0);
638:     y     +=1;
639:     break;
640:   }
641:   for (j=j_rem; j<nv; j+=4) {
642:     VecGetArray(y[0],&yy0);
643:     VecGetArray(y[1],&yy1);
644:     VecGetArray(y[2],&yy2);
645:     VecGetArray(y[3],&yy3);
646:     alpha0 = alpha[0];
647:     alpha1 = alpha[1];
648:     alpha2 = alpha[2];
649:     alpha3 = alpha[3];
650:     alpha  += 4;

652:     APXY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
653:     VecRestoreArray(y[0],&yy0);
654:     VecRestoreArray(y[1],&yy1);
655:     VecRestoreArray(y[2],&yy2);
656:     VecRestoreArray(y[3],&yy3);
657:     y      += 4;
658:   }
659:   return(0);
660: }

664: PetscErrorCode VecAYPX_Seq(const PetscScalar *alpha,Vec xin,Vec yin)
665: {
666:   Vec_Seq        *x = (Vec_Seq *)xin->data;
668:   PetscInt       n = xin->n;
669:   PetscScalar    *xx = x->array,*yy;

672:   VecGetArray(yin,&yy);
673: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
674:   fortranaypx_(&n,alpha,xx,yy);
675: #else
676:   {
677:     PetscInt i;
678:     PetscScalar oalpha = *alpha;
679:     for (i=0; i<n; i++) {
680:       yy[i] = xx[i] + oalpha*yy[i];
681:     }
682:   }
683: #endif
684:   VecRestoreArray(yin,&yy);
685:   PetscLogFlops(2*n);
686:   return(0);
687: }

689: /*
690:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
691:   to be slower than a regular C loop.  Hence,we do not include it.
692:   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
693: */

697: PetscErrorCode VecWAXPY_Seq(const PetscScalar* alpha,Vec xin,Vec yin,Vec win)
698: {
699:   Vec_Seq        *x = (Vec_Seq *)xin->data;
701:   PetscInt       i,n = xin->n;
702:   PetscScalar    *xx = x->array,*yy,*ww,oalpha = *alpha;

705:   VecGetArray(yin,&yy);
706:   VecGetArray(win,&ww);
707:   if (oalpha == 1.0) {
708:     PetscLogFlops(n);
709:     /* could call BLAS axpy after call to memcopy, but may be slower */
710:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
711:   } else if (oalpha == -1.0) {
712:     PetscLogFlops(n);
713:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
714:   } else if (oalpha == 0.0) {
715:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
716:   } else {
717: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
718:     fortranwaxpy_(&n,alpha,xx,yy,ww);
719: #else
720:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
721: #endif
722:     PetscLogFlops(2*n);
723:   }
724:   VecRestoreArray(yin,&yy);
725:   VecRestoreArray(win,&ww);
726:   return(0);
727: }

731: PetscErrorCode VecPointwiseMult_Seq(Vec xin,Vec yin,Vec win)
732: {
733:   Vec_Seq        *x = (Vec_Seq *)xin->data;
735:   PetscInt       n = xin->n,i;
736:   PetscScalar    *xx = x->array,*yy,*ww;

739:   VecGetArray(yin,&yy);
740:   if (yin != win) {VecGetArray(win,&ww);}
741:   else ww = yy;

743:   if (ww == xx) {
744:     for (i=0; i<n; i++) ww[i] *= yy[i];
745:   } else if (ww == yy) {
746:     for (i=0; i<n; i++) ww[i] *= xx[i];
747:   } else {
748:     /*  This was suppose to help on SGI but didn't really seem to
749:           PetscReal * __restrict www = ww;
750:           PetscReal * __restrict yyy = yy;
751:           PetscReal * __restrict xxx = xx;
752:           for (i=0; i<n; i++) www[i] = xxx[i] * yyy[i];
753:     */
754: #if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
755:     fortranxtimesy_(xx,yy,ww,&n);
756: #else
757:     for (i=0; i<n; i++) ww[i] = xx[i] * yy[i];
758: #endif
759:   }
760:   VecRestoreArray(yin,&yy);
761:   if (yin != win) {VecRestoreArray(win,&ww);}
762:   PetscLogFlops(n);
763:   return(0);
764: }

768: PetscErrorCode VecPointwiseDivide_Seq(Vec xin,Vec yin,Vec win)
769: {
770:   Vec_Seq        *x = (Vec_Seq *)xin->data;
772:   PetscInt       n = xin->n,i;
773:   PetscScalar    *xx = x->array,*yy,*ww;

776:   VecGetArray(yin,&yy);
777:   if (yin != win) {VecGetArray(win,&ww);}
778:   else {ww = yy;}
779:   for (i=0; i<n; i++) ww[i] = xx[i] / yy[i];
780:   VecRestoreArray(yin,&yy);
781:   if (yin != win) {VecRestoreArray(win,&ww);}
782:   PetscLogFlops(n);
783:   return(0);
784: }

788: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
789: {
790:   Vec_Seq        *x = (Vec_Seq *)xin->data;
792:   PetscInt       n = xin->n,i;
793:   PetscScalar    *xx = x->array,*yy;
794:   PetscReal      m = 0.0;

797:   VecGetArray(yin,&yy);
798:   for(i = 0; i < n; i++) {
799:     if (yy[i] != 0.0) {
800:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
801:     } else {
802:       m = PetscMax(PetscAbsScalar(xx[i]), m);
803:     }
804:   }
805:   MPI_Allreduce(&m,max,1,MPIU_REAL,MPI_MAX,xin->comm);
806:   VecRestoreArray(yin,&yy);
807:   PetscLogFlops(n);
808:   return(0);
809: }

813: PetscErrorCode VecGetArray_Seq(Vec vin,PetscScalar *a[])
814: {
815:   Vec_Seq        *v = (Vec_Seq *)vin->data;

819:   if (vin->array_gotten) {
820:     SETERRQ(PETSC_ERR_ORDER,"Array has already been gotten for this vector,you may\n\
821:     have forgotten a call to VecRestoreArray()");
822:   }
823:   vin->array_gotten = PETSC_TRUE;

825:   *a =  v->array;
826:   PetscObjectTakeAccess(vin);
827:   return(0);
828: }

832: PetscErrorCode VecRestoreArray_Seq(Vec vin,PetscScalar *a[])
833: {

837:   if (!vin->array_gotten) {
838:     SETERRQ(PETSC_ERR_ORDER,"Array has not been gotten for this vector, you may\n\
839:     have forgotten a call to VecGetArray()");
840:   }
841:   vin->array_gotten = PETSC_FALSE;
842:   if (a) *a         = 0; /* now user cannot accidently use it again */

844:   PetscObjectGrantAccess(vin);
845:   return(0);
846: }

850: PetscErrorCode VecResetArray_Seq(Vec vin)
851: {
852:   Vec_Seq *v = (Vec_Seq *)vin->data;

855:   v->array = v->array_allocated;
856:   return(0);
857: }

861: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
862: {
863:   Vec_Seq *v = (Vec_Seq *)vin->data;

866:   v->array = (PetscScalar *)a;
867:   return(0);
868: }

872: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
873: {
874:   Vec_Seq        *v = (Vec_Seq *)vin->data;

878:   if (v->array_allocated) {PetscFree(v->array_allocated);}
879:   v->array_allocated = v->array = (PetscScalar *)a;
880:   return(0);
881: }

885: PetscErrorCode VecGetSize_Seq(Vec vin,PetscInt *size)
886: {
888:   *size = vin->n;
889:   return(0);
890: }

894: PetscErrorCode VecConjugate_Seq(Vec xin)
895: {
896:   PetscScalar *x = ((Vec_Seq *)xin->data)->array;
897:   PetscInt    n = xin->n;

900:   while (n-->0) {
901:     *x = PetscConj(*x);
902:     x++;
903:   }
904:   return(0);
905: }
906: