Actual source code: maij.c

  1: /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for 
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19:  #include src/mat/impls/maij/maij.h
 20:  #include src/vec/vecimpl.h

 24: int MatMAIJGetAIJ(Mat A,Mat *B)
 25: {
 26:   int         ierr;
 27:   PetscTruth  ismpimaij,isseqmaij;

 30:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 31:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 32:   if (ismpimaij) {
 33:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 35:     *B = b->A;
 36:   } else if (isseqmaij) {
 37:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 39:     *B = b->AIJ;
 40:   } else {
 41:     *B = A;
 42:   }
 43:   return(0);
 44: }

 48: int MatMAIJRedimension(Mat A,int dof,Mat *B)
 49: {
 51:   Mat Aij;

 54:   MatMAIJGetAIJ(A,&Aij);
 55:   MatCreateMAIJ(Aij,dof,B);
 56:   return(0);
 57: }

 61: int MatDestroy_SeqMAIJ(Mat A)
 62: {
 63:   int         ierr;
 64:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 67:   if (b->AIJ) {
 68:     MatDestroy(b->AIJ);
 69:   }
 70:   PetscFree(b);
 71:   return(0);
 72: }

 76: int MatDestroy_MPIMAIJ(Mat A)
 77: {
 78:   int         ierr;
 79:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 82:   if (b->AIJ) {
 83:     MatDestroy(b->AIJ);
 84:   }
 85:   if (b->OAIJ) {
 86:     MatDestroy(b->OAIJ);
 87:   }
 88:   if (b->A) {
 89:     MatDestroy(b->A);
 90:   }
 91:   if (b->ctx) {
 92:     VecScatterDestroy(b->ctx);
 93:   }
 94:   if (b->w) {
 95:     VecDestroy(b->w);
 96:   }
 97:   PetscFree(b);
 98:   return(0);
 99: }

101: /*MC
102:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
103:   multicomponent problems, interpolating or restricting each component the same way independently.
104:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

106:   Operations provided:
107: . MatMult
108: . MatMultTranspose
109: . MatMultAdd
110: . MatMultTransposeAdd

112:   Level: advanced

 114: .seealso: MatCreateMAIJ()
115: M*/

117: EXTERN_C_BEGIN
120: int MatCreate_MAIJ(Mat A)
121: {
122:   int         ierr;
123:   Mat_MPIMAIJ *b;

126:   PetscNew(Mat_MPIMAIJ,&b);
127:   A->data  = (void*)b;
128:   PetscMemzero(b,sizeof(Mat_MPIMAIJ));
129:   PetscMemzero(A->ops,sizeof(struct _MatOps));
130:   A->factor           = 0;
131:   A->mapping          = 0;

133:   b->AIJ  = 0;
134:   b->dof  = 0;
135:   b->OAIJ = 0;
136:   b->ctx  = 0;
137:   b->w    = 0;
138:   return(0);
139: }
140: EXTERN_C_END

142: /* --------------------------------------------------------------------------------------*/
145: int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
146: {
147:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
148:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
149:   PetscScalar   *x,*y,*v,sum1, sum2;
150:   int           ierr,m = b->AIJ->m,*idx,*ii;
151:   int           n,i,jrow,j;

154:   VecGetArrayFast(xx,&x);
155:   VecGetArrayFast(yy,&y);
156:   idx  = a->j;
157:   v    = a->a;
158:   ii   = a->i;

160:   for (i=0; i<m; i++) {
161:     jrow = ii[i];
162:     n    = ii[i+1] - jrow;
163:     sum1  = 0.0;
164:     sum2  = 0.0;
165:     for (j=0; j<n; j++) {
166:       sum1 += v[jrow]*x[2*idx[jrow]];
167:       sum2 += v[jrow]*x[2*idx[jrow]+1];
168:       jrow++;
169:      }
170:     y[2*i]   = sum1;
171:     y[2*i+1] = sum2;
172:   }

174:   PetscLogFlops(4*a->nz - 2*m);
175:   VecRestoreArrayFast(xx,&x);
176:   VecRestoreArrayFast(yy,&y);
177:   return(0);
178: }

182: int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
185:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
186:   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
187:   int           ierr,m = b->AIJ->m,n,i,*idx;

190:   VecSet(&zero,yy);
191:   VecGetArrayFast(xx,&x);
192:   VecGetArrayFast(yy,&y);
193: 
194:   for (i=0; i<m; i++) {
195:     idx    = a->j + a->i[i] ;
196:     v      = a->a + a->i[i] ;
197:     n      = a->i[i+1] - a->i[i];
198:     alpha1 = x[2*i];
199:     alpha2 = x[2*i+1];
200:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
201:   }
202:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
203:   VecRestoreArrayFast(xx,&x);
204:   VecRestoreArrayFast(yy,&y);
205:   return(0);
206: }

210: int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
211: {
212:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
213:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
214:   PetscScalar   *x,*y,*v,sum1, sum2;
215:   int           ierr,m = b->AIJ->m,*idx,*ii;
216:   int           n,i,jrow,j;

219:   if (yy != zz) {VecCopy(yy,zz);}
220:   VecGetArrayFast(xx,&x);
221:   VecGetArrayFast(zz,&y);
222:   idx  = a->j;
223:   v    = a->a;
224:   ii   = a->i;

226:   for (i=0; i<m; i++) {
227:     jrow = ii[i];
228:     n    = ii[i+1] - jrow;
229:     sum1  = 0.0;
230:     sum2  = 0.0;
231:     for (j=0; j<n; j++) {
232:       sum1 += v[jrow]*x[2*idx[jrow]];
233:       sum2 += v[jrow]*x[2*idx[jrow]+1];
234:       jrow++;
235:      }
236:     y[2*i]   += sum1;
237:     y[2*i+1] += sum2;
238:   }

240:   PetscLogFlops(4*a->nz - 2*m);
241:   VecRestoreArrayFast(xx,&x);
242:   VecRestoreArrayFast(zz,&y);
243:   return(0);
244: }
247: int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
248: {
249:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
250:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
251:   PetscScalar   *x,*y,*v,alpha1,alpha2;
252:   int           ierr,m = b->AIJ->m,n,i,*idx;

255:   if (yy != zz) {VecCopy(yy,zz);}
256:   VecGetArrayFast(xx,&x);
257:   VecGetArrayFast(zz,&y);
258: 
259:   for (i=0; i<m; i++) {
260:     idx   = a->j + a->i[i] ;
261:     v     = a->a + a->i[i] ;
262:     n     = a->i[i+1] - a->i[i];
263:     alpha1 = x[2*i];
264:     alpha2 = x[2*i+1];
265:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
266:   }
267:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
268:   VecRestoreArrayFast(xx,&x);
269:   VecRestoreArrayFast(zz,&y);
270:   return(0);
271: }
272: /* --------------------------------------------------------------------------------------*/
275: int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
276: {
277:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
278:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
279:   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
280:   int           ierr,m = b->AIJ->m,*idx,*ii;
281:   int           n,i,jrow,j;

284:   VecGetArrayFast(xx,&x);
285:   VecGetArrayFast(yy,&y);
286:   idx  = a->j;
287:   v    = a->a;
288:   ii   = a->i;

290:   for (i=0; i<m; i++) {
291:     jrow = ii[i];
292:     n    = ii[i+1] - jrow;
293:     sum1  = 0.0;
294:     sum2  = 0.0;
295:     sum3  = 0.0;
296:     for (j=0; j<n; j++) {
297:       sum1 += v[jrow]*x[3*idx[jrow]];
298:       sum2 += v[jrow]*x[3*idx[jrow]+1];
299:       sum3 += v[jrow]*x[3*idx[jrow]+2];
300:       jrow++;
301:      }
302:     y[3*i]   = sum1;
303:     y[3*i+1] = sum2;
304:     y[3*i+2] = sum3;
305:   }

307:   PetscLogFlops(6*a->nz - 3*m);
308:   VecRestoreArrayFast(xx,&x);
309:   VecRestoreArrayFast(yy,&y);
310:   return(0);
311: }

315: int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
316: {
317:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
318:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
319:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
320:   int           ierr,m = b->AIJ->m,n,i,*idx;

323:   VecSet(&zero,yy);
324:   VecGetArrayFast(xx,&x);
325:   VecGetArrayFast(yy,&y);
326: 
327:   for (i=0; i<m; i++) {
328:     idx    = a->j + a->i[i];
329:     v      = a->a + a->i[i];
330:     n      = a->i[i+1] - a->i[i];
331:     alpha1 = x[3*i];
332:     alpha2 = x[3*i+1];
333:     alpha3 = x[3*i+2];
334:     while (n-->0) {
335:       y[3*(*idx)]   += alpha1*(*v);
336:       y[3*(*idx)+1] += alpha2*(*v);
337:       y[3*(*idx)+2] += alpha3*(*v);
338:       idx++; v++;
339:     }
340:   }
341:   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
342:   VecRestoreArrayFast(xx,&x);
343:   VecRestoreArrayFast(yy,&y);
344:   return(0);
345: }

349: int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
350: {
351:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
352:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
353:   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
354:   int           ierr,m = b->AIJ->m,*idx,*ii;
355:   int           n,i,jrow,j;

358:   if (yy != zz) {VecCopy(yy,zz);}
359:   VecGetArrayFast(xx,&x);
360:   VecGetArrayFast(zz,&y);
361:   idx  = a->j;
362:   v    = a->a;
363:   ii   = a->i;

365:   for (i=0; i<m; i++) {
366:     jrow = ii[i];
367:     n    = ii[i+1] - jrow;
368:     sum1  = 0.0;
369:     sum2  = 0.0;
370:     sum3  = 0.0;
371:     for (j=0; j<n; j++) {
372:       sum1 += v[jrow]*x[3*idx[jrow]];
373:       sum2 += v[jrow]*x[3*idx[jrow]+1];
374:       sum3 += v[jrow]*x[3*idx[jrow]+2];
375:       jrow++;
376:      }
377:     y[3*i]   += sum1;
378:     y[3*i+1] += sum2;
379:     y[3*i+2] += sum3;
380:   }

382:   PetscLogFlops(6*a->nz);
383:   VecRestoreArrayFast(xx,&x);
384:   VecRestoreArrayFast(zz,&y);
385:   return(0);
386: }
389: int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
390: {
391:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
392:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
393:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
394:   int           ierr,m = b->AIJ->m,n,i,*idx;

397:   if (yy != zz) {VecCopy(yy,zz);}
398:   VecGetArrayFast(xx,&x);
399:   VecGetArrayFast(zz,&y);
400:   for (i=0; i<m; i++) {
401:     idx    = a->j + a->i[i] ;
402:     v      = a->a + a->i[i] ;
403:     n      = a->i[i+1] - a->i[i];
404:     alpha1 = x[3*i];
405:     alpha2 = x[3*i+1];
406:     alpha3 = x[3*i+2];
407:     while (n-->0) {
408:       y[3*(*idx)]   += alpha1*(*v);
409:       y[3*(*idx)+1] += alpha2*(*v);
410:       y[3*(*idx)+2] += alpha3*(*v);
411:       idx++; v++;
412:     }
413:   }
414:   PetscLogFlops(6*a->nz);
415:   VecRestoreArrayFast(xx,&x);
416:   VecRestoreArrayFast(zz,&y);
417:   return(0);
418: }

420: /* ------------------------------------------------------------------------------*/
423: int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
424: {
425:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
426:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
427:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
428:   int           ierr,m = b->AIJ->m,*idx,*ii;
429:   int           n,i,jrow,j;

432:   VecGetArrayFast(xx,&x);
433:   VecGetArrayFast(yy,&y);
434:   idx  = a->j;
435:   v    = a->a;
436:   ii   = a->i;

438:   for (i=0; i<m; i++) {
439:     jrow = ii[i];
440:     n    = ii[i+1] - jrow;
441:     sum1  = 0.0;
442:     sum2  = 0.0;
443:     sum3  = 0.0;
444:     sum4  = 0.0;
445:     for (j=0; j<n; j++) {
446:       sum1 += v[jrow]*x[4*idx[jrow]];
447:       sum2 += v[jrow]*x[4*idx[jrow]+1];
448:       sum3 += v[jrow]*x[4*idx[jrow]+2];
449:       sum4 += v[jrow]*x[4*idx[jrow]+3];
450:       jrow++;
451:      }
452:     y[4*i]   = sum1;
453:     y[4*i+1] = sum2;
454:     y[4*i+2] = sum3;
455:     y[4*i+3] = sum4;
456:   }

458:   PetscLogFlops(8*a->nz - 4*m);
459:   VecRestoreArrayFast(xx,&x);
460:   VecRestoreArrayFast(yy,&y);
461:   return(0);
462: }

466: int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
467: {
468:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
469:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
470:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
471:   int           ierr,m = b->AIJ->m,n,i,*idx;

474:   VecSet(&zero,yy);
475:   VecGetArrayFast(xx,&x);
476:   VecGetArrayFast(yy,&y);
477:   for (i=0; i<m; i++) {
478:     idx    = a->j + a->i[i] ;
479:     v      = a->a + a->i[i] ;
480:     n      = a->i[i+1] - a->i[i];
481:     alpha1 = x[4*i];
482:     alpha2 = x[4*i+1];
483:     alpha3 = x[4*i+2];
484:     alpha4 = x[4*i+3];
485:     while (n-->0) {
486:       y[4*(*idx)]   += alpha1*(*v);
487:       y[4*(*idx)+1] += alpha2*(*v);
488:       y[4*(*idx)+2] += alpha3*(*v);
489:       y[4*(*idx)+3] += alpha4*(*v);
490:       idx++; v++;
491:     }
492:   }
493:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
494:   VecRestoreArrayFast(xx,&x);
495:   VecRestoreArrayFast(yy,&y);
496:   return(0);
497: }

501: int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
502: {
503:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
504:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
505:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
506:   int           ierr,m = b->AIJ->m,*idx,*ii;
507:   int           n,i,jrow,j;

510:   if (yy != zz) {VecCopy(yy,zz);}
511:   VecGetArrayFast(xx,&x);
512:   VecGetArrayFast(zz,&y);
513:   idx  = a->j;
514:   v    = a->a;
515:   ii   = a->i;

517:   for (i=0; i<m; i++) {
518:     jrow = ii[i];
519:     n    = ii[i+1] - jrow;
520:     sum1  = 0.0;
521:     sum2  = 0.0;
522:     sum3  = 0.0;
523:     sum4  = 0.0;
524:     for (j=0; j<n; j++) {
525:       sum1 += v[jrow]*x[4*idx[jrow]];
526:       sum2 += v[jrow]*x[4*idx[jrow]+1];
527:       sum3 += v[jrow]*x[4*idx[jrow]+2];
528:       sum4 += v[jrow]*x[4*idx[jrow]+3];
529:       jrow++;
530:      }
531:     y[4*i]   += sum1;
532:     y[4*i+1] += sum2;
533:     y[4*i+2] += sum3;
534:     y[4*i+3] += sum4;
535:   }

537:   PetscLogFlops(8*a->nz - 4*m);
538:   VecRestoreArrayFast(xx,&x);
539:   VecRestoreArrayFast(zz,&y);
540:   return(0);
541: }
544: int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
545: {
546:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
547:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
548:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
549:   int           ierr,m = b->AIJ->m,n,i,*idx;

552:   if (yy != zz) {VecCopy(yy,zz);}
553:   VecGetArrayFast(xx,&x);
554:   VecGetArrayFast(zz,&y);
555: 
556:   for (i=0; i<m; i++) {
557:     idx    = a->j + a->i[i] ;
558:     v      = a->a + a->i[i] ;
559:     n      = a->i[i+1] - a->i[i];
560:     alpha1 = x[4*i];
561:     alpha2 = x[4*i+1];
562:     alpha3 = x[4*i+2];
563:     alpha4 = x[4*i+3];
564:     while (n-->0) {
565:       y[4*(*idx)]   += alpha1*(*v);
566:       y[4*(*idx)+1] += alpha2*(*v);
567:       y[4*(*idx)+2] += alpha3*(*v);
568:       y[4*(*idx)+3] += alpha4*(*v);
569:       idx++; v++;
570:     }
571:   }
572:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
573:   VecRestoreArrayFast(xx,&x);
574:   VecRestoreArrayFast(zz,&y);
575:   return(0);
576: }
577: /* ------------------------------------------------------------------------------*/

581: int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
582: {
583:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
584:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
585:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
586:   int           ierr,m = b->AIJ->m,*idx,*ii;
587:   int           n,i,jrow,j;

590:   VecGetArrayFast(xx,&x);
591:   VecGetArrayFast(yy,&y);
592:   idx  = a->j;
593:   v    = a->a;
594:   ii   = a->i;

596:   for (i=0; i<m; i++) {
597:     jrow = ii[i];
598:     n    = ii[i+1] - jrow;
599:     sum1  = 0.0;
600:     sum2  = 0.0;
601:     sum3  = 0.0;
602:     sum4  = 0.0;
603:     sum5  = 0.0;
604:     for (j=0; j<n; j++) {
605:       sum1 += v[jrow]*x[5*idx[jrow]];
606:       sum2 += v[jrow]*x[5*idx[jrow]+1];
607:       sum3 += v[jrow]*x[5*idx[jrow]+2];
608:       sum4 += v[jrow]*x[5*idx[jrow]+3];
609:       sum5 += v[jrow]*x[5*idx[jrow]+4];
610:       jrow++;
611:      }
612:     y[5*i]   = sum1;
613:     y[5*i+1] = sum2;
614:     y[5*i+2] = sum3;
615:     y[5*i+3] = sum4;
616:     y[5*i+4] = sum5;
617:   }

619:   PetscLogFlops(10*a->nz - 5*m);
620:   VecRestoreArrayFast(xx,&x);
621:   VecRestoreArrayFast(yy,&y);
622:   return(0);
623: }

627: int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
628: {
629:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
630:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
631:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
632:   int           ierr,m = b->AIJ->m,n,i,*idx;

635:   VecSet(&zero,yy);
636:   VecGetArrayFast(xx,&x);
637:   VecGetArrayFast(yy,&y);
638: 
639:   for (i=0; i<m; i++) {
640:     idx    = a->j + a->i[i] ;
641:     v      = a->a + a->i[i] ;
642:     n      = a->i[i+1] - a->i[i];
643:     alpha1 = x[5*i];
644:     alpha2 = x[5*i+1];
645:     alpha3 = x[5*i+2];
646:     alpha4 = x[5*i+3];
647:     alpha5 = x[5*i+4];
648:     while (n-->0) {
649:       y[5*(*idx)]   += alpha1*(*v);
650:       y[5*(*idx)+1] += alpha2*(*v);
651:       y[5*(*idx)+2] += alpha3*(*v);
652:       y[5*(*idx)+3] += alpha4*(*v);
653:       y[5*(*idx)+4] += alpha5*(*v);
654:       idx++; v++;
655:     }
656:   }
657:   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
658:   VecRestoreArrayFast(xx,&x);
659:   VecRestoreArrayFast(yy,&y);
660:   return(0);
661: }

665: int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
666: {
667:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
668:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
669:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
670:   int           ierr,m = b->AIJ->m,*idx,*ii;
671:   int           n,i,jrow,j;

674:   if (yy != zz) {VecCopy(yy,zz);}
675:   VecGetArrayFast(xx,&x);
676:   VecGetArrayFast(zz,&y);
677:   idx  = a->j;
678:   v    = a->a;
679:   ii   = a->i;

681:   for (i=0; i<m; i++) {
682:     jrow = ii[i];
683:     n    = ii[i+1] - jrow;
684:     sum1  = 0.0;
685:     sum2  = 0.0;
686:     sum3  = 0.0;
687:     sum4  = 0.0;
688:     sum5  = 0.0;
689:     for (j=0; j<n; j++) {
690:       sum1 += v[jrow]*x[5*idx[jrow]];
691:       sum2 += v[jrow]*x[5*idx[jrow]+1];
692:       sum3 += v[jrow]*x[5*idx[jrow]+2];
693:       sum4 += v[jrow]*x[5*idx[jrow]+3];
694:       sum5 += v[jrow]*x[5*idx[jrow]+4];
695:       jrow++;
696:      }
697:     y[5*i]   += sum1;
698:     y[5*i+1] += sum2;
699:     y[5*i+2] += sum3;
700:     y[5*i+3] += sum4;
701:     y[5*i+4] += sum5;
702:   }

704:   PetscLogFlops(10*a->nz);
705:   VecRestoreArrayFast(xx,&x);
706:   VecRestoreArrayFast(zz,&y);
707:   return(0);
708: }

712: int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
713: {
714:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
715:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
716:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
717:   int           ierr,m = b->AIJ->m,n,i,*idx;

720:   if (yy != zz) {VecCopy(yy,zz);}
721:   VecGetArrayFast(xx,&x);
722:   VecGetArrayFast(zz,&y);
723: 
724:   for (i=0; i<m; i++) {
725:     idx    = a->j + a->i[i] ;
726:     v      = a->a + a->i[i] ;
727:     n      = a->i[i+1] - a->i[i];
728:     alpha1 = x[5*i];
729:     alpha2 = x[5*i+1];
730:     alpha3 = x[5*i+2];
731:     alpha4 = x[5*i+3];
732:     alpha5 = x[5*i+4];
733:     while (n-->0) {
734:       y[5*(*idx)]   += alpha1*(*v);
735:       y[5*(*idx)+1] += alpha2*(*v);
736:       y[5*(*idx)+2] += alpha3*(*v);
737:       y[5*(*idx)+3] += alpha4*(*v);
738:       y[5*(*idx)+4] += alpha5*(*v);
739:       idx++; v++;
740:     }
741:   }
742:   PetscLogFlops(10*a->nz);
743:   VecRestoreArrayFast(xx,&x);
744:   VecRestoreArrayFast(zz,&y);
745:   return(0);
746: }

748: /* ------------------------------------------------------------------------------*/
751: int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
752: {
753:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
754:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
755:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
756:   int           ierr,m = b->AIJ->m,*idx,*ii;
757:   int           n,i,jrow,j;

760:   VecGetArrayFast(xx,&x);
761:   VecGetArrayFast(yy,&y);
762:   idx  = a->j;
763:   v    = a->a;
764:   ii   = a->i;

766:   for (i=0; i<m; i++) {
767:     jrow = ii[i];
768:     n    = ii[i+1] - jrow;
769:     sum1  = 0.0;
770:     sum2  = 0.0;
771:     sum3  = 0.0;
772:     sum4  = 0.0;
773:     sum5  = 0.0;
774:     sum6  = 0.0;
775:     for (j=0; j<n; j++) {
776:       sum1 += v[jrow]*x[6*idx[jrow]];
777:       sum2 += v[jrow]*x[6*idx[jrow]+1];
778:       sum3 += v[jrow]*x[6*idx[jrow]+2];
779:       sum4 += v[jrow]*x[6*idx[jrow]+3];
780:       sum5 += v[jrow]*x[6*idx[jrow]+4];
781:       sum6 += v[jrow]*x[6*idx[jrow]+5];
782:       jrow++;
783:      }
784:     y[6*i]   = sum1;
785:     y[6*i+1] = sum2;
786:     y[6*i+2] = sum3;
787:     y[6*i+3] = sum4;
788:     y[6*i+4] = sum5;
789:     y[6*i+5] = sum6;
790:   }

792:   PetscLogFlops(12*a->nz - 6*m);
793:   VecRestoreArrayFast(xx,&x);
794:   VecRestoreArrayFast(yy,&y);
795:   return(0);
796: }

800: int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
801: {
802:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
803:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
804:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
805:   int           ierr,m = b->AIJ->m,n,i,*idx;

808:   VecSet(&zero,yy);
809:   VecGetArrayFast(xx,&x);
810:   VecGetArrayFast(yy,&y);

812:   for (i=0; i<m; i++) {
813:     idx    = a->j + a->i[i] ;
814:     v      = a->a + a->i[i] ;
815:     n      = a->i[i+1] - a->i[i];
816:     alpha1 = x[6*i];
817:     alpha2 = x[6*i+1];
818:     alpha3 = x[6*i+2];
819:     alpha4 = x[6*i+3];
820:     alpha5 = x[6*i+4];
821:     alpha6 = x[6*i+5];
822:     while (n-->0) {
823:       y[6*(*idx)]   += alpha1*(*v);
824:       y[6*(*idx)+1] += alpha2*(*v);
825:       y[6*(*idx)+2] += alpha3*(*v);
826:       y[6*(*idx)+3] += alpha4*(*v);
827:       y[6*(*idx)+4] += alpha5*(*v);
828:       y[6*(*idx)+5] += alpha6*(*v);
829:       idx++; v++;
830:     }
831:   }
832:   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
833:   VecRestoreArrayFast(xx,&x);
834:   VecRestoreArrayFast(yy,&y);
835:   return(0);
836: }

840: int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
841: {
842:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
843:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
844:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
845:   int           ierr,m = b->AIJ->m,*idx,*ii;
846:   int           n,i,jrow,j;

849:   if (yy != zz) {VecCopy(yy,zz);}
850:   VecGetArrayFast(xx,&x);
851:   VecGetArrayFast(zz,&y);
852:   idx  = a->j;
853:   v    = a->a;
854:   ii   = a->i;

856:   for (i=0; i<m; i++) {
857:     jrow = ii[i];
858:     n    = ii[i+1] - jrow;
859:     sum1  = 0.0;
860:     sum2  = 0.0;
861:     sum3  = 0.0;
862:     sum4  = 0.0;
863:     sum5  = 0.0;
864:     sum6  = 0.0;
865:     for (j=0; j<n; j++) {
866:       sum1 += v[jrow]*x[6*idx[jrow]];
867:       sum2 += v[jrow]*x[6*idx[jrow]+1];
868:       sum3 += v[jrow]*x[6*idx[jrow]+2];
869:       sum4 += v[jrow]*x[6*idx[jrow]+3];
870:       sum5 += v[jrow]*x[6*idx[jrow]+4];
871:       sum6 += v[jrow]*x[6*idx[jrow]+5];
872:       jrow++;
873:      }
874:     y[6*i]   += sum1;
875:     y[6*i+1] += sum2;
876:     y[6*i+2] += sum3;
877:     y[6*i+3] += sum4;
878:     y[6*i+4] += sum5;
879:     y[6*i+5] += sum6;
880:   }

882:   PetscLogFlops(12*a->nz);
883:   VecRestoreArrayFast(xx,&x);
884:   VecRestoreArrayFast(zz,&y);
885:   return(0);
886: }

890: int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
891: {
892:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
893:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
894:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
895:   int           ierr,m = b->AIJ->m,n,i,*idx;

898:   if (yy != zz) {VecCopy(yy,zz);}
899:   VecGetArrayFast(xx,&x);
900:   VecGetArrayFast(zz,&y);
901: 
902:   for (i=0; i<m; i++) {
903:     idx    = a->j + a->i[i] ;
904:     v      = a->a + a->i[i] ;
905:     n      = a->i[i+1] - a->i[i];
906:     alpha1 = x[6*i];
907:     alpha2 = x[6*i+1];
908:     alpha3 = x[6*i+2];
909:     alpha4 = x[6*i+3];
910:     alpha5 = x[6*i+4];
911:     alpha6 = x[6*i+5];
912:     while (n-->0) {
913:       y[6*(*idx)]   += alpha1*(*v);
914:       y[6*(*idx)+1] += alpha2*(*v);
915:       y[6*(*idx)+2] += alpha3*(*v);
916:       y[6*(*idx)+3] += alpha4*(*v);
917:       y[6*(*idx)+4] += alpha5*(*v);
918:       y[6*(*idx)+5] += alpha6*(*v);
919:       idx++; v++;
920:     }
921:   }
922:   PetscLogFlops(12*a->nz);
923:   VecRestoreArrayFast(xx,&x);
924:   VecRestoreArrayFast(zz,&y);
925:   return(0);
926: }

928: /* ------------------------------------------------------------------------------*/
931: int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
932: {
933:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
934:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
935:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
936:   int           ierr,m = b->AIJ->m,*idx,*ii;
937:   int           n,i,jrow,j;

940:   VecGetArrayFast(xx,&x);
941:   VecGetArrayFast(yy,&y);
942:   idx  = a->j;
943:   v    = a->a;
944:   ii   = a->i;

946:   for (i=0; i<m; i++) {
947:     jrow = ii[i];
948:     n    = ii[i+1] - jrow;
949:     sum1  = 0.0;
950:     sum2  = 0.0;
951:     sum3  = 0.0;
952:     sum4  = 0.0;
953:     sum5  = 0.0;
954:     sum6  = 0.0;
955:     sum7  = 0.0;
956:     sum8  = 0.0;
957:     for (j=0; j<n; j++) {
958:       sum1 += v[jrow]*x[8*idx[jrow]];
959:       sum2 += v[jrow]*x[8*idx[jrow]+1];
960:       sum3 += v[jrow]*x[8*idx[jrow]+2];
961:       sum4 += v[jrow]*x[8*idx[jrow]+3];
962:       sum5 += v[jrow]*x[8*idx[jrow]+4];
963:       sum6 += v[jrow]*x[8*idx[jrow]+5];
964:       sum7 += v[jrow]*x[8*idx[jrow]+6];
965:       sum8 += v[jrow]*x[8*idx[jrow]+7];
966:       jrow++;
967:      }
968:     y[8*i]   = sum1;
969:     y[8*i+1] = sum2;
970:     y[8*i+2] = sum3;
971:     y[8*i+3] = sum4;
972:     y[8*i+4] = sum5;
973:     y[8*i+5] = sum6;
974:     y[8*i+6] = sum7;
975:     y[8*i+7] = sum8;
976:   }

978:   PetscLogFlops(16*a->nz - 8*m);
979:   VecRestoreArrayFast(xx,&x);
980:   VecRestoreArrayFast(yy,&y);
981:   return(0);
982: }

986: int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
987: {
988:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
989:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
990:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
991:   int           ierr,m = b->AIJ->m,n,i,*idx;

994:   VecSet(&zero,yy);
995:   VecGetArrayFast(xx,&x);
996:   VecGetArrayFast(yy,&y);

998:   for (i=0; i<m; i++) {
999:     idx    = a->j + a->i[i] ;
1000:     v      = a->a + a->i[i] ;
1001:     n      = a->i[i+1] - a->i[i];
1002:     alpha1 = x[8*i];
1003:     alpha2 = x[8*i+1];
1004:     alpha3 = x[8*i+2];
1005:     alpha4 = x[8*i+3];
1006:     alpha5 = x[8*i+4];
1007:     alpha6 = x[8*i+5];
1008:     alpha7 = x[8*i+6];
1009:     alpha8 = x[8*i+7];
1010:     while (n-->0) {
1011:       y[8*(*idx)]   += alpha1*(*v);
1012:       y[8*(*idx)+1] += alpha2*(*v);
1013:       y[8*(*idx)+2] += alpha3*(*v);
1014:       y[8*(*idx)+3] += alpha4*(*v);
1015:       y[8*(*idx)+4] += alpha5*(*v);
1016:       y[8*(*idx)+5] += alpha6*(*v);
1017:       y[8*(*idx)+6] += alpha7*(*v);
1018:       y[8*(*idx)+7] += alpha8*(*v);
1019:       idx++; v++;
1020:     }
1021:   }
1022:   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1023:   VecRestoreArrayFast(xx,&x);
1024:   VecRestoreArrayFast(yy,&y);
1025:   return(0);
1026: }

1030: int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1031: {
1032:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1033:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1034:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1035:   int           ierr,m = b->AIJ->m,*idx,*ii;
1036:   int           n,i,jrow,j;

1039:   if (yy != zz) {VecCopy(yy,zz);}
1040:   VecGetArrayFast(xx,&x);
1041:   VecGetArrayFast(zz,&y);
1042:   idx  = a->j;
1043:   v    = a->a;
1044:   ii   = a->i;

1046:   for (i=0; i<m; i++) {
1047:     jrow = ii[i];
1048:     n    = ii[i+1] - jrow;
1049:     sum1  = 0.0;
1050:     sum2  = 0.0;
1051:     sum3  = 0.0;
1052:     sum4  = 0.0;
1053:     sum5  = 0.0;
1054:     sum6  = 0.0;
1055:     sum7  = 0.0;
1056:     sum8  = 0.0;
1057:     for (j=0; j<n; j++) {
1058:       sum1 += v[jrow]*x[8*idx[jrow]];
1059:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1060:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1061:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1062:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1063:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1064:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1065:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1066:       jrow++;
1067:      }
1068:     y[8*i]   += sum1;
1069:     y[8*i+1] += sum2;
1070:     y[8*i+2] += sum3;
1071:     y[8*i+3] += sum4;
1072:     y[8*i+4] += sum5;
1073:     y[8*i+5] += sum6;
1074:     y[8*i+6] += sum7;
1075:     y[8*i+7] += sum8;
1076:   }

1078:   PetscLogFlops(16*a->nz);
1079:   VecRestoreArrayFast(xx,&x);
1080:   VecRestoreArrayFast(zz,&y);
1081:   return(0);
1082: }

1086: int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1087: {
1088:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1089:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1090:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1091:   int           ierr,m = b->AIJ->m,n,i,*idx;

1094:   if (yy != zz) {VecCopy(yy,zz);}
1095:   VecGetArrayFast(xx,&x);
1096:   VecGetArrayFast(zz,&y);
1097:   for (i=0; i<m; i++) {
1098:     idx    = a->j + a->i[i] ;
1099:     v      = a->a + a->i[i] ;
1100:     n      = a->i[i+1] - a->i[i];
1101:     alpha1 = x[8*i];
1102:     alpha2 = x[8*i+1];
1103:     alpha3 = x[8*i+2];
1104:     alpha4 = x[8*i+3];
1105:     alpha5 = x[8*i+4];
1106:     alpha6 = x[8*i+5];
1107:     alpha7 = x[8*i+6];
1108:     alpha8 = x[8*i+7];
1109:     while (n-->0) {
1110:       y[8*(*idx)]   += alpha1*(*v);
1111:       y[8*(*idx)+1] += alpha2*(*v);
1112:       y[8*(*idx)+2] += alpha3*(*v);
1113:       y[8*(*idx)+3] += alpha4*(*v);
1114:       y[8*(*idx)+4] += alpha5*(*v);
1115:       y[8*(*idx)+5] += alpha6*(*v);
1116:       y[8*(*idx)+6] += alpha7*(*v);
1117:       y[8*(*idx)+7] += alpha8*(*v);
1118:       idx++; v++;
1119:     }
1120:   }
1121:   PetscLogFlops(16*a->nz);
1122:   VecRestoreArrayFast(xx,&x);
1123:   VecRestoreArrayFast(zz,&y);
1124:   return(0);
1125: }

1127: /*--------------------------------------------------------------------------------------------*/
1130: int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1131: {
1132:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1133:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1134:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1135:   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1136:   int           ierr,m = b->AIJ->m,*idx,*ii;
1137:   int           n,i,jrow,j;

1140:   VecGetArrayFast(xx,&x);
1141:   VecGetArrayFast(yy,&y);
1142:   idx  = a->j;
1143:   v    = a->a;
1144:   ii   = a->i;

1146:   for (i=0; i<m; i++) {
1147:     jrow = ii[i];
1148:     n    = ii[i+1] - jrow;
1149:     sum1  = 0.0;
1150:     sum2  = 0.0;
1151:     sum3  = 0.0;
1152:     sum4  = 0.0;
1153:     sum5  = 0.0;
1154:     sum6  = 0.0;
1155:     sum7  = 0.0;
1156:     sum8  = 0.0;
1157:     sum9  = 0.0;
1158:     sum10 = 0.0;
1159:     sum11 = 0.0;
1160:     sum12 = 0.0;
1161:     sum13 = 0.0;
1162:     sum14 = 0.0;
1163:     sum15 = 0.0;
1164:     sum16 = 0.0;
1165:     for (j=0; j<n; j++) {
1166:       sum1  += v[jrow]*x[16*idx[jrow]];
1167:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1168:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1169:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1170:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1171:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1172:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1173:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1174:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1175:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1176:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1177:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1178:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1179:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1180:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1181:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1182:       jrow++;
1183:      }
1184:     y[16*i]    = sum1;
1185:     y[16*i+1]  = sum2;
1186:     y[16*i+2]  = sum3;
1187:     y[16*i+3]  = sum4;
1188:     y[16*i+4]  = sum5;
1189:     y[16*i+5]  = sum6;
1190:     y[16*i+6]  = sum7;
1191:     y[16*i+7]  = sum8;
1192:     y[16*i+8]  = sum9;
1193:     y[16*i+9]  = sum10;
1194:     y[16*i+10] = sum11;
1195:     y[16*i+11] = sum12;
1196:     y[16*i+12] = sum13;
1197:     y[16*i+13] = sum14;
1198:     y[16*i+14] = sum15;
1199:     y[16*i+15] = sum16;
1200:   }

1202:   PetscLogFlops(32*a->nz - 16*m);
1203:   VecRestoreArrayFast(xx,&x);
1204:   VecRestoreArrayFast(yy,&y);
1205:   return(0);
1206: }

1210: int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1211: {
1212:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1213:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1214:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1215:   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1216:   int           ierr,m = b->AIJ->m,n,i,*idx;

1219:   VecSet(&zero,yy);
1220:   VecGetArrayFast(xx,&x);
1221:   VecGetArrayFast(yy,&y);

1223:   for (i=0; i<m; i++) {
1224:     idx    = a->j + a->i[i] ;
1225:     v      = a->a + a->i[i] ;
1226:     n      = a->i[i+1] - a->i[i];
1227:     alpha1  = x[16*i];
1228:     alpha2  = x[16*i+1];
1229:     alpha3  = x[16*i+2];
1230:     alpha4  = x[16*i+3];
1231:     alpha5  = x[16*i+4];
1232:     alpha6  = x[16*i+5];
1233:     alpha7  = x[16*i+6];
1234:     alpha8  = x[16*i+7];
1235:     alpha9  = x[16*i+8];
1236:     alpha10 = x[16*i+9];
1237:     alpha11 = x[16*i+10];
1238:     alpha12 = x[16*i+11];
1239:     alpha13 = x[16*i+12];
1240:     alpha14 = x[16*i+13];
1241:     alpha15 = x[16*i+14];
1242:     alpha16 = x[16*i+15];
1243:     while (n-->0) {
1244:       y[16*(*idx)]    += alpha1*(*v);
1245:       y[16*(*idx)+1]  += alpha2*(*v);
1246:       y[16*(*idx)+2]  += alpha3*(*v);
1247:       y[16*(*idx)+3]  += alpha4*(*v);
1248:       y[16*(*idx)+4]  += alpha5*(*v);
1249:       y[16*(*idx)+5]  += alpha6*(*v);
1250:       y[16*(*idx)+6]  += alpha7*(*v);
1251:       y[16*(*idx)+7]  += alpha8*(*v);
1252:       y[16*(*idx)+8]  += alpha9*(*v);
1253:       y[16*(*idx)+9]  += alpha10*(*v);
1254:       y[16*(*idx)+10] += alpha11*(*v);
1255:       y[16*(*idx)+11] += alpha12*(*v);
1256:       y[16*(*idx)+12] += alpha13*(*v);
1257:       y[16*(*idx)+13] += alpha14*(*v);
1258:       y[16*(*idx)+14] += alpha15*(*v);
1259:       y[16*(*idx)+15] += alpha16*(*v);
1260:       idx++; v++;
1261:     }
1262:   }
1263:   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1264:   VecRestoreArrayFast(xx,&x);
1265:   VecRestoreArrayFast(yy,&y);
1266:   return(0);
1267: }

1271: int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1272: {
1273:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1274:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1275:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1276:   PetscScalar   sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1277:   int           ierr,m = b->AIJ->m,*idx,*ii;
1278:   int           n,i,jrow,j;

1281:   if (yy != zz) {VecCopy(yy,zz);}
1282:   VecGetArrayFast(xx,&x);
1283:   VecGetArrayFast(zz,&y);
1284:   idx  = a->j;
1285:   v    = a->a;
1286:   ii   = a->i;

1288:   for (i=0; i<m; i++) {
1289:     jrow = ii[i];
1290:     n    = ii[i+1] - jrow;
1291:     sum1  = 0.0;
1292:     sum2  = 0.0;
1293:     sum3  = 0.0;
1294:     sum4  = 0.0;
1295:     sum5  = 0.0;
1296:     sum6  = 0.0;
1297:     sum7  = 0.0;
1298:     sum8  = 0.0;
1299:     sum9  = 0.0;
1300:     sum10 = 0.0;
1301:     sum11 = 0.0;
1302:     sum12 = 0.0;
1303:     sum13 = 0.0;
1304:     sum14 = 0.0;
1305:     sum15 = 0.0;
1306:     sum16 = 0.0;
1307:     for (j=0; j<n; j++) {
1308:       sum1  += v[jrow]*x[16*idx[jrow]];
1309:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1310:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1311:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1312:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1313:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1314:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1315:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1316:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1317:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1318:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1319:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1320:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1321:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1322:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1323:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1324:       jrow++;
1325:      }
1326:     y[16*i]    += sum1;
1327:     y[16*i+1]  += sum2;
1328:     y[16*i+2]  += sum3;
1329:     y[16*i+3]  += sum4;
1330:     y[16*i+4]  += sum5;
1331:     y[16*i+5]  += sum6;
1332:     y[16*i+6]  += sum7;
1333:     y[16*i+7]  += sum8;
1334:     y[16*i+8]  += sum9;
1335:     y[16*i+9]  += sum10;
1336:     y[16*i+10] += sum11;
1337:     y[16*i+11] += sum12;
1338:     y[16*i+12] += sum13;
1339:     y[16*i+13] += sum14;
1340:     y[16*i+14] += sum15;
1341:     y[16*i+15] += sum16;
1342:   }

1344:   PetscLogFlops(32*a->nz);
1345:   VecRestoreArrayFast(xx,&x);
1346:   VecRestoreArrayFast(zz,&y);
1347:   return(0);
1348: }

1352: int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1353: {
1354:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1355:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1356:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1357:   PetscScalar   alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1358:   int           ierr,m = b->AIJ->m,n,i,*idx;

1361:   if (yy != zz) {VecCopy(yy,zz);}
1362:   VecGetArrayFast(xx,&x);
1363:   VecGetArrayFast(zz,&y);
1364:   for (i=0; i<m; i++) {
1365:     idx    = a->j + a->i[i] ;
1366:     v      = a->a + a->i[i] ;
1367:     n      = a->i[i+1] - a->i[i];
1368:     alpha1 = x[16*i];
1369:     alpha2 = x[16*i+1];
1370:     alpha3 = x[16*i+2];
1371:     alpha4 = x[16*i+3];
1372:     alpha5 = x[16*i+4];
1373:     alpha6 = x[16*i+5];
1374:     alpha7 = x[16*i+6];
1375:     alpha8 = x[16*i+7];
1376:     alpha9  = x[16*i+8];
1377:     alpha10 = x[16*i+9];
1378:     alpha11 = x[16*i+10];
1379:     alpha12 = x[16*i+11];
1380:     alpha13 = x[16*i+12];
1381:     alpha14 = x[16*i+13];
1382:     alpha15 = x[16*i+14];
1383:     alpha16 = x[16*i+15];
1384:     while (n-->0) {
1385:       y[16*(*idx)]   += alpha1*(*v);
1386:       y[16*(*idx)+1] += alpha2*(*v);
1387:       y[16*(*idx)+2] += alpha3*(*v);
1388:       y[16*(*idx)+3] += alpha4*(*v);
1389:       y[16*(*idx)+4] += alpha5*(*v);
1390:       y[16*(*idx)+5] += alpha6*(*v);
1391:       y[16*(*idx)+6] += alpha7*(*v);
1392:       y[16*(*idx)+7] += alpha8*(*v);
1393:       y[16*(*idx)+8]  += alpha9*(*v);
1394:       y[16*(*idx)+9]  += alpha10*(*v);
1395:       y[16*(*idx)+10] += alpha11*(*v);
1396:       y[16*(*idx)+11] += alpha12*(*v);
1397:       y[16*(*idx)+12] += alpha13*(*v);
1398:       y[16*(*idx)+13] += alpha14*(*v);
1399:       y[16*(*idx)+14] += alpha15*(*v);
1400:       y[16*(*idx)+15] += alpha16*(*v);
1401:       idx++; v++;
1402:     }
1403:   }
1404:   PetscLogFlops(32*a->nz);
1405:   VecRestoreArrayFast(xx,&x);
1406:   VecRestoreArrayFast(zz,&y);
1407:   return(0);
1408: }

1410: /*===================================================================================*/
1413: int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1414: {
1415:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1416:   int         ierr;

1419:   /* start the scatter */
1420:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1421:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
1422:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1423:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
1424:   return(0);
1425: }

1429: int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1430: {
1431:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1432:   int         ierr;
1434:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1435:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1436:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
1437:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1438:   return(0);
1439: }

1443: int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1444: {
1445:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1446:   int         ierr;

1449:   /* start the scatter */
1450:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1451:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
1452:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1453:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);
1454:   return(0);
1455: }

1459: int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1460: {
1461:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1462:   int         ierr;
1464:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1465:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1466:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
1467:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1468:   return(0);
1469: }

1471: /* ---------------------------------------------------------------------------------- */
1472: /*MC
1473:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
1474:   operations for multicomponent problems.  It interpolates each component the same
1475:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
1476:   and MATMPIAIJ for distributed matrices.

1478:   Operations provided:
1479: . MatMult
1480: . MatMultTranspose
1481: . MatMultAdd
1482: . MatMultTransposeAdd

1484:   Level: advanced

1486: M*/
1489: int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1490: {
1491:   int         ierr,size,n;
1492:   Mat_MPIMAIJ *b;
1493:   Mat         B;

1496:   PetscObjectReference((PetscObject)A);

1498:   if (dof == 1) {
1499:     *maij = A;
1500:   } else {
1501:     MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);
1502:     B->assembled    = PETSC_TRUE;

1504:     MPI_Comm_size(A->comm,&size);
1505:     if (size == 1) {
1506:       MatSetType(B,MATSEQMAIJ);
1507:       B->ops->destroy = MatDestroy_SeqMAIJ;
1508:       b      = (Mat_MPIMAIJ*)B->data;
1509:       b->dof = dof;
1510:       b->AIJ = A;
1511:       if (dof == 2) {
1512:         B->ops->mult             = MatMult_SeqMAIJ_2;
1513:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1514:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1515:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1516:       } else if (dof == 3) {
1517:         B->ops->mult             = MatMult_SeqMAIJ_3;
1518:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1519:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1520:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1521:       } else if (dof == 4) {
1522:         B->ops->mult             = MatMult_SeqMAIJ_4;
1523:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1524:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1525:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1526:       } else if (dof == 5) {
1527:         B->ops->mult             = MatMult_SeqMAIJ_5;
1528:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1529:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1530:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1531:       } else if (dof == 6) {
1532:         B->ops->mult             = MatMult_SeqMAIJ_6;
1533:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1534:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1535:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1536:       } else if (dof == 8) {
1537:         B->ops->mult             = MatMult_SeqMAIJ_8;
1538:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1539:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1540:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1541:       } else if (dof == 16) {
1542:         B->ops->mult             = MatMult_SeqMAIJ_16;
1543:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1544:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1545:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1546:       } else {
1547:         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1548:       }
1549:     } else {
1550:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1551:       IS         from,to;
1552:       Vec        gvec;
1553:       int        *garray,i;

1555:       MatSetType(B,MATMPIMAIJ);
1556:       B->ops->destroy = MatDestroy_MPIMAIJ;
1557:       b      = (Mat_MPIMAIJ*)B->data;
1558:       b->dof = dof;
1559:       b->A   = A;
1560:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
1561:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

1563:       VecGetSize(mpiaij->lvec,&n);
1564:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);

1566:       /* create two temporary Index sets for build scatter gather */
1567:       PetscMalloc((n+1)*sizeof(int),&garray);
1568:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1569:       ISCreateBlock(A->comm,dof,n,garray,&from);
1570:       PetscFree(garray);
1571:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

1573:       /* create temporary global vector to generate scatter context */
1574:       VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);

1576:       /* generate the scatter context */
1577:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

1579:       ISDestroy(from);
1580:       ISDestroy(to);
1581:       VecDestroy(gvec);

1583:       B->ops->mult             = MatMult_MPIMAIJ_dof;
1584:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1585:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1586:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1587:     }
1588:     *maij = B;
1589:   }
1590:   return(0);
1591: }