Actual source code: maij.c

  1: /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for 
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19:  #include src/mat/impls/aij/mpi/mpiaij.h

 21: typedef struct {
 22:   int        dof;         /* number of components */
 23:   Mat        AIJ;        /* representation of interpolation for one component */
 24: } Mat_SeqMAIJ;

 26: typedef struct {
 27:   int        dof;         /* number of components */
 28:   Mat        AIJ,OAIJ;    /* representation of interpolation for one component */
 29:   Mat        A;
 30:   VecScatter ctx;         /* update ghost points for parallel case */
 31:   Vec        w;           /* work space for ghost values for parallel case */
 32: } Mat_MPIMAIJ;

 34: int MatMAIJGetAIJ(Mat A,Mat *B)
 35: {
 36:   int         ierr;
 37:   PetscTruth  ismpimaij,isseqmaij;

 40:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 41:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 42:   if (ismpimaij) {
 43:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 45:     *B = b->A;
 46:   } else if (isseqmaij) {
 47:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 49:     *B = b->AIJ;
 50:   } else {
 51:     *B = A;
 52:   }
 53:   return(0);
 54: }

 56: int MatMAIJRedimension(Mat A,int dof,Mat *B)
 57: {
 59:   Mat Aij;

 62:   MatMAIJGetAIJ(A,&Aij);
 63:   MatCreateMAIJ(Aij,dof,B);
 64:   return(0);
 65: }

 67: int MatDestroy_SeqMAIJ(Mat A)
 68: {
 69:   int         ierr;
 70:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 73:   if (b->AIJ) {
 74:     MatDestroy(b->AIJ);
 75:   }
 76:   PetscFree(b);
 77:   return(0);
 78: }

 80: int MatDestroy_MPIMAIJ(Mat A)
 81: {
 82:   int         ierr;
 83:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 86:   if (b->AIJ) {
 87:     MatDestroy(b->AIJ);
 88:   }
 89:   if (b->OAIJ) {
 90:     MatDestroy(b->OAIJ);
 91:   }
 92:   if (b->A) {
 93:     MatDestroy(b->A);
 94:   }
 95:   if (b->ctx) {
 96:     VecScatterDestroy(b->ctx);
 97:   }
 98:   if (b->w) {
 99:     VecDestroy(b->w);
100:   }
101:   PetscFree(b);
102:   return(0);
103: }

105: EXTERN_C_BEGIN
106: int MatCreate_MAIJ(Mat A)
107: {
108:   int         ierr;
109:   Mat_MPIMAIJ *b;

112:   ierr     = PetscNew(Mat_MPIMAIJ,&b);
113:   A->data  = (void*)b;
114:   PetscMemzero(b,sizeof(Mat_MPIMAIJ));
115:   PetscMemzero(A->ops,sizeof(struct _MatOps));
116:   A->factor           = 0;
117:   A->mapping          = 0;

119:   b->AIJ  = 0;
120:   b->dof  = 0;
121:   b->OAIJ = 0;
122:   b->ctx  = 0;
123:   b->w    = 0;
124:   return(0);
125: }
126: EXTERN_C_END

128: /* --------------------------------------------------------------------------------------*/
129: int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
130: {
131:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
132:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
133:   PetscScalar   *x,*y,*v,sum1, sum2;
134:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
135:   int           n,i,jrow,j;

138:   VecGetArray(xx,&x);
139:   VecGetArray(yy,&y);
140:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
141:   idx  = a->j;
142:   v    = a->a;
143:   ii   = a->i;

145:   v    += shift; /* shift for Fortran start by 1 indexing */
146:   idx  += shift;
147:   for (i=0; i<m; i++) {
148:     jrow = ii[i];
149:     n    = ii[i+1] - jrow;
150:     sum1  = 0.0;
151:     sum2  = 0.0;
152:     for (j=0; j<n; j++) {
153:       sum1 += v[jrow]*x[2*idx[jrow]];
154:       sum2 += v[jrow]*x[2*idx[jrow]+1];
155:       jrow++;
156:      }
157:     y[2*i]   = sum1;
158:     y[2*i+1] = sum2;
159:   }

161:   PetscLogFlops(4*a->nz - 2*m);
162:   VecRestoreArray(xx,&x);
163:   VecRestoreArray(yy,&y);
164:   return(0);
165: }

167: int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
168: {
169:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
170:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
171:   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
172:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

175:   VecSet(&zero,yy);
176:   VecGetArray(xx,&x);
177:   VecGetArray(yy,&y);
178:   y = y + shift; /* shift for Fortran start by 1 indexing */
179:   for (i=0; i<m; i++) {
180:     idx    = a->j + a->i[i] + shift;
181:     v      = a->a + a->i[i] + shift;
182:     n      = a->i[i+1] - a->i[i];
183:     alpha1 = x[2*i];
184:     alpha2 = x[2*i+1];
185:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
186:   }
187:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
188:   VecRestoreArray(xx,&x);
189:   VecRestoreArray(yy,&y);
190:   return(0);
191: }

193: int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
194: {
195:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
196:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
197:   PetscScalar   *x,*y,*v,sum1, sum2;
198:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
199:   int           n,i,jrow,j;

202:   if (yy != zz) {VecCopy(yy,zz);}
203:   VecGetArray(xx,&x);
204:   VecGetArray(zz,&y);
205:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
206:   idx  = a->j;
207:   v    = a->a;
208:   ii   = a->i;

210:   v    += shift; /* shift for Fortran start by 1 indexing */
211:   idx  += shift;
212:   for (i=0; i<m; i++) {
213:     jrow = ii[i];
214:     n    = ii[i+1] - jrow;
215:     sum1  = 0.0;
216:     sum2  = 0.0;
217:     for (j=0; j<n; j++) {
218:       sum1 += v[jrow]*x[2*idx[jrow]];
219:       sum2 += v[jrow]*x[2*idx[jrow]+1];
220:       jrow++;
221:      }
222:     y[2*i]   += sum1;
223:     y[2*i+1] += sum2;
224:   }

226:   PetscLogFlops(4*a->nz - 2*m);
227:   VecRestoreArray(xx,&x);
228:   VecRestoreArray(zz,&y);
229:   return(0);
230: }
231: int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
232: {
233:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
234:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
235:   PetscScalar   *x,*y,*v,alpha1,alpha2;
236:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

239:   if (yy != zz) {VecCopy(yy,zz);}
240:   VecGetArray(xx,&x);
241:   VecGetArray(zz,&y);
242:   y = y + shift; /* shift for Fortran start by 1 indexing */
243:   for (i=0; i<m; i++) {
244:     idx   = a->j + a->i[i] + shift;
245:     v     = a->a + a->i[i] + shift;
246:     n     = a->i[i+1] - a->i[i];
247:     alpha1 = x[2*i];
248:     alpha2 = x[2*i+1];
249:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
250:   }
251:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
252:   VecRestoreArray(xx,&x);
253:   VecRestoreArray(zz,&y);
254:   return(0);
255: }
256: /* --------------------------------------------------------------------------------------*/
257: int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
258: {
259:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
260:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
261:   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
262:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
263:   int           n,i,jrow,j;

266:   VecGetArray(xx,&x);
267:   VecGetArray(yy,&y);
268:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
269:   idx  = a->j;
270:   v    = a->a;
271:   ii   = a->i;

273:   v    += shift; /* shift for Fortran start by 1 indexing */
274:   idx  += shift;
275:   for (i=0; i<m; i++) {
276:     jrow = ii[i];
277:     n    = ii[i+1] - jrow;
278:     sum1  = 0.0;
279:     sum2  = 0.0;
280:     sum3  = 0.0;
281:     for (j=0; j<n; j++) {
282:       sum1 += v[jrow]*x[3*idx[jrow]];
283:       sum2 += v[jrow]*x[3*idx[jrow]+1];
284:       sum3 += v[jrow]*x[3*idx[jrow]+2];
285:       jrow++;
286:      }
287:     y[3*i]   = sum1;
288:     y[3*i+1] = sum2;
289:     y[3*i+2] = sum3;
290:   }

292:   PetscLogFlops(6*a->nz - 3*m);
293:   VecRestoreArray(xx,&x);
294:   VecRestoreArray(yy,&y);
295:   return(0);
296: }

298: int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
299: {
300:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
301:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
302:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
303:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

306:   VecSet(&zero,yy);
307:   VecGetArray(xx,&x);
308:   VecGetArray(yy,&y);
309:   y = y + shift; /* shift for Fortran start by 1 indexing */
310:   for (i=0; i<m; i++) {
311:     idx    = a->j + a->i[i] + shift;
312:     v      = a->a + a->i[i] + shift;
313:     n      = a->i[i+1] - a->i[i];
314:     alpha1 = x[3*i];
315:     alpha2 = x[3*i+1];
316:     alpha3 = x[3*i+2];
317:     while (n-->0) {
318:       y[3*(*idx)]   += alpha1*(*v);
319:       y[3*(*idx)+1] += alpha2*(*v);
320:       y[3*(*idx)+2] += alpha3*(*v);
321:       idx++; v++;
322:     }
323:   }
324:   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
325:   VecRestoreArray(xx,&x);
326:   VecRestoreArray(yy,&y);
327:   return(0);
328: }

330: int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
331: {
332:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
333:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
334:   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
335:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
336:   int           n,i,jrow,j;

339:   if (yy != zz) {VecCopy(yy,zz);}
340:   VecGetArray(xx,&x);
341:   VecGetArray(zz,&y);
342:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
343:   idx  = a->j;
344:   v    = a->a;
345:   ii   = a->i;

347:   v    += shift; /* shift for Fortran start by 1 indexing */
348:   idx  += shift;
349:   for (i=0; i<m; i++) {
350:     jrow = ii[i];
351:     n    = ii[i+1] - jrow;
352:     sum1  = 0.0;
353:     sum2  = 0.0;
354:     sum3  = 0.0;
355:     for (j=0; j<n; j++) {
356:       sum1 += v[jrow]*x[3*idx[jrow]];
357:       sum2 += v[jrow]*x[3*idx[jrow]+1];
358:       sum3 += v[jrow]*x[3*idx[jrow]+2];
359:       jrow++;
360:      }
361:     y[3*i]   += sum1;
362:     y[3*i+1] += sum2;
363:     y[3*i+2] += sum3;
364:   }

366:   PetscLogFlops(6*a->nz);
367:   VecRestoreArray(xx,&x);
368:   VecRestoreArray(zz,&y);
369:   return(0);
370: }
371: int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
372: {
373:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
374:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
375:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
376:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

379:   if (yy != zz) {VecCopy(yy,zz);}
380:   VecGetArray(xx,&x);
381:   VecGetArray(zz,&y);
382:   y = y + shift; /* shift for Fortran start by 1 indexing */
383:   for (i=0; i<m; i++) {
384:     idx    = a->j + a->i[i] + shift;
385:     v      = a->a + a->i[i] + shift;
386:     n      = a->i[i+1] - a->i[i];
387:     alpha1 = x[3*i];
388:     alpha2 = x[3*i+1];
389:     alpha3 = x[3*i+2];
390:     while (n-->0) {
391:       y[3*(*idx)]   += alpha1*(*v);
392:       y[3*(*idx)+1] += alpha2*(*v);
393:       y[3*(*idx)+2] += alpha3*(*v);
394:       idx++; v++;
395:     }
396:   }
397:   PetscLogFlops(6*a->nz);
398:   VecRestoreArray(xx,&x);
399:   VecRestoreArray(zz,&y);
400:   return(0);
401: }

403: /* ------------------------------------------------------------------------------*/
404: int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
405: {
406:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
407:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
408:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
409:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
410:   int           n,i,jrow,j;

413:   VecGetArray(xx,&x);
414:   VecGetArray(yy,&y);
415:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
416:   idx  = a->j;
417:   v    = a->a;
418:   ii   = a->i;

420:   v    += shift; /* shift for Fortran start by 1 indexing */
421:   idx  += shift;
422:   for (i=0; i<m; i++) {
423:     jrow = ii[i];
424:     n    = ii[i+1] - jrow;
425:     sum1  = 0.0;
426:     sum2  = 0.0;
427:     sum3  = 0.0;
428:     sum4  = 0.0;
429:     for (j=0; j<n; j++) {
430:       sum1 += v[jrow]*x[4*idx[jrow]];
431:       sum2 += v[jrow]*x[4*idx[jrow]+1];
432:       sum3 += v[jrow]*x[4*idx[jrow]+2];
433:       sum4 += v[jrow]*x[4*idx[jrow]+3];
434:       jrow++;
435:      }
436:     y[4*i]   = sum1;
437:     y[4*i+1] = sum2;
438:     y[4*i+2] = sum3;
439:     y[4*i+3] = sum4;
440:   }

442:   PetscLogFlops(8*a->nz - 4*m);
443:   VecRestoreArray(xx,&x);
444:   VecRestoreArray(yy,&y);
445:   return(0);
446: }

448: int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
449: {
450:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
451:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
452:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
453:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

456:   VecSet(&zero,yy);
457:   VecGetArray(xx,&x);
458:   VecGetArray(yy,&y);
459:   y = y + shift; /* shift for Fortran start by 1 indexing */
460:   for (i=0; i<m; i++) {
461:     idx    = a->j + a->i[i] + shift;
462:     v      = a->a + a->i[i] + shift;
463:     n      = a->i[i+1] - a->i[i];
464:     alpha1 = x[4*i];
465:     alpha2 = x[4*i+1];
466:     alpha3 = x[4*i+2];
467:     alpha4 = x[4*i+3];
468:     while (n-->0) {
469:       y[4*(*idx)]   += alpha1*(*v);
470:       y[4*(*idx)+1] += alpha2*(*v);
471:       y[4*(*idx)+2] += alpha3*(*v);
472:       y[4*(*idx)+3] += alpha4*(*v);
473:       idx++; v++;
474:     }
475:   }
476:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
477:   VecRestoreArray(xx,&x);
478:   VecRestoreArray(yy,&y);
479:   return(0);
480: }

482: int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
483: {
484:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
485:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
486:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
487:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
488:   int           n,i,jrow,j;

491:   if (yy != zz) {VecCopy(yy,zz);}
492:   VecGetArray(xx,&x);
493:   VecGetArray(zz,&y);
494:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
495:   idx  = a->j;
496:   v    = a->a;
497:   ii   = a->i;

499:   v    += shift; /* shift for Fortran start by 1 indexing */
500:   idx  += shift;
501:   for (i=0; i<m; i++) {
502:     jrow = ii[i];
503:     n    = ii[i+1] - jrow;
504:     sum1  = 0.0;
505:     sum2  = 0.0;
506:     sum3  = 0.0;
507:     sum4  = 0.0;
508:     for (j=0; j<n; j++) {
509:       sum1 += v[jrow]*x[4*idx[jrow]];
510:       sum2 += v[jrow]*x[4*idx[jrow]+1];
511:       sum3 += v[jrow]*x[4*idx[jrow]+2];
512:       sum4 += v[jrow]*x[4*idx[jrow]+3];
513:       jrow++;
514:      }
515:     y[4*i]   += sum1;
516:     y[4*i+1] += sum2;
517:     y[4*i+2] += sum3;
518:     y[4*i+3] += sum4;
519:   }

521:   PetscLogFlops(8*a->nz - 4*m);
522:   VecRestoreArray(xx,&x);
523:   VecRestoreArray(zz,&y);
524:   return(0);
525: }
526: int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
527: {
528:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
529:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
530:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
531:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

534:   if (yy != zz) {VecCopy(yy,zz);}
535:   VecGetArray(xx,&x);
536:   VecGetArray(zz,&y);
537:   y = y + shift; /* shift for Fortran start by 1 indexing */
538:   for (i=0; i<m; i++) {
539:     idx    = a->j + a->i[i] + shift;
540:     v      = a->a + a->i[i] + shift;
541:     n      = a->i[i+1] - a->i[i];
542:     alpha1 = x[4*i];
543:     alpha2 = x[4*i+1];
544:     alpha3 = x[4*i+2];
545:     alpha4 = x[4*i+3];
546:     while (n-->0) {
547:       y[4*(*idx)]   += alpha1*(*v);
548:       y[4*(*idx)+1] += alpha2*(*v);
549:       y[4*(*idx)+2] += alpha3*(*v);
550:       y[4*(*idx)+3] += alpha4*(*v);
551:       idx++; v++;
552:     }
553:   }
554:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
555:   VecRestoreArray(xx,&x);
556:   VecRestoreArray(zz,&y);
557:   return(0);
558: }
559: /* ------------------------------------------------------------------------------*/

561: int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
562: {
563:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
564:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
565:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
566:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
567:   int           n,i,jrow,j;

570:   VecGetArray(xx,&x);
571:   VecGetArray(yy,&y);
572:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
573:   idx  = a->j;
574:   v    = a->a;
575:   ii   = a->i;

577:   v    += shift; /* shift for Fortran start by 1 indexing */
578:   idx  += shift;
579:   for (i=0; i<m; i++) {
580:     jrow = ii[i];
581:     n    = ii[i+1] - jrow;
582:     sum1  = 0.0;
583:     sum2  = 0.0;
584:     sum3  = 0.0;
585:     sum4  = 0.0;
586:     sum5  = 0.0;
587:     for (j=0; j<n; j++) {
588:       sum1 += v[jrow]*x[5*idx[jrow]];
589:       sum2 += v[jrow]*x[5*idx[jrow]+1];
590:       sum3 += v[jrow]*x[5*idx[jrow]+2];
591:       sum4 += v[jrow]*x[5*idx[jrow]+3];
592:       sum5 += v[jrow]*x[5*idx[jrow]+4];
593:       jrow++;
594:      }
595:     y[5*i]   = sum1;
596:     y[5*i+1] = sum2;
597:     y[5*i+2] = sum3;
598:     y[5*i+3] = sum4;
599:     y[5*i+4] = sum5;
600:   }

602:   PetscLogFlops(10*a->nz - 5*m);
603:   VecRestoreArray(xx,&x);
604:   VecRestoreArray(yy,&y);
605:   return(0);
606: }

608: int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
609: {
610:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
611:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
612:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
613:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

616:   VecSet(&zero,yy);
617:   VecGetArray(xx,&x);
618:   VecGetArray(yy,&y);
619:   y = y + shift; /* shift for Fortran start by 1 indexing */
620:   for (i=0; i<m; i++) {
621:     idx    = a->j + a->i[i] + shift;
622:     v      = a->a + a->i[i] + shift;
623:     n      = a->i[i+1] - a->i[i];
624:     alpha1 = x[5*i];
625:     alpha2 = x[5*i+1];
626:     alpha3 = x[5*i+2];
627:     alpha4 = x[5*i+3];
628:     alpha5 = x[5*i+4];
629:     while (n-->0) {
630:       y[5*(*idx)]   += alpha1*(*v);
631:       y[5*(*idx)+1] += alpha2*(*v);
632:       y[5*(*idx)+2] += alpha3*(*v);
633:       y[5*(*idx)+3] += alpha4*(*v);
634:       y[5*(*idx)+4] += alpha5*(*v);
635:       idx++; v++;
636:     }
637:   }
638:   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
639:   VecRestoreArray(xx,&x);
640:   VecRestoreArray(yy,&y);
641:   return(0);
642: }

644: int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
645: {
646:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
647:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
648:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
649:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
650:   int           n,i,jrow,j;

653:   if (yy != zz) {VecCopy(yy,zz);}
654:   VecGetArray(xx,&x);
655:   VecGetArray(zz,&y);
656:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
657:   idx  = a->j;
658:   v    = a->a;
659:   ii   = a->i;

661:   v    += shift; /* shift for Fortran start by 1 indexing */
662:   idx  += shift;
663:   for (i=0; i<m; i++) {
664:     jrow = ii[i];
665:     n    = ii[i+1] - jrow;
666:     sum1  = 0.0;
667:     sum2  = 0.0;
668:     sum3  = 0.0;
669:     sum4  = 0.0;
670:     sum5  = 0.0;
671:     for (j=0; j<n; j++) {
672:       sum1 += v[jrow]*x[5*idx[jrow]];
673:       sum2 += v[jrow]*x[5*idx[jrow]+1];
674:       sum3 += v[jrow]*x[5*idx[jrow]+2];
675:       sum4 += v[jrow]*x[5*idx[jrow]+3];
676:       sum5 += v[jrow]*x[5*idx[jrow]+4];
677:       jrow++;
678:      }
679:     y[5*i]   += sum1;
680:     y[5*i+1] += sum2;
681:     y[5*i+2] += sum3;
682:     y[5*i+3] += sum4;
683:     y[5*i+4] += sum5;
684:   }

686:   PetscLogFlops(10*a->nz);
687:   VecRestoreArray(xx,&x);
688:   VecRestoreArray(zz,&y);
689:   return(0);
690: }

692: int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
693: {
694:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
695:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
696:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
697:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

700:   if (yy != zz) {VecCopy(yy,zz);}
701:   VecGetArray(xx,&x);
702:   VecGetArray(zz,&y);
703:   y = y + shift; /* shift for Fortran start by 1 indexing */
704:   for (i=0; i<m; i++) {
705:     idx    = a->j + a->i[i] + shift;
706:     v      = a->a + a->i[i] + shift;
707:     n      = a->i[i+1] - a->i[i];
708:     alpha1 = x[5*i];
709:     alpha2 = x[5*i+1];
710:     alpha3 = x[5*i+2];
711:     alpha4 = x[5*i+3];
712:     alpha5 = x[5*i+4];
713:     while (n-->0) {
714:       y[5*(*idx)]   += alpha1*(*v);
715:       y[5*(*idx)+1] += alpha2*(*v);
716:       y[5*(*idx)+2] += alpha3*(*v);
717:       y[5*(*idx)+3] += alpha4*(*v);
718:       y[5*(*idx)+4] += alpha5*(*v);
719:       idx++; v++;
720:     }
721:   }
722:   PetscLogFlops(10*a->nz);
723:   VecRestoreArray(xx,&x);
724:   VecRestoreArray(zz,&y);
725:   return(0);
726: }

728: /* ------------------------------------------------------------------------------*/
729: int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
730: {
731:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
732:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
733:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
734:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
735:   int           n,i,jrow,j;

738:   VecGetArray(xx,&x);
739:   VecGetArray(yy,&y);
740:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
741:   idx  = a->j;
742:   v    = a->a;
743:   ii   = a->i;

745:   v    += shift; /* shift for Fortran start by 1 indexing */
746:   idx  += shift;
747:   for (i=0; i<m; i++) {
748:     jrow = ii[i];
749:     n    = ii[i+1] - jrow;
750:     sum1  = 0.0;
751:     sum2  = 0.0;
752:     sum3  = 0.0;
753:     sum4  = 0.0;
754:     sum5  = 0.0;
755:     sum6  = 0.0;
756:     for (j=0; j<n; j++) {
757:       sum1 += v[jrow]*x[6*idx[jrow]];
758:       sum2 += v[jrow]*x[6*idx[jrow]+1];
759:       sum3 += v[jrow]*x[6*idx[jrow]+2];
760:       sum4 += v[jrow]*x[6*idx[jrow]+3];
761:       sum5 += v[jrow]*x[6*idx[jrow]+4];
762:       sum6 += v[jrow]*x[6*idx[jrow]+5];
763:       jrow++;
764:      }
765:     y[6*i]   = sum1;
766:     y[6*i+1] = sum2;
767:     y[6*i+2] = sum3;
768:     y[6*i+3] = sum4;
769:     y[6*i+4] = sum5;
770:     y[6*i+5] = sum6;
771:   }

773:   PetscLogFlops(12*a->nz - 6*m);
774:   VecRestoreArray(xx,&x);
775:   VecRestoreArray(yy,&y);
776:   return(0);
777: }

779: int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
780: {
781:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
782:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
783:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
784:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

787:   VecSet(&zero,yy);
788:   VecGetArray(xx,&x);
789:   VecGetArray(yy,&y);
790:   y = y + shift; /* shift for Fortran start by 1 indexing */
791:   for (i=0; i<m; i++) {
792:     idx    = a->j + a->i[i] + shift;
793:     v      = a->a + a->i[i] + shift;
794:     n      = a->i[i+1] - a->i[i];
795:     alpha1 = x[6*i];
796:     alpha2 = x[6*i+1];
797:     alpha3 = x[6*i+2];
798:     alpha4 = x[6*i+3];
799:     alpha5 = x[6*i+4];
800:     alpha6 = x[6*i+5];
801:     while (n-->0) {
802:       y[6*(*idx)]   += alpha1*(*v);
803:       y[6*(*idx)+1] += alpha2*(*v);
804:       y[6*(*idx)+2] += alpha3*(*v);
805:       y[6*(*idx)+3] += alpha4*(*v);
806:       y[6*(*idx)+4] += alpha5*(*v);
807:       y[6*(*idx)+5] += alpha6*(*v);
808:       idx++; v++;
809:     }
810:   }
811:   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
812:   VecRestoreArray(xx,&x);
813:   VecRestoreArray(yy,&y);
814:   return(0);
815: }

817: int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
818: {
819:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
820:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
821:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
822:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
823:   int           n,i,jrow,j;

826:   if (yy != zz) {VecCopy(yy,zz);}
827:   VecGetArray(xx,&x);
828:   VecGetArray(zz,&y);
829:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
830:   idx  = a->j;
831:   v    = a->a;
832:   ii   = a->i;

834:   v    += shift; /* shift for Fortran start by 1 indexing */
835:   idx  += shift;
836:   for (i=0; i<m; i++) {
837:     jrow = ii[i];
838:     n    = ii[i+1] - jrow;
839:     sum1  = 0.0;
840:     sum2  = 0.0;
841:     sum3  = 0.0;
842:     sum4  = 0.0;
843:     sum5  = 0.0;
844:     sum6  = 0.0;
845:     for (j=0; j<n; j++) {
846:       sum1 += v[jrow]*x[6*idx[jrow]];
847:       sum2 += v[jrow]*x[6*idx[jrow]+1];
848:       sum3 += v[jrow]*x[6*idx[jrow]+2];
849:       sum4 += v[jrow]*x[6*idx[jrow]+3];
850:       sum5 += v[jrow]*x[6*idx[jrow]+4];
851:       sum6 += v[jrow]*x[6*idx[jrow]+5];
852:       jrow++;
853:      }
854:     y[6*i]   += sum1;
855:     y[6*i+1] += sum2;
856:     y[6*i+2] += sum3;
857:     y[6*i+3] += sum4;
858:     y[6*i+4] += sum5;
859:     y[6*i+5] += sum6;
860:   }

862:   PetscLogFlops(12*a->nz);
863:   VecRestoreArray(xx,&x);
864:   VecRestoreArray(zz,&y);
865:   return(0);
866: }

868: int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
869: {
870:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
871:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
872:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
873:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

876:   if (yy != zz) {VecCopy(yy,zz);}
877:   VecGetArray(xx,&x);
878:   VecGetArray(zz,&y);
879:   y = y + shift; /* shift for Fortran start by 1 indexing */
880:   for (i=0; i<m; i++) {
881:     idx    = a->j + a->i[i] + shift;
882:     v      = a->a + a->i[i] + shift;
883:     n      = a->i[i+1] - a->i[i];
884:     alpha1 = x[6*i];
885:     alpha2 = x[6*i+1];
886:     alpha3 = x[6*i+2];
887:     alpha4 = x[6*i+3];
888:     alpha5 = x[6*i+4];
889:     alpha6 = x[6*i+5];
890:     while (n-->0) {
891:       y[6*(*idx)]   += alpha1*(*v);
892:       y[6*(*idx)+1] += alpha2*(*v);
893:       y[6*(*idx)+2] += alpha3*(*v);
894:       y[6*(*idx)+3] += alpha4*(*v);
895:       y[6*(*idx)+4] += alpha5*(*v);
896:       y[6*(*idx)+5] += alpha6*(*v);
897:       idx++; v++;
898:     }
899:   }
900:   PetscLogFlops(12*a->nz);
901:   VecRestoreArray(xx,&x);
902:   VecRestoreArray(zz,&y);
903:   return(0);
904: }

906: /* ------------------------------------------------------------------------------*/
907: int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
908: {
909:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
910:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
911:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
912:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
913:   int           n,i,jrow,j;

916:   VecGetArray(xx,&x);
917:   VecGetArray(yy,&y);
918:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
919:   idx  = a->j;
920:   v    = a->a;
921:   ii   = a->i;

923:   v    += shift; /* shift for Fortran start by 1 indexing */
924:   idx  += shift;
925:   for (i=0; i<m; i++) {
926:     jrow = ii[i];
927:     n    = ii[i+1] - jrow;
928:     sum1  = 0.0;
929:     sum2  = 0.0;
930:     sum3  = 0.0;
931:     sum4  = 0.0;
932:     sum5  = 0.0;
933:     sum6  = 0.0;
934:     sum7  = 0.0;
935:     sum8  = 0.0;
936:     for (j=0; j<n; j++) {
937:       sum1 += v[jrow]*x[8*idx[jrow]];
938:       sum2 += v[jrow]*x[8*idx[jrow]+1];
939:       sum3 += v[jrow]*x[8*idx[jrow]+2];
940:       sum4 += v[jrow]*x[8*idx[jrow]+3];
941:       sum5 += v[jrow]*x[8*idx[jrow]+4];
942:       sum6 += v[jrow]*x[8*idx[jrow]+5];
943:       sum7 += v[jrow]*x[8*idx[jrow]+6];
944:       sum8 += v[jrow]*x[8*idx[jrow]+7];
945:       jrow++;
946:      }
947:     y[8*i]   = sum1;
948:     y[8*i+1] = sum2;
949:     y[8*i+2] = sum3;
950:     y[8*i+3] = sum4;
951:     y[8*i+4] = sum5;
952:     y[8*i+5] = sum6;
953:     y[8*i+6] = sum7;
954:     y[8*i+7] = sum8;
955:   }

957:   PetscLogFlops(16*a->nz - 8*m);
958:   VecRestoreArray(xx,&x);
959:   VecRestoreArray(yy,&y);
960:   return(0);
961: }

963: int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
964: {
965:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
966:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
967:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
968:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

971:   VecSet(&zero,yy);
972:   VecGetArray(xx,&x);
973:   VecGetArray(yy,&y);
974:   y = y + shift; /* shift for Fortran start by 1 indexing */
975:   for (i=0; i<m; i++) {
976:     idx    = a->j + a->i[i] + shift;
977:     v      = a->a + a->i[i] + shift;
978:     n      = a->i[i+1] - a->i[i];
979:     alpha1 = x[8*i];
980:     alpha2 = x[8*i+1];
981:     alpha3 = x[8*i+2];
982:     alpha4 = x[8*i+3];
983:     alpha5 = x[8*i+4];
984:     alpha6 = x[8*i+5];
985:     alpha7 = x[8*i+6];
986:     alpha8 = x[8*i+7];
987:     while (n-->0) {
988:       y[8*(*idx)]   += alpha1*(*v);
989:       y[8*(*idx)+1] += alpha2*(*v);
990:       y[8*(*idx)+2] += alpha3*(*v);
991:       y[8*(*idx)+3] += alpha4*(*v);
992:       y[8*(*idx)+4] += alpha5*(*v);
993:       y[8*(*idx)+5] += alpha6*(*v);
994:       y[8*(*idx)+6] += alpha7*(*v);
995:       y[8*(*idx)+7] += alpha8*(*v);
996:       idx++; v++;
997:     }
998:   }
999:   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1000:   VecRestoreArray(xx,&x);
1001:   VecRestoreArray(yy,&y);
1002:   return(0);
1003: }

1005: int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1006: {
1007:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1008:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1009:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1010:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
1011:   int           n,i,jrow,j;

1014:   if (yy != zz) {VecCopy(yy,zz);}
1015:   VecGetArray(xx,&x);
1016:   VecGetArray(zz,&y);
1017:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
1018:   idx  = a->j;
1019:   v    = a->a;
1020:   ii   = a->i;

1022:   v    += shift; /* shift for Fortran start by 1 indexing */
1023:   idx  += shift;
1024:   for (i=0; i<m; i++) {
1025:     jrow = ii[i];
1026:     n    = ii[i+1] - jrow;
1027:     sum1  = 0.0;
1028:     sum2  = 0.0;
1029:     sum3  = 0.0;
1030:     sum4  = 0.0;
1031:     sum5  = 0.0;
1032:     sum6  = 0.0;
1033:     sum7  = 0.0;
1034:     sum8  = 0.0;
1035:     for (j=0; j<n; j++) {
1036:       sum1 += v[jrow]*x[8*idx[jrow]];
1037:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1038:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1039:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1040:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1041:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1042:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1043:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1044:       jrow++;
1045:      }
1046:     y[8*i]   += sum1;
1047:     y[8*i+1] += sum2;
1048:     y[8*i+2] += sum3;
1049:     y[8*i+3] += sum4;
1050:     y[8*i+4] += sum5;
1051:     y[8*i+5] += sum6;
1052:     y[8*i+6] += sum7;
1053:     y[8*i+7] += sum8;
1054:   }

1056:   PetscLogFlops(16*a->nz);
1057:   VecRestoreArray(xx,&x);
1058:   VecRestoreArray(zz,&y);
1059:   return(0);
1060: }

1062: int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1063: {
1064:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1065:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1066:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1067:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

1070:   if (yy != zz) {VecCopy(yy,zz);}
1071:   VecGetArray(xx,&x);
1072:   VecGetArray(zz,&y);
1073:   y = y + shift; /* shift for Fortran start by 1 indexing */
1074:   for (i=0; i<m; i++) {
1075:     idx    = a->j + a->i[i] + shift;
1076:     v      = a->a + a->i[i] + shift;
1077:     n      = a->i[i+1] - a->i[i];
1078:     alpha1 = x[8*i];
1079:     alpha2 = x[8*i+1];
1080:     alpha3 = x[8*i+2];
1081:     alpha4 = x[8*i+3];
1082:     alpha5 = x[8*i+4];
1083:     alpha6 = x[8*i+5];
1084:     alpha7 = x[8*i+6];
1085:     alpha8 = x[8*i+7];
1086:     while (n-->0) {
1087:       y[8*(*idx)]   += alpha1*(*v);
1088:       y[8*(*idx)+1] += alpha2*(*v);
1089:       y[8*(*idx)+2] += alpha3*(*v);
1090:       y[8*(*idx)+3] += alpha4*(*v);
1091:       y[8*(*idx)+4] += alpha5*(*v);
1092:       y[8*(*idx)+5] += alpha6*(*v);
1093:       y[8*(*idx)+6] += alpha7*(*v);
1094:       y[8*(*idx)+7] += alpha8*(*v);
1095:       idx++; v++;
1096:     }
1097:   }
1098:   PetscLogFlops(16*a->nz);
1099:   VecRestoreArray(xx,&x);
1100:   VecRestoreArray(zz,&y);
1101:   return(0);
1102: }

1104: /*===================================================================================*/
1105: int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1106: {
1107:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1108:   int         ierr;

1111:   /* start the scatter */
1112:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1113:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
1114:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1115:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
1116:   return(0);
1117: }

1119: int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1120: {
1121:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1122:   int         ierr;
1124:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1125:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1126:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
1127:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1128:   return(0);
1129: }

1131: int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1132: {
1133:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1134:   int         ierr;

1137:   /* start the scatter */
1138:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1139:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
1140:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1141:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);
1142:   return(0);
1143: }

1145: int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1146: {
1147:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1148:   int         ierr;
1150:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1151:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1152:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
1153:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1154:   return(0);
1155: }

1157: /* ---------------------------------------------------------------------------------- */
1158: int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1159: {
1160:   int         ierr,size,n;
1161:   Mat_MPIMAIJ *b;
1162:   Mat         B;

1165:   ierr   = PetscObjectReference((PetscObject)A);

1167:   if (dof == 1) {
1168:     *maij = A;
1169:   } else {
1170:     MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);
1171:     B->assembled    = PETSC_TRUE;

1173:     MPI_Comm_size(A->comm,&size);
1174:     if (size == 1) {
1175:       MatSetType(B,MATSEQMAIJ);
1176:       B->ops->destroy = MatDestroy_SeqMAIJ;
1177:       b      = (Mat_MPIMAIJ*)B->data;
1178:       b->dof = dof;
1179:       b->AIJ = A;
1180:       if (dof == 2) {
1181:         B->ops->mult             = MatMult_SeqMAIJ_2;
1182:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1183:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1184:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1185:       } else if (dof == 3) {
1186:         B->ops->mult             = MatMult_SeqMAIJ_3;
1187:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1188:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1189:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1190:       } else if (dof == 4) {
1191:         B->ops->mult             = MatMult_SeqMAIJ_4;
1192:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1193:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1194:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1195:       } else if (dof == 5) {
1196:         B->ops->mult             = MatMult_SeqMAIJ_5;
1197:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1198:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1199:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1200:       } else if (dof == 6) {
1201:         B->ops->mult             = MatMult_SeqMAIJ_6;
1202:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1203:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1204:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1205:       } else if (dof == 8) {
1206:         B->ops->mult             = MatMult_SeqMAIJ_8;
1207:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1208:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1209:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1210:       } else {
1211:         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.govn",dof);
1212:       }
1213:     } else {
1214:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1215:       IS         from,to;
1216:       Vec        gvec;
1217:       int        *garray,i;

1219:       MatSetType(B,MATMPIMAIJ);
1220:       B->ops->destroy = MatDestroy_MPIMAIJ;
1221:       b      = (Mat_MPIMAIJ*)B->data;
1222:       b->dof = dof;
1223:       b->A   = A;
1224:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
1225:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

1227:       VecGetSize(mpiaij->lvec,&n);
1228:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);

1230:       /* create two temporary Index sets for build scatter gather */
1231:       PetscMalloc((n+1)*sizeof(int),&garray);
1232:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1233:       ISCreateBlock(A->comm,dof,n,garray,&from);
1234:       PetscFree(garray);
1235:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

1237:       /* create temporary global vector to generate scatter context */
1238:       VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);

1240:       /* generate the scatter context */
1241:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

1243:       ISDestroy(from);
1244:       ISDestroy(to);
1245:       VecDestroy(gvec);

1247:       B->ops->mult             = MatMult_MPIMAIJ_dof;
1248:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1249:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1250:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1251:     }
1252:     *maij = B;
1253:   }
1254:   return(0);
1255: }