Actual source code: maij.c

  1: /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for 
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19:  #include src/mat/impls/maij/maij.h

 21: #undef __FUNCT__  
 23: int MatMAIJGetAIJ(Mat A,Mat *B)
 24: {
 25:   int         ierr;
 26:   PetscTruth  ismpimaij,isseqmaij;

 29:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 30:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 31:   if (ismpimaij) {
 32:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 34:     *B = b->A;
 35:   } else if (isseqmaij) {
 36:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 38:     *B = b->AIJ;
 39:   } else {
 40:     *B = A;
 41:   }
 42:   return(0);
 43: }

 45: #undef __FUNCT__  
 47: int MatMAIJRedimension(Mat A,int dof,Mat *B)
 48: {
 50:   Mat Aij;

 53:   MatMAIJGetAIJ(A,&Aij);
 54:   MatCreateMAIJ(Aij,dof,B);
 55:   return(0);
 56: }

 58: #undef __FUNCT__  
 60: int MatDestroy_SeqMAIJ(Mat A)
 61: {
 62:   int         ierr;
 63:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 66:   if (b->AIJ) {
 67:     MatDestroy(b->AIJ);
 68:   }
 69:   PetscFree(b);
 70:   return(0);
 71: }

 73: #undef __FUNCT__  
 75: int MatDestroy_MPIMAIJ(Mat A)
 76: {
 77:   int         ierr;
 78:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 81:   if (b->AIJ) {
 82:     MatDestroy(b->AIJ);
 83:   }
 84:   if (b->OAIJ) {
 85:     MatDestroy(b->OAIJ);
 86:   }
 87:   if (b->A) {
 88:     MatDestroy(b->A);
 89:   }
 90:   if (b->ctx) {
 91:     VecScatterDestroy(b->ctx);
 92:   }
 93:   if (b->w) {
 94:     VecDestroy(b->w);
 95:   }
 96:   PetscFree(b);
 97:   return(0);
 98: }

100: EXTERN_C_BEGIN
101: #undef __FUNCT__  
103: int MatCreate_MAIJ(Mat A)
104: {
105:   int         ierr;
106:   Mat_MPIMAIJ *b;

109:   ierr     = PetscNew(Mat_MPIMAIJ,&b);
110:   A->data  = (void*)b;
111:   PetscMemzero(b,sizeof(Mat_MPIMAIJ));
112:   PetscMemzero(A->ops,sizeof(struct _MatOps));
113:   A->factor           = 0;
114:   A->mapping          = 0;

116:   b->AIJ  = 0;
117:   b->dof  = 0;
118:   b->OAIJ = 0;
119:   b->ctx  = 0;
120:   b->w    = 0;
121:   return(0);
122: }
123: EXTERN_C_END

125: /* --------------------------------------------------------------------------------------*/
126: #undef __FUNCT__  
128: int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
129: {
130:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
131:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
132:   PetscScalar   *x,*y,*v,sum1, sum2;
133:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
134:   int           n,i,jrow,j;

137:   VecGetArray(xx,&x);
138:   VecGetArray(yy,&y);
139:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
140:   idx  = a->j;
141:   v    = a->a;
142:   ii   = a->i;

144:   v    += shift; /* shift for Fortran start by 1 indexing */
145:   idx  += shift;
146:   for (i=0; i<m; i++) {
147:     jrow = ii[i];
148:     n    = ii[i+1] - jrow;
149:     sum1  = 0.0;
150:     sum2  = 0.0;
151:     for (j=0; j<n; j++) {
152:       sum1 += v[jrow]*x[2*idx[jrow]];
153:       sum2 += v[jrow]*x[2*idx[jrow]+1];
154:       jrow++;
155:      }
156:     y[2*i]   = sum1;
157:     y[2*i+1] = sum2;
158:   }

160:   PetscLogFlops(4*a->nz - 2*m);
161:   VecRestoreArray(xx,&x);
162:   VecRestoreArray(yy,&y);
163:   return(0);
164: }

166: #undef __FUNCT__  
168: int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
169: {
170:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
171:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
172:   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
173:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

176:   VecSet(&zero,yy);
177:   VecGetArray(xx,&x);
178:   VecGetArray(yy,&y);
179:   y = y + shift; /* shift for Fortran start by 1 indexing */
180:   for (i=0; i<m; i++) {
181:     idx    = a->j + a->i[i] + shift;
182:     v      = a->a + a->i[i] + shift;
183:     n      = a->i[i+1] - a->i[i];
184:     alpha1 = x[2*i];
185:     alpha2 = x[2*i+1];
186:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
187:   }
188:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
189:   VecRestoreArray(xx,&x);
190:   VecRestoreArray(yy,&y);
191:   return(0);
192: }

194: #undef __FUNCT__  
196: int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
197: {
198:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
199:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
200:   PetscScalar   *x,*y,*v,sum1, sum2;
201:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
202:   int           n,i,jrow,j;

205:   if (yy != zz) {VecCopy(yy,zz);}
206:   VecGetArray(xx,&x);
207:   VecGetArray(zz,&y);
208:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
209:   idx  = a->j;
210:   v    = a->a;
211:   ii   = a->i;

213:   v    += shift; /* shift for Fortran start by 1 indexing */
214:   idx  += shift;
215:   for (i=0; i<m; i++) {
216:     jrow = ii[i];
217:     n    = ii[i+1] - jrow;
218:     sum1  = 0.0;
219:     sum2  = 0.0;
220:     for (j=0; j<n; j++) {
221:       sum1 += v[jrow]*x[2*idx[jrow]];
222:       sum2 += v[jrow]*x[2*idx[jrow]+1];
223:       jrow++;
224:      }
225:     y[2*i]   += sum1;
226:     y[2*i+1] += sum2;
227:   }

229:   PetscLogFlops(4*a->nz - 2*m);
230:   VecRestoreArray(xx,&x);
231:   VecRestoreArray(zz,&y);
232:   return(0);
233: }
234: #undef __FUNCT__  
236: int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
237: {
238:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
239:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
240:   PetscScalar   *x,*y,*v,alpha1,alpha2;
241:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

244:   if (yy != zz) {VecCopy(yy,zz);}
245:   VecGetArray(xx,&x);
246:   VecGetArray(zz,&y);
247:   y = y + shift; /* shift for Fortran start by 1 indexing */
248:   for (i=0; i<m; i++) {
249:     idx   = a->j + a->i[i] + shift;
250:     v     = a->a + a->i[i] + shift;
251:     n     = a->i[i+1] - a->i[i];
252:     alpha1 = x[2*i];
253:     alpha2 = x[2*i+1];
254:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
255:   }
256:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
257:   VecRestoreArray(xx,&x);
258:   VecRestoreArray(zz,&y);
259:   return(0);
260: }
261: /* --------------------------------------------------------------------------------------*/
262: #undef __FUNCT__  
264: int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
265: {
266:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
267:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
268:   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
269:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
270:   int           n,i,jrow,j;

273:   VecGetArray(xx,&x);
274:   VecGetArray(yy,&y);
275:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
276:   idx  = a->j;
277:   v    = a->a;
278:   ii   = a->i;

280:   v    += shift; /* shift for Fortran start by 1 indexing */
281:   idx  += shift;
282:   for (i=0; i<m; i++) {
283:     jrow = ii[i];
284:     n    = ii[i+1] - jrow;
285:     sum1  = 0.0;
286:     sum2  = 0.0;
287:     sum3  = 0.0;
288:     for (j=0; j<n; j++) {
289:       sum1 += v[jrow]*x[3*idx[jrow]];
290:       sum2 += v[jrow]*x[3*idx[jrow]+1];
291:       sum3 += v[jrow]*x[3*idx[jrow]+2];
292:       jrow++;
293:      }
294:     y[3*i]   = sum1;
295:     y[3*i+1] = sum2;
296:     y[3*i+2] = sum3;
297:   }

299:   PetscLogFlops(6*a->nz - 3*m);
300:   VecRestoreArray(xx,&x);
301:   VecRestoreArray(yy,&y);
302:   return(0);
303: }

305: #undef __FUNCT__  
307: int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
308: {
309:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
310:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
311:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
312:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

315:   VecSet(&zero,yy);
316:   VecGetArray(xx,&x);
317:   VecGetArray(yy,&y);
318:   y = y + shift; /* shift for Fortran start by 1 indexing */
319:   for (i=0; i<m; i++) {
320:     idx    = a->j + a->i[i] + shift;
321:     v      = a->a + a->i[i] + shift;
322:     n      = a->i[i+1] - a->i[i];
323:     alpha1 = x[3*i];
324:     alpha2 = x[3*i+1];
325:     alpha3 = x[3*i+2];
326:     while (n-->0) {
327:       y[3*(*idx)]   += alpha1*(*v);
328:       y[3*(*idx)+1] += alpha2*(*v);
329:       y[3*(*idx)+2] += alpha3*(*v);
330:       idx++; v++;
331:     }
332:   }
333:   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
334:   VecRestoreArray(xx,&x);
335:   VecRestoreArray(yy,&y);
336:   return(0);
337: }

339: #undef __FUNCT__  
341: int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
342: {
343:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
344:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
345:   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
346:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
347:   int           n,i,jrow,j;

350:   if (yy != zz) {VecCopy(yy,zz);}
351:   VecGetArray(xx,&x);
352:   VecGetArray(zz,&y);
353:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
354:   idx  = a->j;
355:   v    = a->a;
356:   ii   = a->i;

358:   v    += shift; /* shift for Fortran start by 1 indexing */
359:   idx  += shift;
360:   for (i=0; i<m; i++) {
361:     jrow = ii[i];
362:     n    = ii[i+1] - jrow;
363:     sum1  = 0.0;
364:     sum2  = 0.0;
365:     sum3  = 0.0;
366:     for (j=0; j<n; j++) {
367:       sum1 += v[jrow]*x[3*idx[jrow]];
368:       sum2 += v[jrow]*x[3*idx[jrow]+1];
369:       sum3 += v[jrow]*x[3*idx[jrow]+2];
370:       jrow++;
371:      }
372:     y[3*i]   += sum1;
373:     y[3*i+1] += sum2;
374:     y[3*i+2] += sum3;
375:   }

377:   PetscLogFlops(6*a->nz);
378:   VecRestoreArray(xx,&x);
379:   VecRestoreArray(zz,&y);
380:   return(0);
381: }
382: #undef __FUNCT__  
384: int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
385: {
386:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
387:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
388:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
389:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

392:   if (yy != zz) {VecCopy(yy,zz);}
393:   VecGetArray(xx,&x);
394:   VecGetArray(zz,&y);
395:   y = y + shift; /* shift for Fortran start by 1 indexing */
396:   for (i=0; i<m; i++) {
397:     idx    = a->j + a->i[i] + shift;
398:     v      = a->a + a->i[i] + shift;
399:     n      = a->i[i+1] - a->i[i];
400:     alpha1 = x[3*i];
401:     alpha2 = x[3*i+1];
402:     alpha3 = x[3*i+2];
403:     while (n-->0) {
404:       y[3*(*idx)]   += alpha1*(*v);
405:       y[3*(*idx)+1] += alpha2*(*v);
406:       y[3*(*idx)+2] += alpha3*(*v);
407:       idx++; v++;
408:     }
409:   }
410:   PetscLogFlops(6*a->nz);
411:   VecRestoreArray(xx,&x);
412:   VecRestoreArray(zz,&y);
413:   return(0);
414: }

416: /* ------------------------------------------------------------------------------*/
417: #undef __FUNCT__  
419: int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
420: {
421:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
422:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
423:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
424:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
425:   int           n,i,jrow,j;

428:   VecGetArray(xx,&x);
429:   VecGetArray(yy,&y);
430:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
431:   idx  = a->j;
432:   v    = a->a;
433:   ii   = a->i;

435:   v    += shift; /* shift for Fortran start by 1 indexing */
436:   idx  += shift;
437:   for (i=0; i<m; i++) {
438:     jrow = ii[i];
439:     n    = ii[i+1] - jrow;
440:     sum1  = 0.0;
441:     sum2  = 0.0;
442:     sum3  = 0.0;
443:     sum4  = 0.0;
444:     for (j=0; j<n; j++) {
445:       sum1 += v[jrow]*x[4*idx[jrow]];
446:       sum2 += v[jrow]*x[4*idx[jrow]+1];
447:       sum3 += v[jrow]*x[4*idx[jrow]+2];
448:       sum4 += v[jrow]*x[4*idx[jrow]+3];
449:       jrow++;
450:      }
451:     y[4*i]   = sum1;
452:     y[4*i+1] = sum2;
453:     y[4*i+2] = sum3;
454:     y[4*i+3] = sum4;
455:   }

457:   PetscLogFlops(8*a->nz - 4*m);
458:   VecRestoreArray(xx,&x);
459:   VecRestoreArray(yy,&y);
460:   return(0);
461: }

463: #undef __FUNCT__  
465: int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
466: {
467:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
468:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
469:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
470:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

473:   VecSet(&zero,yy);
474:   VecGetArray(xx,&x);
475:   VecGetArray(yy,&y);
476:   y = y + shift; /* shift for Fortran start by 1 indexing */
477:   for (i=0; i<m; i++) {
478:     idx    = a->j + a->i[i] + shift;
479:     v      = a->a + a->i[i] + shift;
480:     n      = a->i[i+1] - a->i[i];
481:     alpha1 = x[4*i];
482:     alpha2 = x[4*i+1];
483:     alpha3 = x[4*i+2];
484:     alpha4 = x[4*i+3];
485:     while (n-->0) {
486:       y[4*(*idx)]   += alpha1*(*v);
487:       y[4*(*idx)+1] += alpha2*(*v);
488:       y[4*(*idx)+2] += alpha3*(*v);
489:       y[4*(*idx)+3] += alpha4*(*v);
490:       idx++; v++;
491:     }
492:   }
493:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
494:   VecRestoreArray(xx,&x);
495:   VecRestoreArray(yy,&y);
496:   return(0);
497: }

499: #undef __FUNCT__  
501: int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
502: {
503:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
504:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
505:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
506:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
507:   int           n,i,jrow,j;

510:   if (yy != zz) {VecCopy(yy,zz);}
511:   VecGetArray(xx,&x);
512:   VecGetArray(zz,&y);
513:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
514:   idx  = a->j;
515:   v    = a->a;
516:   ii   = a->i;

518:   v    += shift; /* shift for Fortran start by 1 indexing */
519:   idx  += shift;
520:   for (i=0; i<m; i++) {
521:     jrow = ii[i];
522:     n    = ii[i+1] - jrow;
523:     sum1  = 0.0;
524:     sum2  = 0.0;
525:     sum3  = 0.0;
526:     sum4  = 0.0;
527:     for (j=0; j<n; j++) {
528:       sum1 += v[jrow]*x[4*idx[jrow]];
529:       sum2 += v[jrow]*x[4*idx[jrow]+1];
530:       sum3 += v[jrow]*x[4*idx[jrow]+2];
531:       sum4 += v[jrow]*x[4*idx[jrow]+3];
532:       jrow++;
533:      }
534:     y[4*i]   += sum1;
535:     y[4*i+1] += sum2;
536:     y[4*i+2] += sum3;
537:     y[4*i+3] += sum4;
538:   }

540:   PetscLogFlops(8*a->nz - 4*m);
541:   VecRestoreArray(xx,&x);
542:   VecRestoreArray(zz,&y);
543:   return(0);
544: }
545: #undef __FUNCT__  
547: int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
548: {
549:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
550:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
551:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
552:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

555:   if (yy != zz) {VecCopy(yy,zz);}
556:   VecGetArray(xx,&x);
557:   VecGetArray(zz,&y);
558:   y = y + shift; /* shift for Fortran start by 1 indexing */
559:   for (i=0; i<m; i++) {
560:     idx    = a->j + a->i[i] + shift;
561:     v      = a->a + a->i[i] + shift;
562:     n      = a->i[i+1] - a->i[i];
563:     alpha1 = x[4*i];
564:     alpha2 = x[4*i+1];
565:     alpha3 = x[4*i+2];
566:     alpha4 = x[4*i+3];
567:     while (n-->0) {
568:       y[4*(*idx)]   += alpha1*(*v);
569:       y[4*(*idx)+1] += alpha2*(*v);
570:       y[4*(*idx)+2] += alpha3*(*v);
571:       y[4*(*idx)+3] += alpha4*(*v);
572:       idx++; v++;
573:     }
574:   }
575:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
576:   VecRestoreArray(xx,&x);
577:   VecRestoreArray(zz,&y);
578:   return(0);
579: }
580: /* ------------------------------------------------------------------------------*/

582: #undef __FUNCT__  
584: int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
585: {
586:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
587:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
588:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
589:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
590:   int           n,i,jrow,j;

593:   VecGetArray(xx,&x);
594:   VecGetArray(yy,&y);
595:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
596:   idx  = a->j;
597:   v    = a->a;
598:   ii   = a->i;

600:   v    += shift; /* shift for Fortran start by 1 indexing */
601:   idx  += shift;
602:   for (i=0; i<m; i++) {
603:     jrow = ii[i];
604:     n    = ii[i+1] - jrow;
605:     sum1  = 0.0;
606:     sum2  = 0.0;
607:     sum3  = 0.0;
608:     sum4  = 0.0;
609:     sum5  = 0.0;
610:     for (j=0; j<n; j++) {
611:       sum1 += v[jrow]*x[5*idx[jrow]];
612:       sum2 += v[jrow]*x[5*idx[jrow]+1];
613:       sum3 += v[jrow]*x[5*idx[jrow]+2];
614:       sum4 += v[jrow]*x[5*idx[jrow]+3];
615:       sum5 += v[jrow]*x[5*idx[jrow]+4];
616:       jrow++;
617:      }
618:     y[5*i]   = sum1;
619:     y[5*i+1] = sum2;
620:     y[5*i+2] = sum3;
621:     y[5*i+3] = sum4;
622:     y[5*i+4] = sum5;
623:   }

625:   PetscLogFlops(10*a->nz - 5*m);
626:   VecRestoreArray(xx,&x);
627:   VecRestoreArray(yy,&y);
628:   return(0);
629: }

631: #undef __FUNCT__  
633: int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
634: {
635:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
636:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
637:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
638:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

641:   VecSet(&zero,yy);
642:   VecGetArray(xx,&x);
643:   VecGetArray(yy,&y);
644:   y = y + shift; /* shift for Fortran start by 1 indexing */
645:   for (i=0; i<m; i++) {
646:     idx    = a->j + a->i[i] + shift;
647:     v      = a->a + a->i[i] + shift;
648:     n      = a->i[i+1] - a->i[i];
649:     alpha1 = x[5*i];
650:     alpha2 = x[5*i+1];
651:     alpha3 = x[5*i+2];
652:     alpha4 = x[5*i+3];
653:     alpha5 = x[5*i+4];
654:     while (n-->0) {
655:       y[5*(*idx)]   += alpha1*(*v);
656:       y[5*(*idx)+1] += alpha2*(*v);
657:       y[5*(*idx)+2] += alpha3*(*v);
658:       y[5*(*idx)+3] += alpha4*(*v);
659:       y[5*(*idx)+4] += alpha5*(*v);
660:       idx++; v++;
661:     }
662:   }
663:   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
664:   VecRestoreArray(xx,&x);
665:   VecRestoreArray(yy,&y);
666:   return(0);
667: }

669: #undef __FUNCT__  
671: int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
672: {
673:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
674:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
675:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
676:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
677:   int           n,i,jrow,j;

680:   if (yy != zz) {VecCopy(yy,zz);}
681:   VecGetArray(xx,&x);
682:   VecGetArray(zz,&y);
683:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
684:   idx  = a->j;
685:   v    = a->a;
686:   ii   = a->i;

688:   v    += shift; /* shift for Fortran start by 1 indexing */
689:   idx  += shift;
690:   for (i=0; i<m; i++) {
691:     jrow = ii[i];
692:     n    = ii[i+1] - jrow;
693:     sum1  = 0.0;
694:     sum2  = 0.0;
695:     sum3  = 0.0;
696:     sum4  = 0.0;
697:     sum5  = 0.0;
698:     for (j=0; j<n; j++) {
699:       sum1 += v[jrow]*x[5*idx[jrow]];
700:       sum2 += v[jrow]*x[5*idx[jrow]+1];
701:       sum3 += v[jrow]*x[5*idx[jrow]+2];
702:       sum4 += v[jrow]*x[5*idx[jrow]+3];
703:       sum5 += v[jrow]*x[5*idx[jrow]+4];
704:       jrow++;
705:      }
706:     y[5*i]   += sum1;
707:     y[5*i+1] += sum2;
708:     y[5*i+2] += sum3;
709:     y[5*i+3] += sum4;
710:     y[5*i+4] += sum5;
711:   }

713:   PetscLogFlops(10*a->nz);
714:   VecRestoreArray(xx,&x);
715:   VecRestoreArray(zz,&y);
716:   return(0);
717: }

719: #undef __FUNCT__  
721: int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
722: {
723:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
724:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
725:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
726:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

729:   if (yy != zz) {VecCopy(yy,zz);}
730:   VecGetArray(xx,&x);
731:   VecGetArray(zz,&y);
732:   y = y + shift; /* shift for Fortran start by 1 indexing */
733:   for (i=0; i<m; i++) {
734:     idx    = a->j + a->i[i] + shift;
735:     v      = a->a + a->i[i] + shift;
736:     n      = a->i[i+1] - a->i[i];
737:     alpha1 = x[5*i];
738:     alpha2 = x[5*i+1];
739:     alpha3 = x[5*i+2];
740:     alpha4 = x[5*i+3];
741:     alpha5 = x[5*i+4];
742:     while (n-->0) {
743:       y[5*(*idx)]   += alpha1*(*v);
744:       y[5*(*idx)+1] += alpha2*(*v);
745:       y[5*(*idx)+2] += alpha3*(*v);
746:       y[5*(*idx)+3] += alpha4*(*v);
747:       y[5*(*idx)+4] += alpha5*(*v);
748:       idx++; v++;
749:     }
750:   }
751:   PetscLogFlops(10*a->nz);
752:   VecRestoreArray(xx,&x);
753:   VecRestoreArray(zz,&y);
754:   return(0);
755: }

757: /* ------------------------------------------------------------------------------*/
758: #undef __FUNCT__  
760: int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
761: {
762:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
763:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
764:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
765:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
766:   int           n,i,jrow,j;

769:   VecGetArray(xx,&x);
770:   VecGetArray(yy,&y);
771:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
772:   idx  = a->j;
773:   v    = a->a;
774:   ii   = a->i;

776:   v    += shift; /* shift for Fortran start by 1 indexing */
777:   idx  += shift;
778:   for (i=0; i<m; i++) {
779:     jrow = ii[i];
780:     n    = ii[i+1] - jrow;
781:     sum1  = 0.0;
782:     sum2  = 0.0;
783:     sum3  = 0.0;
784:     sum4  = 0.0;
785:     sum5  = 0.0;
786:     sum6  = 0.0;
787:     for (j=0; j<n; j++) {
788:       sum1 += v[jrow]*x[6*idx[jrow]];
789:       sum2 += v[jrow]*x[6*idx[jrow]+1];
790:       sum3 += v[jrow]*x[6*idx[jrow]+2];
791:       sum4 += v[jrow]*x[6*idx[jrow]+3];
792:       sum5 += v[jrow]*x[6*idx[jrow]+4];
793:       sum6 += v[jrow]*x[6*idx[jrow]+5];
794:       jrow++;
795:      }
796:     y[6*i]   = sum1;
797:     y[6*i+1] = sum2;
798:     y[6*i+2] = sum3;
799:     y[6*i+3] = sum4;
800:     y[6*i+4] = sum5;
801:     y[6*i+5] = sum6;
802:   }

804:   PetscLogFlops(12*a->nz - 6*m);
805:   VecRestoreArray(xx,&x);
806:   VecRestoreArray(yy,&y);
807:   return(0);
808: }

810: #undef __FUNCT__  
812: int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
813: {
814:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
815:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
816:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
817:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

820:   VecSet(&zero,yy);
821:   VecGetArray(xx,&x);
822:   VecGetArray(yy,&y);
823:   y = y + shift; /* shift for Fortran start by 1 indexing */
824:   for (i=0; i<m; i++) {
825:     idx    = a->j + a->i[i] + shift;
826:     v      = a->a + a->i[i] + shift;
827:     n      = a->i[i+1] - a->i[i];
828:     alpha1 = x[6*i];
829:     alpha2 = x[6*i+1];
830:     alpha3 = x[6*i+2];
831:     alpha4 = x[6*i+3];
832:     alpha5 = x[6*i+4];
833:     alpha6 = x[6*i+5];
834:     while (n-->0) {
835:       y[6*(*idx)]   += alpha1*(*v);
836:       y[6*(*idx)+1] += alpha2*(*v);
837:       y[6*(*idx)+2] += alpha3*(*v);
838:       y[6*(*idx)+3] += alpha4*(*v);
839:       y[6*(*idx)+4] += alpha5*(*v);
840:       y[6*(*idx)+5] += alpha6*(*v);
841:       idx++; v++;
842:     }
843:   }
844:   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
845:   VecRestoreArray(xx,&x);
846:   VecRestoreArray(yy,&y);
847:   return(0);
848: }

850: #undef __FUNCT__  
852: int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
853: {
854:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
855:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
856:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
857:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
858:   int           n,i,jrow,j;

861:   if (yy != zz) {VecCopy(yy,zz);}
862:   VecGetArray(xx,&x);
863:   VecGetArray(zz,&y);
864:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
865:   idx  = a->j;
866:   v    = a->a;
867:   ii   = a->i;

869:   v    += shift; /* shift for Fortran start by 1 indexing */
870:   idx  += shift;
871:   for (i=0; i<m; i++) {
872:     jrow = ii[i];
873:     n    = ii[i+1] - jrow;
874:     sum1  = 0.0;
875:     sum2  = 0.0;
876:     sum3  = 0.0;
877:     sum4  = 0.0;
878:     sum5  = 0.0;
879:     sum6  = 0.0;
880:     for (j=0; j<n; j++) {
881:       sum1 += v[jrow]*x[6*idx[jrow]];
882:       sum2 += v[jrow]*x[6*idx[jrow]+1];
883:       sum3 += v[jrow]*x[6*idx[jrow]+2];
884:       sum4 += v[jrow]*x[6*idx[jrow]+3];
885:       sum5 += v[jrow]*x[6*idx[jrow]+4];
886:       sum6 += v[jrow]*x[6*idx[jrow]+5];
887:       jrow++;
888:      }
889:     y[6*i]   += sum1;
890:     y[6*i+1] += sum2;
891:     y[6*i+2] += sum3;
892:     y[6*i+3] += sum4;
893:     y[6*i+4] += sum5;
894:     y[6*i+5] += sum6;
895:   }

897:   PetscLogFlops(12*a->nz);
898:   VecRestoreArray(xx,&x);
899:   VecRestoreArray(zz,&y);
900:   return(0);
901: }

903: #undef __FUNCT__  
905: int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
906: {
907:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
908:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
909:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
910:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

913:   if (yy != zz) {VecCopy(yy,zz);}
914:   VecGetArray(xx,&x);
915:   VecGetArray(zz,&y);
916:   y = y + shift; /* shift for Fortran start by 1 indexing */
917:   for (i=0; i<m; i++) {
918:     idx    = a->j + a->i[i] + shift;
919:     v      = a->a + a->i[i] + shift;
920:     n      = a->i[i+1] - a->i[i];
921:     alpha1 = x[6*i];
922:     alpha2 = x[6*i+1];
923:     alpha3 = x[6*i+2];
924:     alpha4 = x[6*i+3];
925:     alpha5 = x[6*i+4];
926:     alpha6 = x[6*i+5];
927:     while (n-->0) {
928:       y[6*(*idx)]   += alpha1*(*v);
929:       y[6*(*idx)+1] += alpha2*(*v);
930:       y[6*(*idx)+2] += alpha3*(*v);
931:       y[6*(*idx)+3] += alpha4*(*v);
932:       y[6*(*idx)+4] += alpha5*(*v);
933:       y[6*(*idx)+5] += alpha6*(*v);
934:       idx++; v++;
935:     }
936:   }
937:   PetscLogFlops(12*a->nz);
938:   VecRestoreArray(xx,&x);
939:   VecRestoreArray(zz,&y);
940:   return(0);
941: }

943: /* ------------------------------------------------------------------------------*/
944: #undef __FUNCT__  
946: int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
947: {
948:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
949:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
950:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
951:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
952:   int           n,i,jrow,j;

955:   VecGetArray(xx,&x);
956:   VecGetArray(yy,&y);
957:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
958:   idx  = a->j;
959:   v    = a->a;
960:   ii   = a->i;

962:   v    += shift; /* shift for Fortran start by 1 indexing */
963:   idx  += shift;
964:   for (i=0; i<m; i++) {
965:     jrow = ii[i];
966:     n    = ii[i+1] - jrow;
967:     sum1  = 0.0;
968:     sum2  = 0.0;
969:     sum3  = 0.0;
970:     sum4  = 0.0;
971:     sum5  = 0.0;
972:     sum6  = 0.0;
973:     sum7  = 0.0;
974:     sum8  = 0.0;
975:     for (j=0; j<n; j++) {
976:       sum1 += v[jrow]*x[8*idx[jrow]];
977:       sum2 += v[jrow]*x[8*idx[jrow]+1];
978:       sum3 += v[jrow]*x[8*idx[jrow]+2];
979:       sum4 += v[jrow]*x[8*idx[jrow]+3];
980:       sum5 += v[jrow]*x[8*idx[jrow]+4];
981:       sum6 += v[jrow]*x[8*idx[jrow]+5];
982:       sum7 += v[jrow]*x[8*idx[jrow]+6];
983:       sum8 += v[jrow]*x[8*idx[jrow]+7];
984:       jrow++;
985:      }
986:     y[8*i]   = sum1;
987:     y[8*i+1] = sum2;
988:     y[8*i+2] = sum3;
989:     y[8*i+3] = sum4;
990:     y[8*i+4] = sum5;
991:     y[8*i+5] = sum6;
992:     y[8*i+6] = sum7;
993:     y[8*i+7] = sum8;
994:   }

996:   PetscLogFlops(16*a->nz - 8*m);
997:   VecRestoreArray(xx,&x);
998:   VecRestoreArray(yy,&y);
999:   return(0);
1000: }

1002: #undef __FUNCT__  
1004: int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1005: {
1006:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1007:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1008:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1009:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

1012:   VecSet(&zero,yy);
1013:   VecGetArray(xx,&x);
1014:   VecGetArray(yy,&y);
1015:   y = y + shift; /* shift for Fortran start by 1 indexing */
1016:   for (i=0; i<m; i++) {
1017:     idx    = a->j + a->i[i] + shift;
1018:     v      = a->a + a->i[i] + shift;
1019:     n      = a->i[i+1] - a->i[i];
1020:     alpha1 = x[8*i];
1021:     alpha2 = x[8*i+1];
1022:     alpha3 = x[8*i+2];
1023:     alpha4 = x[8*i+3];
1024:     alpha5 = x[8*i+4];
1025:     alpha6 = x[8*i+5];
1026:     alpha7 = x[8*i+6];
1027:     alpha8 = x[8*i+7];
1028:     while (n-->0) {
1029:       y[8*(*idx)]   += alpha1*(*v);
1030:       y[8*(*idx)+1] += alpha2*(*v);
1031:       y[8*(*idx)+2] += alpha3*(*v);
1032:       y[8*(*idx)+3] += alpha4*(*v);
1033:       y[8*(*idx)+4] += alpha5*(*v);
1034:       y[8*(*idx)+5] += alpha6*(*v);
1035:       y[8*(*idx)+6] += alpha7*(*v);
1036:       y[8*(*idx)+7] += alpha8*(*v);
1037:       idx++; v++;
1038:     }
1039:   }
1040:   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1041:   VecRestoreArray(xx,&x);
1042:   VecRestoreArray(yy,&y);
1043:   return(0);
1044: }

1046: #undef __FUNCT__  
1048: int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1049: {
1050:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1051:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1052:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1053:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
1054:   int           n,i,jrow,j;

1057:   if (yy != zz) {VecCopy(yy,zz);}
1058:   VecGetArray(xx,&x);
1059:   VecGetArray(zz,&y);
1060:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
1061:   idx  = a->j;
1062:   v    = a->a;
1063:   ii   = a->i;

1065:   v    += shift; /* shift for Fortran start by 1 indexing */
1066:   idx  += shift;
1067:   for (i=0; i<m; i++) {
1068:     jrow = ii[i];
1069:     n    = ii[i+1] - jrow;
1070:     sum1  = 0.0;
1071:     sum2  = 0.0;
1072:     sum3  = 0.0;
1073:     sum4  = 0.0;
1074:     sum5  = 0.0;
1075:     sum6  = 0.0;
1076:     sum7  = 0.0;
1077:     sum8  = 0.0;
1078:     for (j=0; j<n; j++) {
1079:       sum1 += v[jrow]*x[8*idx[jrow]];
1080:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1081:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1082:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1083:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1084:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1085:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1086:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1087:       jrow++;
1088:      }
1089:     y[8*i]   += sum1;
1090:     y[8*i+1] += sum2;
1091:     y[8*i+2] += sum3;
1092:     y[8*i+3] += sum4;
1093:     y[8*i+4] += sum5;
1094:     y[8*i+5] += sum6;
1095:     y[8*i+6] += sum7;
1096:     y[8*i+7] += sum8;
1097:   }

1099:   PetscLogFlops(16*a->nz);
1100:   VecRestoreArray(xx,&x);
1101:   VecRestoreArray(zz,&y);
1102:   return(0);
1103: }

1105: #undef __FUNCT__  
1107: int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1108: {
1109:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1110:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1111:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1112:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

1115:   if (yy != zz) {VecCopy(yy,zz);}
1116:   VecGetArray(xx,&x);
1117:   VecGetArray(zz,&y);
1118:   y = y + shift; /* shift for Fortran start by 1 indexing */
1119:   for (i=0; i<m; i++) {
1120:     idx    = a->j + a->i[i] + shift;
1121:     v      = a->a + a->i[i] + shift;
1122:     n      = a->i[i+1] - a->i[i];
1123:     alpha1 = x[8*i];
1124:     alpha2 = x[8*i+1];
1125:     alpha3 = x[8*i+2];
1126:     alpha4 = x[8*i+3];
1127:     alpha5 = x[8*i+4];
1128:     alpha6 = x[8*i+5];
1129:     alpha7 = x[8*i+6];
1130:     alpha8 = x[8*i+7];
1131:     while (n-->0) {
1132:       y[8*(*idx)]   += alpha1*(*v);
1133:       y[8*(*idx)+1] += alpha2*(*v);
1134:       y[8*(*idx)+2] += alpha3*(*v);
1135:       y[8*(*idx)+3] += alpha4*(*v);
1136:       y[8*(*idx)+4] += alpha5*(*v);
1137:       y[8*(*idx)+5] += alpha6*(*v);
1138:       y[8*(*idx)+6] += alpha7*(*v);
1139:       y[8*(*idx)+7] += alpha8*(*v);
1140:       idx++; v++;
1141:     }
1142:   }
1143:   PetscLogFlops(16*a->nz);
1144:   VecRestoreArray(xx,&x);
1145:   VecRestoreArray(zz,&y);
1146:   return(0);
1147: }

1149: /*===================================================================================*/
1150: #undef __FUNCT__  
1152: int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1153: {
1154:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1155:   int         ierr;

1158:   /* start the scatter */
1159:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1160:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
1161:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1162:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
1163:   return(0);
1164: }

1166: #undef __FUNCT__  
1168: int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1169: {
1170:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1171:   int         ierr;
1173:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1174:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1175:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
1176:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1177:   return(0);
1178: }

1180: #undef __FUNCT__  
1182: int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1183: {
1184:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1185:   int         ierr;

1188:   /* start the scatter */
1189:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1190:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
1191:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1192:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);
1193:   return(0);
1194: }

1196: #undef __FUNCT__  
1198: int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1199: {
1200:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1201:   int         ierr;
1203:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1204:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1205:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
1206:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1207:   return(0);
1208: }

1210: /* ---------------------------------------------------------------------------------- */
1211: #undef __FUNCT__  
1213: int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1214: {
1215:   int         ierr,size,n;
1216:   Mat_MPIMAIJ *b;
1217:   Mat         B;

1220:   ierr   = PetscObjectReference((PetscObject)A);

1222:   if (dof == 1) {
1223:     *maij = A;
1224:   } else {
1225:     MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);
1226:     B->assembled    = PETSC_TRUE;

1228:     MPI_Comm_size(A->comm,&size);
1229:     if (size == 1) {
1230:       MatSetType(B,MATSEQMAIJ);
1231:       B->ops->destroy = MatDestroy_SeqMAIJ;
1232:       b      = (Mat_MPIMAIJ*)B->data;
1233:       b->dof = dof;
1234:       b->AIJ = A;
1235:       if (dof == 2) {
1236:         B->ops->mult             = MatMult_SeqMAIJ_2;
1237:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1238:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1239:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1240:       } else if (dof == 3) {
1241:         B->ops->mult             = MatMult_SeqMAIJ_3;
1242:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1243:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1244:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1245:       } else if (dof == 4) {
1246:         B->ops->mult             = MatMult_SeqMAIJ_4;
1247:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1248:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1249:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1250:       } else if (dof == 5) {
1251:         B->ops->mult             = MatMult_SeqMAIJ_5;
1252:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1253:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1254:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1255:       } else if (dof == 6) {
1256:         B->ops->mult             = MatMult_SeqMAIJ_6;
1257:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1258:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1259:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1260:       } else if (dof == 8) {
1261:         B->ops->mult             = MatMult_SeqMAIJ_8;
1262:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1263:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1264:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1265:       } else {
1266:         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.govn",dof);
1267:       }
1268:     } else {
1269:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1270:       IS         from,to;
1271:       Vec        gvec;
1272:       int        *garray,i;

1274:       MatSetType(B,MATMPIMAIJ);
1275:       B->ops->destroy = MatDestroy_MPIMAIJ;
1276:       b      = (Mat_MPIMAIJ*)B->data;
1277:       b->dof = dof;
1278:       b->A   = A;
1279:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
1280:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

1282:       VecGetSize(mpiaij->lvec,&n);
1283:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);

1285:       /* create two temporary Index sets for build scatter gather */
1286:       PetscMalloc((n+1)*sizeof(int),&garray);
1287:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1288:       ISCreateBlock(A->comm,dof,n,garray,&from);
1289:       PetscFree(garray);
1290:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

1292:       /* create temporary global vector to generate scatter context */
1293:       VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);

1295:       /* generate the scatter context */
1296:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

1298:       ISDestroy(from);
1299:       ISDestroy(to);
1300:       VecDestroy(gvec);

1302:       B->ops->mult             = MatMult_MPIMAIJ_dof;
1303:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1304:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1305:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1306:     }
1307:     *maij = B;
1308:   }
1309:   return(0);
1310: }