Actual source code: maij.c

  1: /*
  2:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  3:   This format is used for restriction and interpolation operations for 
  4:   multicomponent problems. It interpolates each component the same way
  5:   independently.

  7:      We provide:
  8:          MatMult()
  9:          MatMultTranspose()
 10:          MatMultTransposeAdd()
 11:          MatMultAdd()
 12:           and
 13:          MatCreateMAIJ(Mat,dof,Mat*)

 15:      This single directory handles both the sequential and parallel codes
 16: */

 18:  #include src/mat/impls/maij/maij.h
 19:  #include vecimpl.h

 23: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
 24: {
 26:   PetscTruth     ismpimaij,isseqmaij;

 29:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 30:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 31:   if (ismpimaij) {
 32:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 34:     *B = b->A;
 35:   } else if (isseqmaij) {
 36:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 38:     *B = b->AIJ;
 39:   } else {
 40:     *B = A;
 41:   }
 42:   return(0);
 43: }

 47: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 48: {
 50:   Mat            Aij;

 53:   MatMAIJGetAIJ(A,&Aij);
 54:   MatCreateMAIJ(Aij,dof,B);
 55:   return(0);
 56: }

 60: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 61: {
 63:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 66:   if (b->AIJ) {
 67:     MatDestroy(b->AIJ);
 68:   }
 69:   PetscFree(b);
 70:   return(0);
 71: }

 75: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
 76: {
 78:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

 81:   if (b->AIJ) {
 82:     MatDestroy(b->AIJ);
 83:   }
 84:   if (b->OAIJ) {
 85:     MatDestroy(b->OAIJ);
 86:   }
 87:   if (b->A) {
 88:     MatDestroy(b->A);
 89:   }
 90:   if (b->ctx) {
 91:     VecScatterDestroy(b->ctx);
 92:   }
 93:   if (b->w) {
 94:     VecDestroy(b->w);
 95:   }
 96:   PetscFree(b);
 97:   return(0);
 98: }

100: /*MC
101:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
102:   multicomponent problems, interpolating or restricting each component the same way independently.
103:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

105:   Operations provided:
106: . MatMult
107: . MatMultTranspose
108: . MatMultAdd
109: . MatMultTransposeAdd

111:   Level: advanced

113: .seealso: MatCreateSeqDense
114: M*/

119: PetscErrorCode MatCreate_MAIJ(Mat A)
120: {
122:   Mat_MPIMAIJ    *b;

125:   PetscNew(Mat_MPIMAIJ,&b);
126:   A->data  = (void*)b;
127:   PetscMemzero(b,sizeof(Mat_MPIMAIJ));
128:   PetscMemzero(A->ops,sizeof(struct _MatOps));
129:   A->factor           = 0;
130:   A->mapping          = 0;

132:   b->AIJ  = 0;
133:   b->dof  = 0;
134:   b->OAIJ = 0;
135:   b->ctx  = 0;
136:   b->w    = 0;
137:   return(0);
138: }

141: /* --------------------------------------------------------------------------------------*/
144: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
145: {
146:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
147:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
148:   PetscScalar    *x,*y,*v,sum1, sum2;
150:   PetscInt       m = b->AIJ->m,*idx,*ii;
151:   PetscInt       n,i,jrow,j;

154:   VecGetArray(xx,&x);
155:   VecGetArray(yy,&y);
156:   idx  = a->j;
157:   v    = a->a;
158:   ii   = a->i;

160:   for (i=0; i<m; i++) {
161:     jrow = ii[i];
162:     n    = ii[i+1] - jrow;
163:     sum1  = 0.0;
164:     sum2  = 0.0;
165:     for (j=0; j<n; j++) {
166:       sum1 += v[jrow]*x[2*idx[jrow]];
167:       sum2 += v[jrow]*x[2*idx[jrow]+1];
168:       jrow++;
169:      }
170:     y[2*i]   = sum1;
171:     y[2*i+1] = sum2;
172:   }

174:   PetscLogFlops(4*a->nz - 2*m);
175:   VecRestoreArray(xx,&x);
176:   VecRestoreArray(yy,&y);
177:   return(0);
178: }

182: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
185:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
186:   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
188:   PetscInt       m = b->AIJ->m,n,i,*idx;

191:   VecSet(&zero,yy);
192:   VecGetArray(xx,&x);
193:   VecGetArray(yy,&y);
194: 
195:   for (i=0; i<m; i++) {
196:     idx    = a->j + a->i[i] ;
197:     v      = a->a + a->i[i] ;
198:     n      = a->i[i+1] - a->i[i];
199:     alpha1 = x[2*i];
200:     alpha2 = x[2*i+1];
201:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
202:   }
203:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
204:   VecRestoreArray(xx,&x);
205:   VecRestoreArray(yy,&y);
206:   return(0);
207: }

211: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
212: {
213:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
214:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
215:   PetscScalar    *x,*y,*v,sum1, sum2;
217:   PetscInt       m = b->AIJ->m,*idx,*ii;
218:   PetscInt       n,i,jrow,j;

221:   if (yy != zz) {VecCopy(yy,zz);}
222:   VecGetArray(xx,&x);
223:   VecGetArray(zz,&y);
224:   idx  = a->j;
225:   v    = a->a;
226:   ii   = a->i;

228:   for (i=0; i<m; i++) {
229:     jrow = ii[i];
230:     n    = ii[i+1] - jrow;
231:     sum1  = 0.0;
232:     sum2  = 0.0;
233:     for (j=0; j<n; j++) {
234:       sum1 += v[jrow]*x[2*idx[jrow]];
235:       sum2 += v[jrow]*x[2*idx[jrow]+1];
236:       jrow++;
237:      }
238:     y[2*i]   += sum1;
239:     y[2*i+1] += sum2;
240:   }

242:   PetscLogFlops(4*a->nz - 2*m);
243:   VecRestoreArray(xx,&x);
244:   VecRestoreArray(zz,&y);
245:   return(0);
246: }
249: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
250: {
251:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
252:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
253:   PetscScalar    *x,*y,*v,alpha1,alpha2;
255:   PetscInt       m = b->AIJ->m,n,i,*idx;

258:   if (yy != zz) {VecCopy(yy,zz);}
259:   VecGetArray(xx,&x);
260:   VecGetArray(zz,&y);
261: 
262:   for (i=0; i<m; i++) {
263:     idx   = a->j + a->i[i] ;
264:     v     = a->a + a->i[i] ;
265:     n     = a->i[i+1] - a->i[i];
266:     alpha1 = x[2*i];
267:     alpha2 = x[2*i+1];
268:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
269:   }
270:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
271:   VecRestoreArray(xx,&x);
272:   VecRestoreArray(zz,&y);
273:   return(0);
274: }
275: /* --------------------------------------------------------------------------------------*/
278: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
279: {
280:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
281:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
282:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
284:   PetscInt       m = b->AIJ->m,*idx,*ii;
285:   PetscInt       n,i,jrow,j;

288:   VecGetArray(xx,&x);
289:   VecGetArray(yy,&y);
290:   idx  = a->j;
291:   v    = a->a;
292:   ii   = a->i;

294:   for (i=0; i<m; i++) {
295:     jrow = ii[i];
296:     n    = ii[i+1] - jrow;
297:     sum1  = 0.0;
298:     sum2  = 0.0;
299:     sum3  = 0.0;
300:     for (j=0; j<n; j++) {
301:       sum1 += v[jrow]*x[3*idx[jrow]];
302:       sum2 += v[jrow]*x[3*idx[jrow]+1];
303:       sum3 += v[jrow]*x[3*idx[jrow]+2];
304:       jrow++;
305:      }
306:     y[3*i]   = sum1;
307:     y[3*i+1] = sum2;
308:     y[3*i+2] = sum3;
309:   }

311:   PetscLogFlops(6*a->nz - 3*m);
312:   VecRestoreArray(xx,&x);
313:   VecRestoreArray(yy,&y);
314:   return(0);
315: }

319: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
320: {
321:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
322:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
323:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
325:   PetscInt       m = b->AIJ->m,n,i,*idx;

328:   VecSet(&zero,yy);
329:   VecGetArray(xx,&x);
330:   VecGetArray(yy,&y);
331: 
332:   for (i=0; i<m; i++) {
333:     idx    = a->j + a->i[i];
334:     v      = a->a + a->i[i];
335:     n      = a->i[i+1] - a->i[i];
336:     alpha1 = x[3*i];
337:     alpha2 = x[3*i+1];
338:     alpha3 = x[3*i+2];
339:     while (n-->0) {
340:       y[3*(*idx)]   += alpha1*(*v);
341:       y[3*(*idx)+1] += alpha2*(*v);
342:       y[3*(*idx)+2] += alpha3*(*v);
343:       idx++; v++;
344:     }
345:   }
346:   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
347:   VecRestoreArray(xx,&x);
348:   VecRestoreArray(yy,&y);
349:   return(0);
350: }

354: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
355: {
356:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
357:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
358:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
360:   PetscInt       m = b->AIJ->m,*idx,*ii;
361:   PetscInt       n,i,jrow,j;

364:   if (yy != zz) {VecCopy(yy,zz);}
365:   VecGetArray(xx,&x);
366:   VecGetArray(zz,&y);
367:   idx  = a->j;
368:   v    = a->a;
369:   ii   = a->i;

371:   for (i=0; i<m; i++) {
372:     jrow = ii[i];
373:     n    = ii[i+1] - jrow;
374:     sum1  = 0.0;
375:     sum2  = 0.0;
376:     sum3  = 0.0;
377:     for (j=0; j<n; j++) {
378:       sum1 += v[jrow]*x[3*idx[jrow]];
379:       sum2 += v[jrow]*x[3*idx[jrow]+1];
380:       sum3 += v[jrow]*x[3*idx[jrow]+2];
381:       jrow++;
382:      }
383:     y[3*i]   += sum1;
384:     y[3*i+1] += sum2;
385:     y[3*i+2] += sum3;
386:   }

388:   PetscLogFlops(6*a->nz);
389:   VecRestoreArray(xx,&x);
390:   VecRestoreArray(zz,&y);
391:   return(0);
392: }
395: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
396: {
397:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
398:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
399:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
401:   PetscInt       m = b->AIJ->m,n,i,*idx;

404:   if (yy != zz) {VecCopy(yy,zz);}
405:   VecGetArray(xx,&x);
406:   VecGetArray(zz,&y);
407:   for (i=0; i<m; i++) {
408:     idx    = a->j + a->i[i] ;
409:     v      = a->a + a->i[i] ;
410:     n      = a->i[i+1] - a->i[i];
411:     alpha1 = x[3*i];
412:     alpha2 = x[3*i+1];
413:     alpha3 = x[3*i+2];
414:     while (n-->0) {
415:       y[3*(*idx)]   += alpha1*(*v);
416:       y[3*(*idx)+1] += alpha2*(*v);
417:       y[3*(*idx)+2] += alpha3*(*v);
418:       idx++; v++;
419:     }
420:   }
421:   PetscLogFlops(6*a->nz);
422:   VecRestoreArray(xx,&x);
423:   VecRestoreArray(zz,&y);
424:   return(0);
425: }

427: /* ------------------------------------------------------------------------------*/
430: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
431: {
432:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
433:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
434:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
436:   PetscInt       m = b->AIJ->m,*idx,*ii;
437:   PetscInt       n,i,jrow,j;

440:   VecGetArray(xx,&x);
441:   VecGetArray(yy,&y);
442:   idx  = a->j;
443:   v    = a->a;
444:   ii   = a->i;

446:   for (i=0; i<m; i++) {
447:     jrow = ii[i];
448:     n    = ii[i+1] - jrow;
449:     sum1  = 0.0;
450:     sum2  = 0.0;
451:     sum3  = 0.0;
452:     sum4  = 0.0;
453:     for (j=0; j<n; j++) {
454:       sum1 += v[jrow]*x[4*idx[jrow]];
455:       sum2 += v[jrow]*x[4*idx[jrow]+1];
456:       sum3 += v[jrow]*x[4*idx[jrow]+2];
457:       sum4 += v[jrow]*x[4*idx[jrow]+3];
458:       jrow++;
459:      }
460:     y[4*i]   = sum1;
461:     y[4*i+1] = sum2;
462:     y[4*i+2] = sum3;
463:     y[4*i+3] = sum4;
464:   }

466:   PetscLogFlops(8*a->nz - 4*m);
467:   VecRestoreArray(xx,&x);
468:   VecRestoreArray(yy,&y);
469:   return(0);
470: }

474: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
475: {
476:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
477:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
478:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
480:   PetscInt       m = b->AIJ->m,n,i,*idx;

483:   VecSet(&zero,yy);
484:   VecGetArray(xx,&x);
485:   VecGetArray(yy,&y);
486:   for (i=0; i<m; i++) {
487:     idx    = a->j + a->i[i] ;
488:     v      = a->a + a->i[i] ;
489:     n      = a->i[i+1] - a->i[i];
490:     alpha1 = x[4*i];
491:     alpha2 = x[4*i+1];
492:     alpha3 = x[4*i+2];
493:     alpha4 = x[4*i+3];
494:     while (n-->0) {
495:       y[4*(*idx)]   += alpha1*(*v);
496:       y[4*(*idx)+1] += alpha2*(*v);
497:       y[4*(*idx)+2] += alpha3*(*v);
498:       y[4*(*idx)+3] += alpha4*(*v);
499:       idx++; v++;
500:     }
501:   }
502:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
503:   VecRestoreArray(xx,&x);
504:   VecRestoreArray(yy,&y);
505:   return(0);
506: }

510: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
511: {
512:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
513:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
514:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
516:   PetscInt       m = b->AIJ->m,*idx,*ii;
517:   PetscInt       n,i,jrow,j;

520:   if (yy != zz) {VecCopy(yy,zz);}
521:   VecGetArray(xx,&x);
522:   VecGetArray(zz,&y);
523:   idx  = a->j;
524:   v    = a->a;
525:   ii   = a->i;

527:   for (i=0; i<m; i++) {
528:     jrow = ii[i];
529:     n    = ii[i+1] - jrow;
530:     sum1  = 0.0;
531:     sum2  = 0.0;
532:     sum3  = 0.0;
533:     sum4  = 0.0;
534:     for (j=0; j<n; j++) {
535:       sum1 += v[jrow]*x[4*idx[jrow]];
536:       sum2 += v[jrow]*x[4*idx[jrow]+1];
537:       sum3 += v[jrow]*x[4*idx[jrow]+2];
538:       sum4 += v[jrow]*x[4*idx[jrow]+3];
539:       jrow++;
540:      }
541:     y[4*i]   += sum1;
542:     y[4*i+1] += sum2;
543:     y[4*i+2] += sum3;
544:     y[4*i+3] += sum4;
545:   }

547:   PetscLogFlops(8*a->nz - 4*m);
548:   VecRestoreArray(xx,&x);
549:   VecRestoreArray(zz,&y);
550:   return(0);
551: }
554: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
555: {
556:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
557:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
558:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
560:   PetscInt       m = b->AIJ->m,n,i,*idx;

563:   if (yy != zz) {VecCopy(yy,zz);}
564:   VecGetArray(xx,&x);
565:   VecGetArray(zz,&y);
566: 
567:   for (i=0; i<m; i++) {
568:     idx    = a->j + a->i[i] ;
569:     v      = a->a + a->i[i] ;
570:     n      = a->i[i+1] - a->i[i];
571:     alpha1 = x[4*i];
572:     alpha2 = x[4*i+1];
573:     alpha3 = x[4*i+2];
574:     alpha4 = x[4*i+3];
575:     while (n-->0) {
576:       y[4*(*idx)]   += alpha1*(*v);
577:       y[4*(*idx)+1] += alpha2*(*v);
578:       y[4*(*idx)+2] += alpha3*(*v);
579:       y[4*(*idx)+3] += alpha4*(*v);
580:       idx++; v++;
581:     }
582:   }
583:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
584:   VecRestoreArray(xx,&x);
585:   VecRestoreArray(zz,&y);
586:   return(0);
587: }
588: /* ------------------------------------------------------------------------------*/

592: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
593: {
594:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
595:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
596:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
598:   PetscInt       m = b->AIJ->m,*idx,*ii;
599:   PetscInt       n,i,jrow,j;

602:   VecGetArray(xx,&x);
603:   VecGetArray(yy,&y);
604:   idx  = a->j;
605:   v    = a->a;
606:   ii   = a->i;

608:   for (i=0; i<m; i++) {
609:     jrow = ii[i];
610:     n    = ii[i+1] - jrow;
611:     sum1  = 0.0;
612:     sum2  = 0.0;
613:     sum3  = 0.0;
614:     sum4  = 0.0;
615:     sum5  = 0.0;
616:     for (j=0; j<n; j++) {
617:       sum1 += v[jrow]*x[5*idx[jrow]];
618:       sum2 += v[jrow]*x[5*idx[jrow]+1];
619:       sum3 += v[jrow]*x[5*idx[jrow]+2];
620:       sum4 += v[jrow]*x[5*idx[jrow]+3];
621:       sum5 += v[jrow]*x[5*idx[jrow]+4];
622:       jrow++;
623:      }
624:     y[5*i]   = sum1;
625:     y[5*i+1] = sum2;
626:     y[5*i+2] = sum3;
627:     y[5*i+3] = sum4;
628:     y[5*i+4] = sum5;
629:   }

631:   PetscLogFlops(10*a->nz - 5*m);
632:   VecRestoreArray(xx,&x);
633:   VecRestoreArray(yy,&y);
634:   return(0);
635: }

639: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
640: {
641:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
642:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
643:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
645:   PetscInt       m = b->AIJ->m,n,i,*idx;

648:   VecSet(&zero,yy);
649:   VecGetArray(xx,&x);
650:   VecGetArray(yy,&y);
651: 
652:   for (i=0; i<m; i++) {
653:     idx    = a->j + a->i[i] ;
654:     v      = a->a + a->i[i] ;
655:     n      = a->i[i+1] - a->i[i];
656:     alpha1 = x[5*i];
657:     alpha2 = x[5*i+1];
658:     alpha3 = x[5*i+2];
659:     alpha4 = x[5*i+3];
660:     alpha5 = x[5*i+4];
661:     while (n-->0) {
662:       y[5*(*idx)]   += alpha1*(*v);
663:       y[5*(*idx)+1] += alpha2*(*v);
664:       y[5*(*idx)+2] += alpha3*(*v);
665:       y[5*(*idx)+3] += alpha4*(*v);
666:       y[5*(*idx)+4] += alpha5*(*v);
667:       idx++; v++;
668:     }
669:   }
670:   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
671:   VecRestoreArray(xx,&x);
672:   VecRestoreArray(yy,&y);
673:   return(0);
674: }

678: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
679: {
680:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
681:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
682:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
684:   PetscInt       m = b->AIJ->m,*idx,*ii;
685:   PetscInt       n,i,jrow,j;

688:   if (yy != zz) {VecCopy(yy,zz);}
689:   VecGetArray(xx,&x);
690:   VecGetArray(zz,&y);
691:   idx  = a->j;
692:   v    = a->a;
693:   ii   = a->i;

695:   for (i=0; i<m; i++) {
696:     jrow = ii[i];
697:     n    = ii[i+1] - jrow;
698:     sum1  = 0.0;
699:     sum2  = 0.0;
700:     sum3  = 0.0;
701:     sum4  = 0.0;
702:     sum5  = 0.0;
703:     for (j=0; j<n; j++) {
704:       sum1 += v[jrow]*x[5*idx[jrow]];
705:       sum2 += v[jrow]*x[5*idx[jrow]+1];
706:       sum3 += v[jrow]*x[5*idx[jrow]+2];
707:       sum4 += v[jrow]*x[5*idx[jrow]+3];
708:       sum5 += v[jrow]*x[5*idx[jrow]+4];
709:       jrow++;
710:      }
711:     y[5*i]   += sum1;
712:     y[5*i+1] += sum2;
713:     y[5*i+2] += sum3;
714:     y[5*i+3] += sum4;
715:     y[5*i+4] += sum5;
716:   }

718:   PetscLogFlops(10*a->nz);
719:   VecRestoreArray(xx,&x);
720:   VecRestoreArray(zz,&y);
721:   return(0);
722: }

726: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
727: {
728:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
729:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
730:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
732:   PetscInt       m = b->AIJ->m,n,i,*idx;

735:   if (yy != zz) {VecCopy(yy,zz);}
736:   VecGetArray(xx,&x);
737:   VecGetArray(zz,&y);
738: 
739:   for (i=0; i<m; i++) {
740:     idx    = a->j + a->i[i] ;
741:     v      = a->a + a->i[i] ;
742:     n      = a->i[i+1] - a->i[i];
743:     alpha1 = x[5*i];
744:     alpha2 = x[5*i+1];
745:     alpha3 = x[5*i+2];
746:     alpha4 = x[5*i+3];
747:     alpha5 = x[5*i+4];
748:     while (n-->0) {
749:       y[5*(*idx)]   += alpha1*(*v);
750:       y[5*(*idx)+1] += alpha2*(*v);
751:       y[5*(*idx)+2] += alpha3*(*v);
752:       y[5*(*idx)+3] += alpha4*(*v);
753:       y[5*(*idx)+4] += alpha5*(*v);
754:       idx++; v++;
755:     }
756:   }
757:   PetscLogFlops(10*a->nz);
758:   VecRestoreArray(xx,&x);
759:   VecRestoreArray(zz,&y);
760:   return(0);
761: }

763: /* ------------------------------------------------------------------------------*/
766: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
767: {
768:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
769:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
770:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
772:   PetscInt       m = b->AIJ->m,*idx,*ii;
773:   PetscInt       n,i,jrow,j;

776:   VecGetArray(xx,&x);
777:   VecGetArray(yy,&y);
778:   idx  = a->j;
779:   v    = a->a;
780:   ii   = a->i;

782:   for (i=0; i<m; i++) {
783:     jrow = ii[i];
784:     n    = ii[i+1] - jrow;
785:     sum1  = 0.0;
786:     sum2  = 0.0;
787:     sum3  = 0.0;
788:     sum4  = 0.0;
789:     sum5  = 0.0;
790:     sum6  = 0.0;
791:     for (j=0; j<n; j++) {
792:       sum1 += v[jrow]*x[6*idx[jrow]];
793:       sum2 += v[jrow]*x[6*idx[jrow]+1];
794:       sum3 += v[jrow]*x[6*idx[jrow]+2];
795:       sum4 += v[jrow]*x[6*idx[jrow]+3];
796:       sum5 += v[jrow]*x[6*idx[jrow]+4];
797:       sum6 += v[jrow]*x[6*idx[jrow]+5];
798:       jrow++;
799:      }
800:     y[6*i]   = sum1;
801:     y[6*i+1] = sum2;
802:     y[6*i+2] = sum3;
803:     y[6*i+3] = sum4;
804:     y[6*i+4] = sum5;
805:     y[6*i+5] = sum6;
806:   }

808:   PetscLogFlops(12*a->nz - 6*m);
809:   VecRestoreArray(xx,&x);
810:   VecRestoreArray(yy,&y);
811:   return(0);
812: }

816: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
817: {
818:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
819:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
820:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
822:   PetscInt       m = b->AIJ->m,n,i,*idx;

825:   VecSet(&zero,yy);
826:   VecGetArray(xx,&x);
827:   VecGetArray(yy,&y);

829:   for (i=0; i<m; i++) {
830:     idx    = a->j + a->i[i] ;
831:     v      = a->a + a->i[i] ;
832:     n      = a->i[i+1] - a->i[i];
833:     alpha1 = x[6*i];
834:     alpha2 = x[6*i+1];
835:     alpha3 = x[6*i+2];
836:     alpha4 = x[6*i+3];
837:     alpha5 = x[6*i+4];
838:     alpha6 = x[6*i+5];
839:     while (n-->0) {
840:       y[6*(*idx)]   += alpha1*(*v);
841:       y[6*(*idx)+1] += alpha2*(*v);
842:       y[6*(*idx)+2] += alpha3*(*v);
843:       y[6*(*idx)+3] += alpha4*(*v);
844:       y[6*(*idx)+4] += alpha5*(*v);
845:       y[6*(*idx)+5] += alpha6*(*v);
846:       idx++; v++;
847:     }
848:   }
849:   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
850:   VecRestoreArray(xx,&x);
851:   VecRestoreArray(yy,&y);
852:   return(0);
853: }

857: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
858: {
859:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
860:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
861:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
863:   PetscInt       m = b->AIJ->m,*idx,*ii;
864:   PetscInt       n,i,jrow,j;

867:   if (yy != zz) {VecCopy(yy,zz);}
868:   VecGetArray(xx,&x);
869:   VecGetArray(zz,&y);
870:   idx  = a->j;
871:   v    = a->a;
872:   ii   = a->i;

874:   for (i=0; i<m; i++) {
875:     jrow = ii[i];
876:     n    = ii[i+1] - jrow;
877:     sum1  = 0.0;
878:     sum2  = 0.0;
879:     sum3  = 0.0;
880:     sum4  = 0.0;
881:     sum5  = 0.0;
882:     sum6  = 0.0;
883:     for (j=0; j<n; j++) {
884:       sum1 += v[jrow]*x[6*idx[jrow]];
885:       sum2 += v[jrow]*x[6*idx[jrow]+1];
886:       sum3 += v[jrow]*x[6*idx[jrow]+2];
887:       sum4 += v[jrow]*x[6*idx[jrow]+3];
888:       sum5 += v[jrow]*x[6*idx[jrow]+4];
889:       sum6 += v[jrow]*x[6*idx[jrow]+5];
890:       jrow++;
891:      }
892:     y[6*i]   += sum1;
893:     y[6*i+1] += sum2;
894:     y[6*i+2] += sum3;
895:     y[6*i+3] += sum4;
896:     y[6*i+4] += sum5;
897:     y[6*i+5] += sum6;
898:   }

900:   PetscLogFlops(12*a->nz);
901:   VecRestoreArray(xx,&x);
902:   VecRestoreArray(zz,&y);
903:   return(0);
904: }

908: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
909: {
910:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
911:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
912:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
914:   PetscInt       m = b->AIJ->m,n,i,*idx;

917:   if (yy != zz) {VecCopy(yy,zz);}
918:   VecGetArray(xx,&x);
919:   VecGetArray(zz,&y);
920: 
921:   for (i=0; i<m; i++) {
922:     idx    = a->j + a->i[i] ;
923:     v      = a->a + a->i[i] ;
924:     n      = a->i[i+1] - a->i[i];
925:     alpha1 = x[6*i];
926:     alpha2 = x[6*i+1];
927:     alpha3 = x[6*i+2];
928:     alpha4 = x[6*i+3];
929:     alpha5 = x[6*i+4];
930:     alpha6 = x[6*i+5];
931:     while (n-->0) {
932:       y[6*(*idx)]   += alpha1*(*v);
933:       y[6*(*idx)+1] += alpha2*(*v);
934:       y[6*(*idx)+2] += alpha3*(*v);
935:       y[6*(*idx)+3] += alpha4*(*v);
936:       y[6*(*idx)+4] += alpha5*(*v);
937:       y[6*(*idx)+5] += alpha6*(*v);
938:       idx++; v++;
939:     }
940:   }
941:   PetscLogFlops(12*a->nz);
942:   VecRestoreArray(xx,&x);
943:   VecRestoreArray(zz,&y);
944:   return(0);
945: }

947: /* ------------------------------------------------------------------------------*/
950: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
951: {
952:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
953:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
954:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
956:   PetscInt       m = b->AIJ->m,*idx,*ii;
957:   PetscInt       n,i,jrow,j;

960:   VecGetArray(xx,&x);
961:   VecGetArray(yy,&y);
962:   idx  = a->j;
963:   v    = a->a;
964:   ii   = a->i;

966:   for (i=0; i<m; i++) {
967:     jrow = ii[i];
968:     n    = ii[i+1] - jrow;
969:     sum1  = 0.0;
970:     sum2  = 0.0;
971:     sum3  = 0.0;
972:     sum4  = 0.0;
973:     sum5  = 0.0;
974:     sum6  = 0.0;
975:     sum7  = 0.0;
976:     sum8  = 0.0;
977:     for (j=0; j<n; j++) {
978:       sum1 += v[jrow]*x[8*idx[jrow]];
979:       sum2 += v[jrow]*x[8*idx[jrow]+1];
980:       sum3 += v[jrow]*x[8*idx[jrow]+2];
981:       sum4 += v[jrow]*x[8*idx[jrow]+3];
982:       sum5 += v[jrow]*x[8*idx[jrow]+4];
983:       sum6 += v[jrow]*x[8*idx[jrow]+5];
984:       sum7 += v[jrow]*x[8*idx[jrow]+6];
985:       sum8 += v[jrow]*x[8*idx[jrow]+7];
986:       jrow++;
987:      }
988:     y[8*i]   = sum1;
989:     y[8*i+1] = sum2;
990:     y[8*i+2] = sum3;
991:     y[8*i+3] = sum4;
992:     y[8*i+4] = sum5;
993:     y[8*i+5] = sum6;
994:     y[8*i+6] = sum7;
995:     y[8*i+7] = sum8;
996:   }

998:   PetscLogFlops(16*a->nz - 8*m);
999:   VecRestoreArray(xx,&x);
1000:   VecRestoreArray(yy,&y);
1001:   return(0);
1002: }

1006: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1007: {
1008:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1009:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1010:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1012:   PetscInt       m = b->AIJ->m,n,i,*idx;

1015:   VecSet(&zero,yy);
1016:   VecGetArray(xx,&x);
1017:   VecGetArray(yy,&y);

1019:   for (i=0; i<m; i++) {
1020:     idx    = a->j + a->i[i] ;
1021:     v      = a->a + a->i[i] ;
1022:     n      = a->i[i+1] - a->i[i];
1023:     alpha1 = x[8*i];
1024:     alpha2 = x[8*i+1];
1025:     alpha3 = x[8*i+2];
1026:     alpha4 = x[8*i+3];
1027:     alpha5 = x[8*i+4];
1028:     alpha6 = x[8*i+5];
1029:     alpha7 = x[8*i+6];
1030:     alpha8 = x[8*i+7];
1031:     while (n-->0) {
1032:       y[8*(*idx)]   += alpha1*(*v);
1033:       y[8*(*idx)+1] += alpha2*(*v);
1034:       y[8*(*idx)+2] += alpha3*(*v);
1035:       y[8*(*idx)+3] += alpha4*(*v);
1036:       y[8*(*idx)+4] += alpha5*(*v);
1037:       y[8*(*idx)+5] += alpha6*(*v);
1038:       y[8*(*idx)+6] += alpha7*(*v);
1039:       y[8*(*idx)+7] += alpha8*(*v);
1040:       idx++; v++;
1041:     }
1042:   }
1043:   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1044:   VecRestoreArray(xx,&x);
1045:   VecRestoreArray(yy,&y);
1046:   return(0);
1047: }

1051: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1052: {
1053:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1054:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1055:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1057:   PetscInt       m = b->AIJ->m,*idx,*ii;
1058:   PetscInt       n,i,jrow,j;

1061:   if (yy != zz) {VecCopy(yy,zz);}
1062:   VecGetArray(xx,&x);
1063:   VecGetArray(zz,&y);
1064:   idx  = a->j;
1065:   v    = a->a;
1066:   ii   = a->i;

1068:   for (i=0; i<m; i++) {
1069:     jrow = ii[i];
1070:     n    = ii[i+1] - jrow;
1071:     sum1  = 0.0;
1072:     sum2  = 0.0;
1073:     sum3  = 0.0;
1074:     sum4  = 0.0;
1075:     sum5  = 0.0;
1076:     sum6  = 0.0;
1077:     sum7  = 0.0;
1078:     sum8  = 0.0;
1079:     for (j=0; j<n; j++) {
1080:       sum1 += v[jrow]*x[8*idx[jrow]];
1081:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1082:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1083:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1084:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1085:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1086:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1087:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1088:       jrow++;
1089:      }
1090:     y[8*i]   += sum1;
1091:     y[8*i+1] += sum2;
1092:     y[8*i+2] += sum3;
1093:     y[8*i+3] += sum4;
1094:     y[8*i+4] += sum5;
1095:     y[8*i+5] += sum6;
1096:     y[8*i+6] += sum7;
1097:     y[8*i+7] += sum8;
1098:   }

1100:   PetscLogFlops(16*a->nz);
1101:   VecRestoreArray(xx,&x);
1102:   VecRestoreArray(zz,&y);
1103:   return(0);
1104: }

1108: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1109: {
1110:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1111:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1112:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1114:   PetscInt       m = b->AIJ->m,n,i,*idx;

1117:   if (yy != zz) {VecCopy(yy,zz);}
1118:   VecGetArray(xx,&x);
1119:   VecGetArray(zz,&y);
1120:   for (i=0; i<m; i++) {
1121:     idx    = a->j + a->i[i] ;
1122:     v      = a->a + a->i[i] ;
1123:     n      = a->i[i+1] - a->i[i];
1124:     alpha1 = x[8*i];
1125:     alpha2 = x[8*i+1];
1126:     alpha3 = x[8*i+2];
1127:     alpha4 = x[8*i+3];
1128:     alpha5 = x[8*i+4];
1129:     alpha6 = x[8*i+5];
1130:     alpha7 = x[8*i+6];
1131:     alpha8 = x[8*i+7];
1132:     while (n-->0) {
1133:       y[8*(*idx)]   += alpha1*(*v);
1134:       y[8*(*idx)+1] += alpha2*(*v);
1135:       y[8*(*idx)+2] += alpha3*(*v);
1136:       y[8*(*idx)+3] += alpha4*(*v);
1137:       y[8*(*idx)+4] += alpha5*(*v);
1138:       y[8*(*idx)+5] += alpha6*(*v);
1139:       y[8*(*idx)+6] += alpha7*(*v);
1140:       y[8*(*idx)+7] += alpha8*(*v);
1141:       idx++; v++;
1142:     }
1143:   }
1144:   PetscLogFlops(16*a->nz);
1145:   VecRestoreArray(xx,&x);
1146:   VecRestoreArray(zz,&y);
1147:   return(0);
1148: }

1150: /* ------------------------------------------------------------------------------*/
1153: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1154: {
1155:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1156:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1157:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1159:   PetscInt       m = b->AIJ->m,*idx,*ii;
1160:   PetscInt       n,i,jrow,j;

1163:   VecGetArray(xx,&x);
1164:   VecGetArray(yy,&y);
1165:   idx  = a->j;
1166:   v    = a->a;
1167:   ii   = a->i;

1169:   for (i=0; i<m; i++) {
1170:     jrow = ii[i];
1171:     n    = ii[i+1] - jrow;
1172:     sum1  = 0.0;
1173:     sum2  = 0.0;
1174:     sum3  = 0.0;
1175:     sum4  = 0.0;
1176:     sum5  = 0.0;
1177:     sum6  = 0.0;
1178:     sum7  = 0.0;
1179:     sum8  = 0.0;
1180:     sum9  = 0.0;
1181:     for (j=0; j<n; j++) {
1182:       sum1 += v[jrow]*x[9*idx[jrow]];
1183:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1184:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1185:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1186:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1187:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1188:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1189:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1190:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1191:       jrow++;
1192:      }
1193:     y[9*i]   = sum1;
1194:     y[9*i+1] = sum2;
1195:     y[9*i+2] = sum3;
1196:     y[9*i+3] = sum4;
1197:     y[9*i+4] = sum5;
1198:     y[9*i+5] = sum6;
1199:     y[9*i+6] = sum7;
1200:     y[9*i+7] = sum8;
1201:     y[9*i+8] = sum9;
1202:   }

1204:   PetscLogFlops(18*a->nz - 9*m);
1205:   VecRestoreArray(xx,&x);
1206:   VecRestoreArray(yy,&y);
1207:   return(0);
1208: }

1212: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1213: {
1214:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1215:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1216:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1218:   PetscInt       m = b->AIJ->m,n,i,*idx;

1221:   VecSet(&zero,yy);
1222:   VecGetArray(xx,&x);
1223:   VecGetArray(yy,&y);

1225:   for (i=0; i<m; i++) {
1226:     idx    = a->j + a->i[i] ;
1227:     v      = a->a + a->i[i] ;
1228:     n      = a->i[i+1] - a->i[i];
1229:     alpha1 = x[9*i];
1230:     alpha2 = x[9*i+1];
1231:     alpha3 = x[9*i+2];
1232:     alpha4 = x[9*i+3];
1233:     alpha5 = x[9*i+4];
1234:     alpha6 = x[9*i+5];
1235:     alpha7 = x[9*i+6];
1236:     alpha8 = x[9*i+7];
1237:     alpha9 = x[9*i+8];
1238:     while (n-->0) {
1239:       y[9*(*idx)]   += alpha1*(*v);
1240:       y[9*(*idx)+1] += alpha2*(*v);
1241:       y[9*(*idx)+2] += alpha3*(*v);
1242:       y[9*(*idx)+3] += alpha4*(*v);
1243:       y[9*(*idx)+4] += alpha5*(*v);
1244:       y[9*(*idx)+5] += alpha6*(*v);
1245:       y[9*(*idx)+6] += alpha7*(*v);
1246:       y[9*(*idx)+7] += alpha8*(*v);
1247:       y[9*(*idx)+8] += alpha9*(*v);
1248:       idx++; v++;
1249:     }
1250:   }
1251:   PetscLogFlops(18*a->nz - 9*b->AIJ->n);
1252:   VecRestoreArray(xx,&x);
1253:   VecRestoreArray(yy,&y);
1254:   return(0);
1255: }

1259: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1260: {
1261:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1262:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1263:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1265:   PetscInt       m = b->AIJ->m,*idx,*ii;
1266:   PetscInt       n,i,jrow,j;

1269:   if (yy != zz) {VecCopy(yy,zz);}
1270:   VecGetArray(xx,&x);
1271:   VecGetArray(zz,&y);
1272:   idx  = a->j;
1273:   v    = a->a;
1274:   ii   = a->i;

1276:   for (i=0; i<m; i++) {
1277:     jrow = ii[i];
1278:     n    = ii[i+1] - jrow;
1279:     sum1  = 0.0;
1280:     sum2  = 0.0;
1281:     sum3  = 0.0;
1282:     sum4  = 0.0;
1283:     sum5  = 0.0;
1284:     sum6  = 0.0;
1285:     sum7  = 0.0;
1286:     sum8  = 0.0;
1287:     sum9  = 0.0;
1288:     for (j=0; j<n; j++) {
1289:       sum1 += v[jrow]*x[9*idx[jrow]];
1290:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1291:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1292:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1293:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1294:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1295:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1296:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1297:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1298:       jrow++;
1299:      }
1300:     y[9*i]   += sum1;
1301:     y[9*i+1] += sum2;
1302:     y[9*i+2] += sum3;
1303:     y[9*i+3] += sum4;
1304:     y[9*i+4] += sum5;
1305:     y[9*i+5] += sum6;
1306:     y[9*i+6] += sum7;
1307:     y[9*i+7] += sum8;
1308:     y[9*i+8] += sum9;
1309:   }

1311:   PetscLogFlops(18*a->nz);
1312:   VecRestoreArray(xx,&x);
1313:   VecRestoreArray(zz,&y);
1314:   return(0);
1315: }

1319: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1320: {
1321:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1322:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1323:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1325:   PetscInt       m = b->AIJ->m,n,i,*idx;

1328:   if (yy != zz) {VecCopy(yy,zz);}
1329:   VecGetArray(xx,&x);
1330:   VecGetArray(zz,&y);
1331:   for (i=0; i<m; i++) {
1332:     idx    = a->j + a->i[i] ;
1333:     v      = a->a + a->i[i] ;
1334:     n      = a->i[i+1] - a->i[i];
1335:     alpha1 = x[9*i];
1336:     alpha2 = x[9*i+1];
1337:     alpha3 = x[9*i+2];
1338:     alpha4 = x[9*i+3];
1339:     alpha5 = x[9*i+4];
1340:     alpha6 = x[9*i+5];
1341:     alpha7 = x[9*i+6];
1342:     alpha8 = x[9*i+7];
1343:     alpha9 = x[9*i+8];
1344:     while (n-->0) {
1345:       y[9*(*idx)]   += alpha1*(*v);
1346:       y[9*(*idx)+1] += alpha2*(*v);
1347:       y[9*(*idx)+2] += alpha3*(*v);
1348:       y[9*(*idx)+3] += alpha4*(*v);
1349:       y[9*(*idx)+4] += alpha5*(*v);
1350:       y[9*(*idx)+5] += alpha6*(*v);
1351:       y[9*(*idx)+6] += alpha7*(*v);
1352:       y[9*(*idx)+7] += alpha8*(*v);
1353:       y[9*(*idx)+8] += alpha9*(*v);
1354:       idx++; v++;
1355:     }
1356:   }
1357:   PetscLogFlops(18*a->nz);
1358:   VecRestoreArray(xx,&x);
1359:   VecRestoreArray(zz,&y);
1360:   return(0);
1361: }

1363: /*--------------------------------------------------------------------------------------------*/
1366: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1367: {
1368:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1369:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1370:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1371:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1373:   PetscInt       m = b->AIJ->m,*idx,*ii;
1374:   PetscInt       n,i,jrow,j;

1377:   VecGetArray(xx,&x);
1378:   VecGetArray(yy,&y);
1379:   idx  = a->j;
1380:   v    = a->a;
1381:   ii   = a->i;

1383:   for (i=0; i<m; i++) {
1384:     jrow = ii[i];
1385:     n    = ii[i+1] - jrow;
1386:     sum1  = 0.0;
1387:     sum2  = 0.0;
1388:     sum3  = 0.0;
1389:     sum4  = 0.0;
1390:     sum5  = 0.0;
1391:     sum6  = 0.0;
1392:     sum7  = 0.0;
1393:     sum8  = 0.0;
1394:     sum9  = 0.0;
1395:     sum10 = 0.0;
1396:     sum11 = 0.0;
1397:     sum12 = 0.0;
1398:     sum13 = 0.0;
1399:     sum14 = 0.0;
1400:     sum15 = 0.0;
1401:     sum16 = 0.0;
1402:     for (j=0; j<n; j++) {
1403:       sum1  += v[jrow]*x[16*idx[jrow]];
1404:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1405:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1406:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1407:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1408:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1409:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1410:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1411:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1412:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1413:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1414:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1415:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1416:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1417:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1418:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1419:       jrow++;
1420:      }
1421:     y[16*i]    = sum1;
1422:     y[16*i+1]  = sum2;
1423:     y[16*i+2]  = sum3;
1424:     y[16*i+3]  = sum4;
1425:     y[16*i+4]  = sum5;
1426:     y[16*i+5]  = sum6;
1427:     y[16*i+6]  = sum7;
1428:     y[16*i+7]  = sum8;
1429:     y[16*i+8]  = sum9;
1430:     y[16*i+9]  = sum10;
1431:     y[16*i+10] = sum11;
1432:     y[16*i+11] = sum12;
1433:     y[16*i+12] = sum13;
1434:     y[16*i+13] = sum14;
1435:     y[16*i+14] = sum15;
1436:     y[16*i+15] = sum16;
1437:   }

1439:   PetscLogFlops(32*a->nz - 16*m);
1440:   VecRestoreArray(xx,&x);
1441:   VecRestoreArray(yy,&y);
1442:   return(0);
1443: }

1447: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1448: {
1449:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1450:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1451:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1452:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1454:   PetscInt       m = b->AIJ->m,n,i,*idx;

1457:   VecSet(&zero,yy);
1458:   VecGetArray(xx,&x);
1459:   VecGetArray(yy,&y);

1461:   for (i=0; i<m; i++) {
1462:     idx    = a->j + a->i[i] ;
1463:     v      = a->a + a->i[i] ;
1464:     n      = a->i[i+1] - a->i[i];
1465:     alpha1  = x[16*i];
1466:     alpha2  = x[16*i+1];
1467:     alpha3  = x[16*i+2];
1468:     alpha4  = x[16*i+3];
1469:     alpha5  = x[16*i+4];
1470:     alpha6  = x[16*i+5];
1471:     alpha7  = x[16*i+6];
1472:     alpha8  = x[16*i+7];
1473:     alpha9  = x[16*i+8];
1474:     alpha10 = x[16*i+9];
1475:     alpha11 = x[16*i+10];
1476:     alpha12 = x[16*i+11];
1477:     alpha13 = x[16*i+12];
1478:     alpha14 = x[16*i+13];
1479:     alpha15 = x[16*i+14];
1480:     alpha16 = x[16*i+15];
1481:     while (n-->0) {
1482:       y[16*(*idx)]    += alpha1*(*v);
1483:       y[16*(*idx)+1]  += alpha2*(*v);
1484:       y[16*(*idx)+2]  += alpha3*(*v);
1485:       y[16*(*idx)+3]  += alpha4*(*v);
1486:       y[16*(*idx)+4]  += alpha5*(*v);
1487:       y[16*(*idx)+5]  += alpha6*(*v);
1488:       y[16*(*idx)+6]  += alpha7*(*v);
1489:       y[16*(*idx)+7]  += alpha8*(*v);
1490:       y[16*(*idx)+8]  += alpha9*(*v);
1491:       y[16*(*idx)+9]  += alpha10*(*v);
1492:       y[16*(*idx)+10] += alpha11*(*v);
1493:       y[16*(*idx)+11] += alpha12*(*v);
1494:       y[16*(*idx)+12] += alpha13*(*v);
1495:       y[16*(*idx)+13] += alpha14*(*v);
1496:       y[16*(*idx)+14] += alpha15*(*v);
1497:       y[16*(*idx)+15] += alpha16*(*v);
1498:       idx++; v++;
1499:     }
1500:   }
1501:   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1502:   VecRestoreArray(xx,&x);
1503:   VecRestoreArray(yy,&y);
1504:   return(0);
1505: }

1509: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1510: {
1511:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1512:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1513:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1514:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1516:   PetscInt       m = b->AIJ->m,*idx,*ii;
1517:   PetscInt       n,i,jrow,j;

1520:   if (yy != zz) {VecCopy(yy,zz);}
1521:   VecGetArray(xx,&x);
1522:   VecGetArray(zz,&y);
1523:   idx  = a->j;
1524:   v    = a->a;
1525:   ii   = a->i;

1527:   for (i=0; i<m; i++) {
1528:     jrow = ii[i];
1529:     n    = ii[i+1] - jrow;
1530:     sum1  = 0.0;
1531:     sum2  = 0.0;
1532:     sum3  = 0.0;
1533:     sum4  = 0.0;
1534:     sum5  = 0.0;
1535:     sum6  = 0.0;
1536:     sum7  = 0.0;
1537:     sum8  = 0.0;
1538:     sum9  = 0.0;
1539:     sum10 = 0.0;
1540:     sum11 = 0.0;
1541:     sum12 = 0.0;
1542:     sum13 = 0.0;
1543:     sum14 = 0.0;
1544:     sum15 = 0.0;
1545:     sum16 = 0.0;
1546:     for (j=0; j<n; j++) {
1547:       sum1  += v[jrow]*x[16*idx[jrow]];
1548:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1549:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1550:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1551:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1552:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1553:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1554:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1555:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1556:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1557:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1558:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1559:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1560:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1561:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1562:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1563:       jrow++;
1564:      }
1565:     y[16*i]    += sum1;
1566:     y[16*i+1]  += sum2;
1567:     y[16*i+2]  += sum3;
1568:     y[16*i+3]  += sum4;
1569:     y[16*i+4]  += sum5;
1570:     y[16*i+5]  += sum6;
1571:     y[16*i+6]  += sum7;
1572:     y[16*i+7]  += sum8;
1573:     y[16*i+8]  += sum9;
1574:     y[16*i+9]  += sum10;
1575:     y[16*i+10] += sum11;
1576:     y[16*i+11] += sum12;
1577:     y[16*i+12] += sum13;
1578:     y[16*i+13] += sum14;
1579:     y[16*i+14] += sum15;
1580:     y[16*i+15] += sum16;
1581:   }

1583:   PetscLogFlops(32*a->nz);
1584:   VecRestoreArray(xx,&x);
1585:   VecRestoreArray(zz,&y);
1586:   return(0);
1587: }

1591: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1592: {
1593:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1594:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1595:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1596:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1598:   PetscInt       m = b->AIJ->m,n,i,*idx;

1601:   if (yy != zz) {VecCopy(yy,zz);}
1602:   VecGetArray(xx,&x);
1603:   VecGetArray(zz,&y);
1604:   for (i=0; i<m; i++) {
1605:     idx    = a->j + a->i[i] ;
1606:     v      = a->a + a->i[i] ;
1607:     n      = a->i[i+1] - a->i[i];
1608:     alpha1 = x[16*i];
1609:     alpha2 = x[16*i+1];
1610:     alpha3 = x[16*i+2];
1611:     alpha4 = x[16*i+3];
1612:     alpha5 = x[16*i+4];
1613:     alpha6 = x[16*i+5];
1614:     alpha7 = x[16*i+6];
1615:     alpha8 = x[16*i+7];
1616:     alpha9  = x[16*i+8];
1617:     alpha10 = x[16*i+9];
1618:     alpha11 = x[16*i+10];
1619:     alpha12 = x[16*i+11];
1620:     alpha13 = x[16*i+12];
1621:     alpha14 = x[16*i+13];
1622:     alpha15 = x[16*i+14];
1623:     alpha16 = x[16*i+15];
1624:     while (n-->0) {
1625:       y[16*(*idx)]   += alpha1*(*v);
1626:       y[16*(*idx)+1] += alpha2*(*v);
1627:       y[16*(*idx)+2] += alpha3*(*v);
1628:       y[16*(*idx)+3] += alpha4*(*v);
1629:       y[16*(*idx)+4] += alpha5*(*v);
1630:       y[16*(*idx)+5] += alpha6*(*v);
1631:       y[16*(*idx)+6] += alpha7*(*v);
1632:       y[16*(*idx)+7] += alpha8*(*v);
1633:       y[16*(*idx)+8]  += alpha9*(*v);
1634:       y[16*(*idx)+9]  += alpha10*(*v);
1635:       y[16*(*idx)+10] += alpha11*(*v);
1636:       y[16*(*idx)+11] += alpha12*(*v);
1637:       y[16*(*idx)+12] += alpha13*(*v);
1638:       y[16*(*idx)+13] += alpha14*(*v);
1639:       y[16*(*idx)+14] += alpha15*(*v);
1640:       y[16*(*idx)+15] += alpha16*(*v);
1641:       idx++; v++;
1642:     }
1643:   }
1644:   PetscLogFlops(32*a->nz);
1645:   VecRestoreArray(xx,&x);
1646:   VecRestoreArray(zz,&y);
1647:   return(0);
1648: }

1650: /*===================================================================================*/
1653: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1654: {
1655:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

1659:   /* start the scatter */
1660:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1661:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
1662:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1663:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
1664:   return(0);
1665: }

1669: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1670: {
1671:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

1675:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1676:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1677:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
1678:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1679:   return(0);
1680: }

1684: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1685: {
1686:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

1690:   /* start the scatter */
1691:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1692:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
1693:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1694:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);
1695:   return(0);
1696: }

1700: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1701: {
1702:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

1706:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1707:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1708:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
1709:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1710:   return(0);
1711: }

1713:  #include src/mat/impls/aij/seq/aij.h
1716: PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A,const MatType newtype,Mat *B)
1717: {
1718:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1719:   Mat               a = b->AIJ;
1720:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
1721:   PetscErrorCode    ierr;
1722:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii;
1723:   PetscInt          *cols;
1724:   PetscScalar       *vals;

1727:   MatGetSize(a,&m,&n);
1728:   PetscMalloc(4*m*sizeof(int),&ilen);
1729:   for (i=0; i<m; i++) {
1730:     nmax = PetscMax(nmax,aij->ilen[i]);
1731:     for (j=0; j<4; j++) {
1732:       ilen[4*i+j] = aij->ilen[i];
1733:     }
1734:   }
1735:   MatCreateSeqAIJ(PETSC_COMM_SELF,4*m,4*n,0,ilen,B);
1736:   MatSetOption(*B,MAT_COLUMNS_SORTED);
1737:   PetscFree(ilen);
1738:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
1739:   ii   = 0;
1740:   for (i=0; i<m; i++) {
1741:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
1742:     for (j=0; j<4; j++) {
1743:       for (k=0; k<ncols; k++) {
1744:         icols[k] = 4*cols[k]+j;
1745:       }
1746:       MatSetValues_SeqAIJ(*B,1,&ii,ncols,icols,vals,INSERT_VALUES);
1747:       ii++;
1748:     }
1749:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
1750:   }
1751:   PetscFree(icols);
1752:   MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
1753:   MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
1754:   return(0);
1755: }

1757: /* ---------------------------------------------------------------------------------- */
1758: /*MC
1759:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
1760:   operations for multicomponent problems.  It interpolates each component the same
1761:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
1762:   and MATMPIAIJ for distributed matrices.

1764:   Operations provided:
1765: . MatMult
1766: . MatMultTranspose
1767: . MatMultAdd
1768: . MatMultTransposeAdd

1770:   Level: advanced

1772: M*/
1775: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
1776: {
1778:   PetscMPIInt    size;
1779:   PetscInt       n;
1780:   Mat_MPIMAIJ    *b;
1781:   Mat            B;

1784:   PetscObjectReference((PetscObject)A);

1786:   if (dof == 1) {
1787:     *maij = A;
1788:   } else {
1789:     MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);
1790:     B->assembled    = PETSC_TRUE;

1792:     MPI_Comm_size(A->comm,&size);
1793:     if (size == 1) {
1794:       MatSetType(B,MATSEQMAIJ);
1795:       B->ops->destroy = MatDestroy_SeqMAIJ;
1796:       b      = (Mat_MPIMAIJ*)B->data;
1797:       b->dof = dof;
1798:       b->AIJ = A;
1799:       if (dof == 2) {
1800:         B->ops->mult             = MatMult_SeqMAIJ_2;
1801:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1802:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1803:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1804:       } else if (dof == 3) {
1805:         B->ops->mult             = MatMult_SeqMAIJ_3;
1806:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1807:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1808:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1809:       } else if (dof == 4) {
1810:         B->ops->mult             = MatMult_SeqMAIJ_4;
1811:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1812:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1813:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1814:       } else if (dof == 5) {
1815:         B->ops->mult             = MatMult_SeqMAIJ_5;
1816:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1817:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1818:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1819:       } else if (dof == 6) {
1820:         B->ops->mult             = MatMult_SeqMAIJ_6;
1821:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1822:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1823:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1824:       } else if (dof == 8) {
1825:         B->ops->mult             = MatMult_SeqMAIJ_8;
1826:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1827:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1828:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1829:       } else if (dof == 9) {
1830:         B->ops->mult             = MatMult_SeqMAIJ_9;
1831:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
1832:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
1833:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1834:       } else if (dof == 16) {
1835:         B->ops->mult             = MatMult_SeqMAIJ_16;
1836:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
1837:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
1838:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1839:       } else {
1840:         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1841:       }
1842:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
1843:     } else {
1844:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1845:       IS         from,to;
1846:       Vec        gvec;
1847:       PetscInt        *garray,i;

1849:       MatSetType(B,MATMPIMAIJ);
1850:       B->ops->destroy = MatDestroy_MPIMAIJ;
1851:       b      = (Mat_MPIMAIJ*)B->data;
1852:       b->dof = dof;
1853:       b->A   = A;
1854:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
1855:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

1857:       VecGetSize(mpiaij->lvec,&n);
1858:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);

1860:       /* create two temporary Index sets for build scatter gather */
1861:       PetscMalloc((n+1)*sizeof(PetscInt),&garray);
1862:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1863:       ISCreateBlock(A->comm,dof,n,garray,&from);
1864:       PetscFree(garray);
1865:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

1867:       /* create temporary global vector to generate scatter context */
1868:       VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);

1870:       /* generate the scatter context */
1871:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

1873:       ISDestroy(from);
1874:       ISDestroy(to);
1875:       VecDestroy(gvec);

1877:       B->ops->mult             = MatMult_MPIMAIJ_dof;
1878:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1879:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1880:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1881:     }
1882:     *maij = B;
1883:   }
1884:   return(0);
1885: }