Actual source code: maij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  5:   This format is used for restriction and interpolation operations for 
  6:   multicomponent problems. It interpolates each component the same way
  7:   independently.

  9:      We provide:
 10:          MatMult()
 11:          MatMultTranspose()
 12:          MatMultTransposeAdd()
 13:          MatMultAdd()
 14:           and
 15:          MatCreateMAIJ(Mat,dof,Mat*)

 17:      This single directory handles both the sequential and parallel codes
 18: */

 20:  #include src/mat/impls/maij/maij.h
 21:  #include src/mat/utils/freespace.h
 22:  #include private/vecimpl.h

 26: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 27: {
 29:   PetscTruth     ismpimaij,isseqmaij;

 32:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 33:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 34:   if (ismpimaij) {
 35:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 37:     *B = b->A;
 38:   } else if (isseqmaij) {
 39:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 41:     *B = b->AIJ;
 42:   } else {
 43:     *B = A;
 44:   }
 45:   return(0);
 46: }

 50: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 51: {
 53:   Mat            Aij;

 56:   MatMAIJGetAIJ(A,&Aij);
 57:   MatCreateMAIJ(Aij,dof,B);
 58:   return(0);
 59: }

 63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 64: {
 66:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 69:   if (b->AIJ) {
 70:     MatDestroy(b->AIJ);
 71:   }
 72:   PetscFree(b);
 73:   return(0);
 74: }

 78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
 79: {
 81:   Mat            B;

 84:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
 85:   MatView(B,viewer);
 86:   MatDestroy(B);
 87:   return(0);
 88: }

 92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
 93: {
 95:   Mat            B;

 98:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
 99:   MatView(B,viewer);
100:   MatDestroy(B);
101:   return(0);
102: }

106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

112:   if (b->AIJ) {
113:     MatDestroy(b->AIJ);
114:   }
115:   if (b->OAIJ) {
116:     MatDestroy(b->OAIJ);
117:   }
118:   if (b->A) {
119:     MatDestroy(b->A);
120:   }
121:   if (b->ctx) {
122:     VecScatterDestroy(b->ctx);
123:   }
124:   if (b->w) {
125:     VecDestroy(b->w);
126:   }
127:   PetscFree(b);
128:   PetscObjectChangeTypeName((PetscObject)A,0);
129:   return(0);
130: }

132: /*MC
133:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
134:   multicomponent problems, interpolating or restricting each component the same way independently.
135:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

137:   Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd

143:   Level: advanced

145: .seealso: MatCreateSeqDense
146: M*/

151: PetscErrorCode  MatCreate_MAIJ(Mat A)
152: {
154:   Mat_MPIMAIJ    *b;
155:   PetscMPIInt    size;

158:   PetscNewLog(A,Mat_MPIMAIJ,&b);
159:   A->data  = (void*)b;
160:   PetscMemzero(A->ops,sizeof(struct _MatOps));
161:   A->factor           = 0;
162:   A->mapping          = 0;

164:   b->AIJ  = 0;
165:   b->dof  = 0;
166:   b->OAIJ = 0;
167:   b->ctx  = 0;
168:   b->w    = 0;
169:   MPI_Comm_size(((PetscObject)A)->comm,&size);
170:   if (size == 1){
171:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
172:   } else {
173:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
174:   }
175:   return(0);
176: }

179: /* --------------------------------------------------------------------------------------*/
182: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
185:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
186:   PetscScalar    *x,*y,*v,sum1, sum2;
188:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
189:   PetscInt       n,i,jrow,j;

192:   VecGetArray(xx,&x);
193:   VecGetArray(yy,&y);
194:   idx  = a->j;
195:   v    = a->a;
196:   ii   = a->i;

198:   for (i=0; i<m; i++) {
199:     jrow = ii[i];
200:     n    = ii[i+1] - jrow;
201:     sum1  = 0.0;
202:     sum2  = 0.0;
203:     nonzerorow += (n>0);
204:     for (j=0; j<n; j++) {
205:       sum1 += v[jrow]*x[2*idx[jrow]];
206:       sum2 += v[jrow]*x[2*idx[jrow]+1];
207:       jrow++;
208:      }
209:     y[2*i]   = sum1;
210:     y[2*i+1] = sum2;
211:   }

213:   PetscLogFlops(4*a->nz - 2*nonzerorow);
214:   VecRestoreArray(xx,&x);
215:   VecRestoreArray(yy,&y);
216:   return(0);
217: }

221: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
222: {
223:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
224:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
225:   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
227:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

230:   VecSet(yy,zero);
231:   VecGetArray(xx,&x);
232:   VecGetArray(yy,&y);
233: 
234:   for (i=0; i<m; i++) {
235:     idx    = a->j + a->i[i] ;
236:     v      = a->a + a->i[i] ;
237:     n      = a->i[i+1] - a->i[i];
238:     alpha1 = x[2*i];
239:     alpha2 = x[2*i+1];
240:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
241:   }
242:   PetscLogFlops(4*a->nz);
243:   VecRestoreArray(xx,&x);
244:   VecRestoreArray(yy,&y);
245:   return(0);
246: }

250: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
251: {
252:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
253:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
254:   PetscScalar    *x,*y,*v,sum1, sum2;
256:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
257:   PetscInt       n,i,jrow,j;

260:   if (yy != zz) {VecCopy(yy,zz);}
261:   VecGetArray(xx,&x);
262:   VecGetArray(zz,&y);
263:   idx  = a->j;
264:   v    = a->a;
265:   ii   = a->i;

267:   for (i=0; i<m; i++) {
268:     jrow = ii[i];
269:     n    = ii[i+1] - jrow;
270:     sum1  = 0.0;
271:     sum2  = 0.0;
272:     for (j=0; j<n; j++) {
273:       sum1 += v[jrow]*x[2*idx[jrow]];
274:       sum2 += v[jrow]*x[2*idx[jrow]+1];
275:       jrow++;
276:      }
277:     y[2*i]   += sum1;
278:     y[2*i+1] += sum2;
279:   }

281:   PetscLogFlops(4*a->nz);
282:   VecRestoreArray(xx,&x);
283:   VecRestoreArray(zz,&y);
284:   return(0);
285: }
288: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
289: {
290:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
291:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
292:   PetscScalar    *x,*y,*v,alpha1,alpha2;
294:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

297:   if (yy != zz) {VecCopy(yy,zz);}
298:   VecGetArray(xx,&x);
299:   VecGetArray(zz,&y);
300: 
301:   for (i=0; i<m; i++) {
302:     idx   = a->j + a->i[i] ;
303:     v     = a->a + a->i[i] ;
304:     n     = a->i[i+1] - a->i[i];
305:     alpha1 = x[2*i];
306:     alpha2 = x[2*i+1];
307:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
308:   }
309:   PetscLogFlops(4*a->nz);
310:   VecRestoreArray(xx,&x);
311:   VecRestoreArray(zz,&y);
312:   return(0);
313: }
314: /* --------------------------------------------------------------------------------------*/
317: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
318: {
319:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
320:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
321:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
323:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
324:   PetscInt       n,i,jrow,j;

327:   VecGetArray(xx,&x);
328:   VecGetArray(yy,&y);
329:   idx  = a->j;
330:   v    = a->a;
331:   ii   = a->i;

333:   for (i=0; i<m; i++) {
334:     jrow = ii[i];
335:     n    = ii[i+1] - jrow;
336:     sum1  = 0.0;
337:     sum2  = 0.0;
338:     sum3  = 0.0;
339:     nonzerorow += (n>0);
340:     for (j=0; j<n; j++) {
341:       sum1 += v[jrow]*x[3*idx[jrow]];
342:       sum2 += v[jrow]*x[3*idx[jrow]+1];
343:       sum3 += v[jrow]*x[3*idx[jrow]+2];
344:       jrow++;
345:      }
346:     y[3*i]   = sum1;
347:     y[3*i+1] = sum2;
348:     y[3*i+2] = sum3;
349:   }

351:   PetscLogFlops(6*a->nz - 3*nonzerorow);
352:   VecRestoreArray(xx,&x);
353:   VecRestoreArray(yy,&y);
354:   return(0);
355: }

359: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
360: {
361:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
362:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
363:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
365:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

368:   VecSet(yy,zero);
369:   VecGetArray(xx,&x);
370:   VecGetArray(yy,&y);
371: 
372:   for (i=0; i<m; i++) {
373:     idx    = a->j + a->i[i];
374:     v      = a->a + a->i[i];
375:     n      = a->i[i+1] - a->i[i];
376:     alpha1 = x[3*i];
377:     alpha2 = x[3*i+1];
378:     alpha3 = x[3*i+2];
379:     while (n-->0) {
380:       y[3*(*idx)]   += alpha1*(*v);
381:       y[3*(*idx)+1] += alpha2*(*v);
382:       y[3*(*idx)+2] += alpha3*(*v);
383:       idx++; v++;
384:     }
385:   }
386:   PetscLogFlops(6*a->nz);
387:   VecRestoreArray(xx,&x);
388:   VecRestoreArray(yy,&y);
389:   return(0);
390: }

394: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
395: {
396:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
397:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
398:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
400:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
401:   PetscInt       n,i,jrow,j;

404:   if (yy != zz) {VecCopy(yy,zz);}
405:   VecGetArray(xx,&x);
406:   VecGetArray(zz,&y);
407:   idx  = a->j;
408:   v    = a->a;
409:   ii   = a->i;

411:   for (i=0; i<m; i++) {
412:     jrow = ii[i];
413:     n    = ii[i+1] - jrow;
414:     sum1  = 0.0;
415:     sum2  = 0.0;
416:     sum3  = 0.0;
417:     for (j=0; j<n; j++) {
418:       sum1 += v[jrow]*x[3*idx[jrow]];
419:       sum2 += v[jrow]*x[3*idx[jrow]+1];
420:       sum3 += v[jrow]*x[3*idx[jrow]+2];
421:       jrow++;
422:      }
423:     y[3*i]   += sum1;
424:     y[3*i+1] += sum2;
425:     y[3*i+2] += sum3;
426:   }

428:   PetscLogFlops(6*a->nz);
429:   VecRestoreArray(xx,&x);
430:   VecRestoreArray(zz,&y);
431:   return(0);
432: }
435: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
436: {
437:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
438:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
439:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
441:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

444:   if (yy != zz) {VecCopy(yy,zz);}
445:   VecGetArray(xx,&x);
446:   VecGetArray(zz,&y);
447:   for (i=0; i<m; i++) {
448:     idx    = a->j + a->i[i] ;
449:     v      = a->a + a->i[i] ;
450:     n      = a->i[i+1] - a->i[i];
451:     alpha1 = x[3*i];
452:     alpha2 = x[3*i+1];
453:     alpha3 = x[3*i+2];
454:     while (n-->0) {
455:       y[3*(*idx)]   += alpha1*(*v);
456:       y[3*(*idx)+1] += alpha2*(*v);
457:       y[3*(*idx)+2] += alpha3*(*v);
458:       idx++; v++;
459:     }
460:   }
461:   PetscLogFlops(6*a->nz);
462:   VecRestoreArray(xx,&x);
463:   VecRestoreArray(zz,&y);
464:   return(0);
465: }

467: /* ------------------------------------------------------------------------------*/
470: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
471: {
472:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
473:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
474:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
476:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
477:   PetscInt       n,i,jrow,j;

480:   VecGetArray(xx,&x);
481:   VecGetArray(yy,&y);
482:   idx  = a->j;
483:   v    = a->a;
484:   ii   = a->i;

486:   for (i=0; i<m; i++) {
487:     jrow = ii[i];
488:     n    = ii[i+1] - jrow;
489:     sum1  = 0.0;
490:     sum2  = 0.0;
491:     sum3  = 0.0;
492:     sum4  = 0.0;
493:     nonzerorow += (n>0);
494:     for (j=0; j<n; j++) {
495:       sum1 += v[jrow]*x[4*idx[jrow]];
496:       sum2 += v[jrow]*x[4*idx[jrow]+1];
497:       sum3 += v[jrow]*x[4*idx[jrow]+2];
498:       sum4 += v[jrow]*x[4*idx[jrow]+3];
499:       jrow++;
500:      }
501:     y[4*i]   = sum1;
502:     y[4*i+1] = sum2;
503:     y[4*i+2] = sum3;
504:     y[4*i+3] = sum4;
505:   }

507:   PetscLogFlops(8*a->nz - 4*nonzerorow);
508:   VecRestoreArray(xx,&x);
509:   VecRestoreArray(yy,&y);
510:   return(0);
511: }

515: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
516: {
517:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
518:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
519:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
521:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

524:   VecSet(yy,zero);
525:   VecGetArray(xx,&x);
526:   VecGetArray(yy,&y);
527:   for (i=0; i<m; i++) {
528:     idx    = a->j + a->i[i] ;
529:     v      = a->a + a->i[i] ;
530:     n      = a->i[i+1] - a->i[i];
531:     alpha1 = x[4*i];
532:     alpha2 = x[4*i+1];
533:     alpha3 = x[4*i+2];
534:     alpha4 = x[4*i+3];
535:     while (n-->0) {
536:       y[4*(*idx)]   += alpha1*(*v);
537:       y[4*(*idx)+1] += alpha2*(*v);
538:       y[4*(*idx)+2] += alpha3*(*v);
539:       y[4*(*idx)+3] += alpha4*(*v);
540:       idx++; v++;
541:     }
542:   }
543:   PetscLogFlops(8*a->nz);
544:   VecRestoreArray(xx,&x);
545:   VecRestoreArray(yy,&y);
546:   return(0);
547: }

551: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
552: {
553:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
554:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
555:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
557:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
558:   PetscInt       n,i,jrow,j;

561:   if (yy != zz) {VecCopy(yy,zz);}
562:   VecGetArray(xx,&x);
563:   VecGetArray(zz,&y);
564:   idx  = a->j;
565:   v    = a->a;
566:   ii   = a->i;

568:   for (i=0; i<m; i++) {
569:     jrow = ii[i];
570:     n    = ii[i+1] - jrow;
571:     sum1  = 0.0;
572:     sum2  = 0.0;
573:     sum3  = 0.0;
574:     sum4  = 0.0;
575:     for (j=0; j<n; j++) {
576:       sum1 += v[jrow]*x[4*idx[jrow]];
577:       sum2 += v[jrow]*x[4*idx[jrow]+1];
578:       sum3 += v[jrow]*x[4*idx[jrow]+2];
579:       sum4 += v[jrow]*x[4*idx[jrow]+3];
580:       jrow++;
581:      }
582:     y[4*i]   += sum1;
583:     y[4*i+1] += sum2;
584:     y[4*i+2] += sum3;
585:     y[4*i+3] += sum4;
586:   }

588:   PetscLogFlops(8*a->nz);
589:   VecRestoreArray(xx,&x);
590:   VecRestoreArray(zz,&y);
591:   return(0);
592: }
595: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596: {
597:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
598:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
599:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
601:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

604:   if (yy != zz) {VecCopy(yy,zz);}
605:   VecGetArray(xx,&x);
606:   VecGetArray(zz,&y);
607: 
608:   for (i=0; i<m; i++) {
609:     idx    = a->j + a->i[i] ;
610:     v      = a->a + a->i[i] ;
611:     n      = a->i[i+1] - a->i[i];
612:     alpha1 = x[4*i];
613:     alpha2 = x[4*i+1];
614:     alpha3 = x[4*i+2];
615:     alpha4 = x[4*i+3];
616:     while (n-->0) {
617:       y[4*(*idx)]   += alpha1*(*v);
618:       y[4*(*idx)+1] += alpha2*(*v);
619:       y[4*(*idx)+2] += alpha3*(*v);
620:       y[4*(*idx)+3] += alpha4*(*v);
621:       idx++; v++;
622:     }
623:   }
624:   PetscLogFlops(8*a->nz);
625:   VecRestoreArray(xx,&x);
626:   VecRestoreArray(zz,&y);
627:   return(0);
628: }
629: /* ------------------------------------------------------------------------------*/

633: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
634: {
635:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
636:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
637:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
639:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
640:   PetscInt       n,i,jrow,j;

643:   VecGetArray(xx,&x);
644:   VecGetArray(yy,&y);
645:   idx  = a->j;
646:   v    = a->a;
647:   ii   = a->i;

649:   for (i=0; i<m; i++) {
650:     jrow = ii[i];
651:     n    = ii[i+1] - jrow;
652:     sum1  = 0.0;
653:     sum2  = 0.0;
654:     sum3  = 0.0;
655:     sum4  = 0.0;
656:     sum5  = 0.0;
657:     nonzerorow += (n>0);
658:     for (j=0; j<n; j++) {
659:       sum1 += v[jrow]*x[5*idx[jrow]];
660:       sum2 += v[jrow]*x[5*idx[jrow]+1];
661:       sum3 += v[jrow]*x[5*idx[jrow]+2];
662:       sum4 += v[jrow]*x[5*idx[jrow]+3];
663:       sum5 += v[jrow]*x[5*idx[jrow]+4];
664:       jrow++;
665:      }
666:     y[5*i]   = sum1;
667:     y[5*i+1] = sum2;
668:     y[5*i+2] = sum3;
669:     y[5*i+3] = sum4;
670:     y[5*i+4] = sum5;
671:   }

673:   PetscLogFlops(10*a->nz - 5*nonzerorow);
674:   VecRestoreArray(xx,&x);
675:   VecRestoreArray(yy,&y);
676:   return(0);
677: }

681: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
682: {
683:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
684:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
685:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
687:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

690:   VecSet(yy,zero);
691:   VecGetArray(xx,&x);
692:   VecGetArray(yy,&y);
693: 
694:   for (i=0; i<m; i++) {
695:     idx    = a->j + a->i[i] ;
696:     v      = a->a + a->i[i] ;
697:     n      = a->i[i+1] - a->i[i];
698:     alpha1 = x[5*i];
699:     alpha2 = x[5*i+1];
700:     alpha3 = x[5*i+2];
701:     alpha4 = x[5*i+3];
702:     alpha5 = x[5*i+4];
703:     while (n-->0) {
704:       y[5*(*idx)]   += alpha1*(*v);
705:       y[5*(*idx)+1] += alpha2*(*v);
706:       y[5*(*idx)+2] += alpha3*(*v);
707:       y[5*(*idx)+3] += alpha4*(*v);
708:       y[5*(*idx)+4] += alpha5*(*v);
709:       idx++; v++;
710:     }
711:   }
712:   PetscLogFlops(10*a->nz);
713:   VecRestoreArray(xx,&x);
714:   VecRestoreArray(yy,&y);
715:   return(0);
716: }

720: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
721: {
722:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
723:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
724:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
726:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
727:   PetscInt       n,i,jrow,j;

730:   if (yy != zz) {VecCopy(yy,zz);}
731:   VecGetArray(xx,&x);
732:   VecGetArray(zz,&y);
733:   idx  = a->j;
734:   v    = a->a;
735:   ii   = a->i;

737:   for (i=0; i<m; i++) {
738:     jrow = ii[i];
739:     n    = ii[i+1] - jrow;
740:     sum1  = 0.0;
741:     sum2  = 0.0;
742:     sum3  = 0.0;
743:     sum4  = 0.0;
744:     sum5  = 0.0;
745:     for (j=0; j<n; j++) {
746:       sum1 += v[jrow]*x[5*idx[jrow]];
747:       sum2 += v[jrow]*x[5*idx[jrow]+1];
748:       sum3 += v[jrow]*x[5*idx[jrow]+2];
749:       sum4 += v[jrow]*x[5*idx[jrow]+3];
750:       sum5 += v[jrow]*x[5*idx[jrow]+4];
751:       jrow++;
752:      }
753:     y[5*i]   += sum1;
754:     y[5*i+1] += sum2;
755:     y[5*i+2] += sum3;
756:     y[5*i+3] += sum4;
757:     y[5*i+4] += sum5;
758:   }

760:   PetscLogFlops(10*a->nz);
761:   VecRestoreArray(xx,&x);
762:   VecRestoreArray(zz,&y);
763:   return(0);
764: }

768: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
769: {
770:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
771:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
772:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
774:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

777:   if (yy != zz) {VecCopy(yy,zz);}
778:   VecGetArray(xx,&x);
779:   VecGetArray(zz,&y);
780: 
781:   for (i=0; i<m; i++) {
782:     idx    = a->j + a->i[i] ;
783:     v      = a->a + a->i[i] ;
784:     n      = a->i[i+1] - a->i[i];
785:     alpha1 = x[5*i];
786:     alpha2 = x[5*i+1];
787:     alpha3 = x[5*i+2];
788:     alpha4 = x[5*i+3];
789:     alpha5 = x[5*i+4];
790:     while (n-->0) {
791:       y[5*(*idx)]   += alpha1*(*v);
792:       y[5*(*idx)+1] += alpha2*(*v);
793:       y[5*(*idx)+2] += alpha3*(*v);
794:       y[5*(*idx)+3] += alpha4*(*v);
795:       y[5*(*idx)+4] += alpha5*(*v);
796:       idx++; v++;
797:     }
798:   }
799:   PetscLogFlops(10*a->nz);
800:   VecRestoreArray(xx,&x);
801:   VecRestoreArray(zz,&y);
802:   return(0);
803: }

805: /* ------------------------------------------------------------------------------*/
808: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
809: {
810:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
811:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
812:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
814:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
815:   PetscInt       n,i,jrow,j;

818:   VecGetArray(xx,&x);
819:   VecGetArray(yy,&y);
820:   idx  = a->j;
821:   v    = a->a;
822:   ii   = a->i;

824:   for (i=0; i<m; i++) {
825:     jrow = ii[i];
826:     n    = ii[i+1] - jrow;
827:     sum1  = 0.0;
828:     sum2  = 0.0;
829:     sum3  = 0.0;
830:     sum4  = 0.0;
831:     sum5  = 0.0;
832:     sum6  = 0.0;
833:     nonzerorow += (n>0);
834:     for (j=0; j<n; j++) {
835:       sum1 += v[jrow]*x[6*idx[jrow]];
836:       sum2 += v[jrow]*x[6*idx[jrow]+1];
837:       sum3 += v[jrow]*x[6*idx[jrow]+2];
838:       sum4 += v[jrow]*x[6*idx[jrow]+3];
839:       sum5 += v[jrow]*x[6*idx[jrow]+4];
840:       sum6 += v[jrow]*x[6*idx[jrow]+5];
841:       jrow++;
842:      }
843:     y[6*i]   = sum1;
844:     y[6*i+1] = sum2;
845:     y[6*i+2] = sum3;
846:     y[6*i+3] = sum4;
847:     y[6*i+4] = sum5;
848:     y[6*i+5] = sum6;
849:   }

851:   PetscLogFlops(12*a->nz - 6*nonzerorow);
852:   VecRestoreArray(xx,&x);
853:   VecRestoreArray(yy,&y);
854:   return(0);
855: }

859: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
860: {
861:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
862:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
863:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
865:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

868:   VecSet(yy,zero);
869:   VecGetArray(xx,&x);
870:   VecGetArray(yy,&y);

872:   for (i=0; i<m; i++) {
873:     idx    = a->j + a->i[i] ;
874:     v      = a->a + a->i[i] ;
875:     n      = a->i[i+1] - a->i[i];
876:     alpha1 = x[6*i];
877:     alpha2 = x[6*i+1];
878:     alpha3 = x[6*i+2];
879:     alpha4 = x[6*i+3];
880:     alpha5 = x[6*i+4];
881:     alpha6 = x[6*i+5];
882:     while (n-->0) {
883:       y[6*(*idx)]   += alpha1*(*v);
884:       y[6*(*idx)+1] += alpha2*(*v);
885:       y[6*(*idx)+2] += alpha3*(*v);
886:       y[6*(*idx)+3] += alpha4*(*v);
887:       y[6*(*idx)+4] += alpha5*(*v);
888:       y[6*(*idx)+5] += alpha6*(*v);
889:       idx++; v++;
890:     }
891:   }
892:   PetscLogFlops(12*a->nz);
893:   VecRestoreArray(xx,&x);
894:   VecRestoreArray(yy,&y);
895:   return(0);
896: }

900: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
901: {
902:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
903:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
904:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
906:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
907:   PetscInt       n,i,jrow,j;

910:   if (yy != zz) {VecCopy(yy,zz);}
911:   VecGetArray(xx,&x);
912:   VecGetArray(zz,&y);
913:   idx  = a->j;
914:   v    = a->a;
915:   ii   = a->i;

917:   for (i=0; i<m; i++) {
918:     jrow = ii[i];
919:     n    = ii[i+1] - jrow;
920:     sum1  = 0.0;
921:     sum2  = 0.0;
922:     sum3  = 0.0;
923:     sum4  = 0.0;
924:     sum5  = 0.0;
925:     sum6  = 0.0;
926:     for (j=0; j<n; j++) {
927:       sum1 += v[jrow]*x[6*idx[jrow]];
928:       sum2 += v[jrow]*x[6*idx[jrow]+1];
929:       sum3 += v[jrow]*x[6*idx[jrow]+2];
930:       sum4 += v[jrow]*x[6*idx[jrow]+3];
931:       sum5 += v[jrow]*x[6*idx[jrow]+4];
932:       sum6 += v[jrow]*x[6*idx[jrow]+5];
933:       jrow++;
934:      }
935:     y[6*i]   += sum1;
936:     y[6*i+1] += sum2;
937:     y[6*i+2] += sum3;
938:     y[6*i+3] += sum4;
939:     y[6*i+4] += sum5;
940:     y[6*i+5] += sum6;
941:   }

943:   PetscLogFlops(12*a->nz);
944:   VecRestoreArray(xx,&x);
945:   VecRestoreArray(zz,&y);
946:   return(0);
947: }

951: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
952: {
953:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
954:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
955:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
957:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

960:   if (yy != zz) {VecCopy(yy,zz);}
961:   VecGetArray(xx,&x);
962:   VecGetArray(zz,&y);
963: 
964:   for (i=0; i<m; i++) {
965:     idx    = a->j + a->i[i] ;
966:     v      = a->a + a->i[i] ;
967:     n      = a->i[i+1] - a->i[i];
968:     alpha1 = x[6*i];
969:     alpha2 = x[6*i+1];
970:     alpha3 = x[6*i+2];
971:     alpha4 = x[6*i+3];
972:     alpha5 = x[6*i+4];
973:     alpha6 = x[6*i+5];
974:     while (n-->0) {
975:       y[6*(*idx)]   += alpha1*(*v);
976:       y[6*(*idx)+1] += alpha2*(*v);
977:       y[6*(*idx)+2] += alpha3*(*v);
978:       y[6*(*idx)+3] += alpha4*(*v);
979:       y[6*(*idx)+4] += alpha5*(*v);
980:       y[6*(*idx)+5] += alpha6*(*v);
981:       idx++; v++;
982:     }
983:   }
984:   PetscLogFlops(12*a->nz);
985:   VecRestoreArray(xx,&x);
986:   VecRestoreArray(zz,&y);
987:   return(0);
988: }

990: /* ------------------------------------------------------------------------------*/
993: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
994: {
995:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
996:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
997:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
999:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1000:   PetscInt       n,i,jrow,j;

1003:   VecGetArray(xx,&x);
1004:   VecGetArray(yy,&y);
1005:   idx  = a->j;
1006:   v    = a->a;
1007:   ii   = a->i;

1009:   for (i=0; i<m; i++) {
1010:     jrow = ii[i];
1011:     n    = ii[i+1] - jrow;
1012:     sum1  = 0.0;
1013:     sum2  = 0.0;
1014:     sum3  = 0.0;
1015:     sum4  = 0.0;
1016:     sum5  = 0.0;
1017:     sum6  = 0.0;
1018:     sum7  = 0.0;
1019:     nonzerorow += (n>0);
1020:     for (j=0; j<n; j++) {
1021:       sum1 += v[jrow]*x[7*idx[jrow]];
1022:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1023:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1024:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1025:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1026:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1027:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1028:       jrow++;
1029:      }
1030:     y[7*i]   = sum1;
1031:     y[7*i+1] = sum2;
1032:     y[7*i+2] = sum3;
1033:     y[7*i+3] = sum4;
1034:     y[7*i+4] = sum5;
1035:     y[7*i+5] = sum6;
1036:     y[7*i+6] = sum7;
1037:   }

1039:   PetscLogFlops(14*a->nz - 7*nonzerorow);
1040:   VecRestoreArray(xx,&x);
1041:   VecRestoreArray(yy,&y);
1042:   return(0);
1043: }

1047: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1048: {
1049:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1050:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1051:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1053:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1056:   VecSet(yy,zero);
1057:   VecGetArray(xx,&x);
1058:   VecGetArray(yy,&y);

1060:   for (i=0; i<m; i++) {
1061:     idx    = a->j + a->i[i] ;
1062:     v      = a->a + a->i[i] ;
1063:     n      = a->i[i+1] - a->i[i];
1064:     alpha1 = x[7*i];
1065:     alpha2 = x[7*i+1];
1066:     alpha3 = x[7*i+2];
1067:     alpha4 = x[7*i+3];
1068:     alpha5 = x[7*i+4];
1069:     alpha6 = x[7*i+5];
1070:     alpha7 = x[7*i+6];
1071:     while (n-->0) {
1072:       y[7*(*idx)]   += alpha1*(*v);
1073:       y[7*(*idx)+1] += alpha2*(*v);
1074:       y[7*(*idx)+2] += alpha3*(*v);
1075:       y[7*(*idx)+3] += alpha4*(*v);
1076:       y[7*(*idx)+4] += alpha5*(*v);
1077:       y[7*(*idx)+5] += alpha6*(*v);
1078:       y[7*(*idx)+6] += alpha7*(*v);
1079:       idx++; v++;
1080:     }
1081:   }
1082:   PetscLogFlops(14*a->nz);
1083:   VecRestoreArray(xx,&x);
1084:   VecRestoreArray(yy,&y);
1085:   return(0);
1086: }

1090: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1091: {
1092:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1093:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1094:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1096:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1097:   PetscInt       n,i,jrow,j;

1100:   if (yy != zz) {VecCopy(yy,zz);}
1101:   VecGetArray(xx,&x);
1102:   VecGetArray(zz,&y);
1103:   idx  = a->j;
1104:   v    = a->a;
1105:   ii   = a->i;

1107:   for (i=0; i<m; i++) {
1108:     jrow = ii[i];
1109:     n    = ii[i+1] - jrow;
1110:     sum1  = 0.0;
1111:     sum2  = 0.0;
1112:     sum3  = 0.0;
1113:     sum4  = 0.0;
1114:     sum5  = 0.0;
1115:     sum6  = 0.0;
1116:     sum7  = 0.0;
1117:     for (j=0; j<n; j++) {
1118:       sum1 += v[jrow]*x[7*idx[jrow]];
1119:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1120:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1121:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1122:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1123:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1124:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1125:       jrow++;
1126:      }
1127:     y[7*i]   += sum1;
1128:     y[7*i+1] += sum2;
1129:     y[7*i+2] += sum3;
1130:     y[7*i+3] += sum4;
1131:     y[7*i+4] += sum5;
1132:     y[7*i+5] += sum6;
1133:     y[7*i+6] += sum7;
1134:   }

1136:   PetscLogFlops(14*a->nz);
1137:   VecRestoreArray(xx,&x);
1138:   VecRestoreArray(zz,&y);
1139:   return(0);
1140: }

1144: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1145: {
1146:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1147:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1148:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1150:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1153:   if (yy != zz) {VecCopy(yy,zz);}
1154:   VecGetArray(xx,&x);
1155:   VecGetArray(zz,&y);
1156:   for (i=0; i<m; i++) {
1157:     idx    = a->j + a->i[i] ;
1158:     v      = a->a + a->i[i] ;
1159:     n      = a->i[i+1] - a->i[i];
1160:     alpha1 = x[7*i];
1161:     alpha2 = x[7*i+1];
1162:     alpha3 = x[7*i+2];
1163:     alpha4 = x[7*i+3];
1164:     alpha5 = x[7*i+4];
1165:     alpha6 = x[7*i+5];
1166:     alpha7 = x[7*i+6];
1167:     while (n-->0) {
1168:       y[7*(*idx)]   += alpha1*(*v);
1169:       y[7*(*idx)+1] += alpha2*(*v);
1170:       y[7*(*idx)+2] += alpha3*(*v);
1171:       y[7*(*idx)+3] += alpha4*(*v);
1172:       y[7*(*idx)+4] += alpha5*(*v);
1173:       y[7*(*idx)+5] += alpha6*(*v);
1174:       y[7*(*idx)+6] += alpha7*(*v);
1175:       idx++; v++;
1176:     }
1177:   }
1178:   PetscLogFlops(14*a->nz);
1179:   VecRestoreArray(xx,&x);
1180:   VecRestoreArray(zz,&y);
1181:   return(0);
1182: }

1186: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1187: {
1188:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1189:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1190:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1192:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1193:   PetscInt       n,i,jrow,j;

1196:   VecGetArray(xx,&x);
1197:   VecGetArray(yy,&y);
1198:   idx  = a->j;
1199:   v    = a->a;
1200:   ii   = a->i;

1202:   for (i=0; i<m; i++) {
1203:     jrow = ii[i];
1204:     n    = ii[i+1] - jrow;
1205:     sum1  = 0.0;
1206:     sum2  = 0.0;
1207:     sum3  = 0.0;
1208:     sum4  = 0.0;
1209:     sum5  = 0.0;
1210:     sum6  = 0.0;
1211:     sum7  = 0.0;
1212:     sum8  = 0.0;
1213:     nonzerorow += (n>0);
1214:     for (j=0; j<n; j++) {
1215:       sum1 += v[jrow]*x[8*idx[jrow]];
1216:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1217:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1218:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1219:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1220:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1221:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1222:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1223:       jrow++;
1224:      }
1225:     y[8*i]   = sum1;
1226:     y[8*i+1] = sum2;
1227:     y[8*i+2] = sum3;
1228:     y[8*i+3] = sum4;
1229:     y[8*i+4] = sum5;
1230:     y[8*i+5] = sum6;
1231:     y[8*i+6] = sum7;
1232:     y[8*i+7] = sum8;
1233:   }

1235:   PetscLogFlops(16*a->nz - 8*nonzerorow);
1236:   VecRestoreArray(xx,&x);
1237:   VecRestoreArray(yy,&y);
1238:   return(0);
1239: }

1243: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1244: {
1245:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1246:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1247:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1249:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1252:   VecSet(yy,zero);
1253:   VecGetArray(xx,&x);
1254:   VecGetArray(yy,&y);

1256:   for (i=0; i<m; i++) {
1257:     idx    = a->j + a->i[i] ;
1258:     v      = a->a + a->i[i] ;
1259:     n      = a->i[i+1] - a->i[i];
1260:     alpha1 = x[8*i];
1261:     alpha2 = x[8*i+1];
1262:     alpha3 = x[8*i+2];
1263:     alpha4 = x[8*i+3];
1264:     alpha5 = x[8*i+4];
1265:     alpha6 = x[8*i+5];
1266:     alpha7 = x[8*i+6];
1267:     alpha8 = x[8*i+7];
1268:     while (n-->0) {
1269:       y[8*(*idx)]   += alpha1*(*v);
1270:       y[8*(*idx)+1] += alpha2*(*v);
1271:       y[8*(*idx)+2] += alpha3*(*v);
1272:       y[8*(*idx)+3] += alpha4*(*v);
1273:       y[8*(*idx)+4] += alpha5*(*v);
1274:       y[8*(*idx)+5] += alpha6*(*v);
1275:       y[8*(*idx)+6] += alpha7*(*v);
1276:       y[8*(*idx)+7] += alpha8*(*v);
1277:       idx++; v++;
1278:     }
1279:   }
1280:   PetscLogFlops(16*a->nz);
1281:   VecRestoreArray(xx,&x);
1282:   VecRestoreArray(yy,&y);
1283:   return(0);
1284: }

1288: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1289: {
1290:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1291:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1292:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1294:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1295:   PetscInt       n,i,jrow,j;

1298:   if (yy != zz) {VecCopy(yy,zz);}
1299:   VecGetArray(xx,&x);
1300:   VecGetArray(zz,&y);
1301:   idx  = a->j;
1302:   v    = a->a;
1303:   ii   = a->i;

1305:   for (i=0; i<m; i++) {
1306:     jrow = ii[i];
1307:     n    = ii[i+1] - jrow;
1308:     sum1  = 0.0;
1309:     sum2  = 0.0;
1310:     sum3  = 0.0;
1311:     sum4  = 0.0;
1312:     sum5  = 0.0;
1313:     sum6  = 0.0;
1314:     sum7  = 0.0;
1315:     sum8  = 0.0;
1316:     for (j=0; j<n; j++) {
1317:       sum1 += v[jrow]*x[8*idx[jrow]];
1318:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1319:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1320:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1321:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1322:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1323:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1324:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1325:       jrow++;
1326:      }
1327:     y[8*i]   += sum1;
1328:     y[8*i+1] += sum2;
1329:     y[8*i+2] += sum3;
1330:     y[8*i+3] += sum4;
1331:     y[8*i+4] += sum5;
1332:     y[8*i+5] += sum6;
1333:     y[8*i+6] += sum7;
1334:     y[8*i+7] += sum8;
1335:   }

1337:   PetscLogFlops(16*a->nz);
1338:   VecRestoreArray(xx,&x);
1339:   VecRestoreArray(zz,&y);
1340:   return(0);
1341: }

1345: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1346: {
1347:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1348:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1349:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1351:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1354:   if (yy != zz) {VecCopy(yy,zz);}
1355:   VecGetArray(xx,&x);
1356:   VecGetArray(zz,&y);
1357:   for (i=0; i<m; i++) {
1358:     idx    = a->j + a->i[i] ;
1359:     v      = a->a + a->i[i] ;
1360:     n      = a->i[i+1] - a->i[i];
1361:     alpha1 = x[8*i];
1362:     alpha2 = x[8*i+1];
1363:     alpha3 = x[8*i+2];
1364:     alpha4 = x[8*i+3];
1365:     alpha5 = x[8*i+4];
1366:     alpha6 = x[8*i+5];
1367:     alpha7 = x[8*i+6];
1368:     alpha8 = x[8*i+7];
1369:     while (n-->0) {
1370:       y[8*(*idx)]   += alpha1*(*v);
1371:       y[8*(*idx)+1] += alpha2*(*v);
1372:       y[8*(*idx)+2] += alpha3*(*v);
1373:       y[8*(*idx)+3] += alpha4*(*v);
1374:       y[8*(*idx)+4] += alpha5*(*v);
1375:       y[8*(*idx)+5] += alpha6*(*v);
1376:       y[8*(*idx)+6] += alpha7*(*v);
1377:       y[8*(*idx)+7] += alpha8*(*v);
1378:       idx++; v++;
1379:     }
1380:   }
1381:   PetscLogFlops(16*a->nz);
1382:   VecRestoreArray(xx,&x);
1383:   VecRestoreArray(zz,&y);
1384:   return(0);
1385: }

1387: /* ------------------------------------------------------------------------------*/
1390: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1391: {
1392:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1393:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1394:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1396:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1397:   PetscInt       n,i,jrow,j;

1400:   VecGetArray(xx,&x);
1401:   VecGetArray(yy,&y);
1402:   idx  = a->j;
1403:   v    = a->a;
1404:   ii   = a->i;

1406:   for (i=0; i<m; i++) {
1407:     jrow = ii[i];
1408:     n    = ii[i+1] - jrow;
1409:     sum1  = 0.0;
1410:     sum2  = 0.0;
1411:     sum3  = 0.0;
1412:     sum4  = 0.0;
1413:     sum5  = 0.0;
1414:     sum6  = 0.0;
1415:     sum7  = 0.0;
1416:     sum8  = 0.0;
1417:     sum9  = 0.0;
1418:     nonzerorow += (n>0);
1419:     for (j=0; j<n; j++) {
1420:       sum1 += v[jrow]*x[9*idx[jrow]];
1421:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1422:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1423:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1424:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1425:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1426:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1427:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1428:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1429:       jrow++;
1430:      }
1431:     y[9*i]   = sum1;
1432:     y[9*i+1] = sum2;
1433:     y[9*i+2] = sum3;
1434:     y[9*i+3] = sum4;
1435:     y[9*i+4] = sum5;
1436:     y[9*i+5] = sum6;
1437:     y[9*i+6] = sum7;
1438:     y[9*i+7] = sum8;
1439:     y[9*i+8] = sum9;
1440:   }

1442:   PetscLogFlops(18*a->nz - 9*nonzerorow);
1443:   VecRestoreArray(xx,&x);
1444:   VecRestoreArray(yy,&y);
1445:   return(0);
1446: }

1448: /* ------------------------------------------------------------------------------*/

1452: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1453: {
1454:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1455:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1456:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1458:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1461:   VecSet(yy,zero);
1462:   VecGetArray(xx,&x);
1463:   VecGetArray(yy,&y);

1465:   for (i=0; i<m; i++) {
1466:     idx    = a->j + a->i[i] ;
1467:     v      = a->a + a->i[i] ;
1468:     n      = a->i[i+1] - a->i[i];
1469:     alpha1 = x[9*i];
1470:     alpha2 = x[9*i+1];
1471:     alpha3 = x[9*i+2];
1472:     alpha4 = x[9*i+3];
1473:     alpha5 = x[9*i+4];
1474:     alpha6 = x[9*i+5];
1475:     alpha7 = x[9*i+6];
1476:     alpha8 = x[9*i+7];
1477:     alpha9 = x[9*i+8];
1478:     while (n-->0) {
1479:       y[9*(*idx)]   += alpha1*(*v);
1480:       y[9*(*idx)+1] += alpha2*(*v);
1481:       y[9*(*idx)+2] += alpha3*(*v);
1482:       y[9*(*idx)+3] += alpha4*(*v);
1483:       y[9*(*idx)+4] += alpha5*(*v);
1484:       y[9*(*idx)+5] += alpha6*(*v);
1485:       y[9*(*idx)+6] += alpha7*(*v);
1486:       y[9*(*idx)+7] += alpha8*(*v);
1487:       y[9*(*idx)+8] += alpha9*(*v);
1488:       idx++; v++;
1489:     }
1490:   }
1491:   PetscLogFlops(18*a->nz);
1492:   VecRestoreArray(xx,&x);
1493:   VecRestoreArray(yy,&y);
1494:   return(0);
1495: }

1499: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1500: {
1501:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1502:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1503:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1505:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1506:   PetscInt       n,i,jrow,j;

1509:   if (yy != zz) {VecCopy(yy,zz);}
1510:   VecGetArray(xx,&x);
1511:   VecGetArray(zz,&y);
1512:   idx  = a->j;
1513:   v    = a->a;
1514:   ii   = a->i;

1516:   for (i=0; i<m; i++) {
1517:     jrow = ii[i];
1518:     n    = ii[i+1] - jrow;
1519:     sum1  = 0.0;
1520:     sum2  = 0.0;
1521:     sum3  = 0.0;
1522:     sum4  = 0.0;
1523:     sum5  = 0.0;
1524:     sum6  = 0.0;
1525:     sum7  = 0.0;
1526:     sum8  = 0.0;
1527:     sum9  = 0.0;
1528:     for (j=0; j<n; j++) {
1529:       sum1 += v[jrow]*x[9*idx[jrow]];
1530:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1531:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1532:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1533:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1534:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1535:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1536:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1537:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1538:       jrow++;
1539:      }
1540:     y[9*i]   += sum1;
1541:     y[9*i+1] += sum2;
1542:     y[9*i+2] += sum3;
1543:     y[9*i+3] += sum4;
1544:     y[9*i+4] += sum5;
1545:     y[9*i+5] += sum6;
1546:     y[9*i+6] += sum7;
1547:     y[9*i+7] += sum8;
1548:     y[9*i+8] += sum9;
1549:   }

1551:   PetscLogFlops(18*a->nz);
1552:   VecRestoreArray(xx,&x);
1553:   VecRestoreArray(zz,&y);
1554:   return(0);
1555: }

1559: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1560: {
1561:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1562:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1563:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1565:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1568:   if (yy != zz) {VecCopy(yy,zz);}
1569:   VecGetArray(xx,&x);
1570:   VecGetArray(zz,&y);
1571:   for (i=0; i<m; i++) {
1572:     idx    = a->j + a->i[i] ;
1573:     v      = a->a + a->i[i] ;
1574:     n      = a->i[i+1] - a->i[i];
1575:     alpha1 = x[9*i];
1576:     alpha2 = x[9*i+1];
1577:     alpha3 = x[9*i+2];
1578:     alpha4 = x[9*i+3];
1579:     alpha5 = x[9*i+4];
1580:     alpha6 = x[9*i+5];
1581:     alpha7 = x[9*i+6];
1582:     alpha8 = x[9*i+7];
1583:     alpha9 = x[9*i+8];
1584:     while (n-->0) {
1585:       y[9*(*idx)]   += alpha1*(*v);
1586:       y[9*(*idx)+1] += alpha2*(*v);
1587:       y[9*(*idx)+2] += alpha3*(*v);
1588:       y[9*(*idx)+3] += alpha4*(*v);
1589:       y[9*(*idx)+4] += alpha5*(*v);
1590:       y[9*(*idx)+5] += alpha6*(*v);
1591:       y[9*(*idx)+6] += alpha7*(*v);
1592:       y[9*(*idx)+7] += alpha8*(*v);
1593:       y[9*(*idx)+8] += alpha9*(*v);
1594:       idx++; v++;
1595:     }
1596:   }
1597:   PetscLogFlops(18*a->nz);
1598:   VecRestoreArray(xx,&x);
1599:   VecRestoreArray(zz,&y);
1600:   return(0);
1601: }
1602: /*--------------------------------------------------------------------------------------------*/
1605: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1606: {
1607:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1608:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1609:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1611:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1612:   PetscInt       n,i,jrow,j;

1615:   VecGetArray(xx,&x);
1616:   VecGetArray(yy,&y);
1617:   idx  = a->j;
1618:   v    = a->a;
1619:   ii   = a->i;

1621:   for (i=0; i<m; i++) {
1622:     jrow = ii[i];
1623:     n    = ii[i+1] - jrow;
1624:     sum1  = 0.0;
1625:     sum2  = 0.0;
1626:     sum3  = 0.0;
1627:     sum4  = 0.0;
1628:     sum5  = 0.0;
1629:     sum6  = 0.0;
1630:     sum7  = 0.0;
1631:     sum8  = 0.0;
1632:     sum9  = 0.0;
1633:     sum10 = 0.0;
1634:     nonzerorow += (n>0);
1635:     for (j=0; j<n; j++) {
1636:       sum1  += v[jrow]*x[10*idx[jrow]];
1637:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1638:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1639:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1640:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1641:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1642:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1643:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1644:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1645:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1646:       jrow++;
1647:      }
1648:     y[10*i]   = sum1;
1649:     y[10*i+1] = sum2;
1650:     y[10*i+2] = sum3;
1651:     y[10*i+3] = sum4;
1652:     y[10*i+4] = sum5;
1653:     y[10*i+5] = sum6;
1654:     y[10*i+6] = sum7;
1655:     y[10*i+7] = sum8;
1656:     y[10*i+8] = sum9;
1657:     y[10*i+9] = sum10;
1658:   }

1660:   PetscLogFlops(20*a->nz - 10*nonzerorow);
1661:   VecRestoreArray(xx,&x);
1662:   VecRestoreArray(yy,&y);
1663:   return(0);
1664: }

1668: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1669: {
1670:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1671:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1672:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1674:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1675:   PetscInt       n,i,jrow,j;

1678:   if (yy != zz) {VecCopy(yy,zz);}
1679:   VecGetArray(xx,&x);
1680:   VecGetArray(zz,&y);
1681:   idx  = a->j;
1682:   v    = a->a;
1683:   ii   = a->i;

1685:   for (i=0; i<m; i++) {
1686:     jrow = ii[i];
1687:     n    = ii[i+1] - jrow;
1688:     sum1  = 0.0;
1689:     sum2  = 0.0;
1690:     sum3  = 0.0;
1691:     sum4  = 0.0;
1692:     sum5  = 0.0;
1693:     sum6  = 0.0;
1694:     sum7  = 0.0;
1695:     sum8  = 0.0;
1696:     sum9  = 0.0;
1697:     sum10 = 0.0;
1698:     for (j=0; j<n; j++) {
1699:       sum1  += v[jrow]*x[10*idx[jrow]];
1700:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1701:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1702:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1703:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1704:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1705:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1706:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1707:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1708:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1709:       jrow++;
1710:      }
1711:     y[10*i]   += sum1;
1712:     y[10*i+1] += sum2;
1713:     y[10*i+2] += sum3;
1714:     y[10*i+3] += sum4;
1715:     y[10*i+4] += sum5;
1716:     y[10*i+5] += sum6;
1717:     y[10*i+6] += sum7;
1718:     y[10*i+7] += sum8;
1719:     y[10*i+8] += sum9;
1720:     y[10*i+9] += sum10;
1721:   }

1723:   PetscLogFlops(20*a->nz);
1724:   VecRestoreArray(xx,&x);
1725:   VecRestoreArray(yy,&y);
1726:   return(0);
1727: }

1731: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1732: {
1733:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1734:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1735:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1737:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1740:   VecSet(yy,zero);
1741:   VecGetArray(xx,&x);
1742:   VecGetArray(yy,&y);

1744:   for (i=0; i<m; i++) {
1745:     idx    = a->j + a->i[i] ;
1746:     v      = a->a + a->i[i] ;
1747:     n      = a->i[i+1] - a->i[i];
1748:     alpha1 = x[10*i];
1749:     alpha2 = x[10*i+1];
1750:     alpha3 = x[10*i+2];
1751:     alpha4 = x[10*i+3];
1752:     alpha5 = x[10*i+4];
1753:     alpha6 = x[10*i+5];
1754:     alpha7 = x[10*i+6];
1755:     alpha8 = x[10*i+7];
1756:     alpha9 = x[10*i+8];
1757:     alpha10 = x[10*i+9];
1758:     while (n-->0) {
1759:       y[10*(*idx)]   += alpha1*(*v);
1760:       y[10*(*idx)+1] += alpha2*(*v);
1761:       y[10*(*idx)+2] += alpha3*(*v);
1762:       y[10*(*idx)+3] += alpha4*(*v);
1763:       y[10*(*idx)+4] += alpha5*(*v);
1764:       y[10*(*idx)+5] += alpha6*(*v);
1765:       y[10*(*idx)+6] += alpha7*(*v);
1766:       y[10*(*idx)+7] += alpha8*(*v);
1767:       y[10*(*idx)+8] += alpha9*(*v);
1768:       y[10*(*idx)+9] += alpha10*(*v);
1769:       idx++; v++;
1770:     }
1771:   }
1772:   PetscLogFlops(20*a->nz);
1773:   VecRestoreArray(xx,&x);
1774:   VecRestoreArray(yy,&y);
1775:   return(0);
1776: }

1780: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1781: {
1782:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1783:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1784:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1786:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1789:   if (yy != zz) {VecCopy(yy,zz);}
1790:   VecGetArray(xx,&x);
1791:   VecGetArray(zz,&y);
1792:   for (i=0; i<m; i++) {
1793:     idx    = a->j + a->i[i] ;
1794:     v      = a->a + a->i[i] ;
1795:     n      = a->i[i+1] - a->i[i];
1796:     alpha1 = x[10*i];
1797:     alpha2 = x[10*i+1];
1798:     alpha3 = x[10*i+2];
1799:     alpha4 = x[10*i+3];
1800:     alpha5 = x[10*i+4];
1801:     alpha6 = x[10*i+5];
1802:     alpha7 = x[10*i+6];
1803:     alpha8 = x[10*i+7];
1804:     alpha9 = x[10*i+8];
1805:     alpha10 = x[10*i+9];
1806:     while (n-->0) {
1807:       y[10*(*idx)]   += alpha1*(*v);
1808:       y[10*(*idx)+1] += alpha2*(*v);
1809:       y[10*(*idx)+2] += alpha3*(*v);
1810:       y[10*(*idx)+3] += alpha4*(*v);
1811:       y[10*(*idx)+4] += alpha5*(*v);
1812:       y[10*(*idx)+5] += alpha6*(*v);
1813:       y[10*(*idx)+6] += alpha7*(*v);
1814:       y[10*(*idx)+7] += alpha8*(*v);
1815:       y[10*(*idx)+8] += alpha9*(*v);
1816:       y[10*(*idx)+9] += alpha10*(*v);
1817:       idx++; v++;
1818:     }
1819:   }
1820:   PetscLogFlops(20*a->nz);
1821:   VecRestoreArray(xx,&x);
1822:   VecRestoreArray(zz,&y);
1823:   return(0);
1824: }


1827: /*--------------------------------------------------------------------------------------------*/
1830: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1831: {
1832:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1833:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1834:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1835:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1837:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1838:   PetscInt       n,i,jrow,j;

1841:   VecGetArray(xx,&x);
1842:   VecGetArray(yy,&y);
1843:   idx  = a->j;
1844:   v    = a->a;
1845:   ii   = a->i;

1847:   for (i=0; i<m; i++) {
1848:     jrow = ii[i];
1849:     n    = ii[i+1] - jrow;
1850:     sum1  = 0.0;
1851:     sum2  = 0.0;
1852:     sum3  = 0.0;
1853:     sum4  = 0.0;
1854:     sum5  = 0.0;
1855:     sum6  = 0.0;
1856:     sum7  = 0.0;
1857:     sum8  = 0.0;
1858:     sum9  = 0.0;
1859:     sum10 = 0.0;
1860:     sum11 = 0.0;
1861:     sum12 = 0.0;
1862:     sum13 = 0.0;
1863:     sum14 = 0.0;
1864:     sum15 = 0.0;
1865:     sum16 = 0.0;
1866:     nonzerorow += (n>0);
1867:     for (j=0; j<n; j++) {
1868:       sum1  += v[jrow]*x[16*idx[jrow]];
1869:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1870:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1871:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1872:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1873:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1874:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1875:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1876:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1877:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1878:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1879:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1880:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1881:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1882:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1883:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1884:       jrow++;
1885:      }
1886:     y[16*i]    = sum1;
1887:     y[16*i+1]  = sum2;
1888:     y[16*i+2]  = sum3;
1889:     y[16*i+3]  = sum4;
1890:     y[16*i+4]  = sum5;
1891:     y[16*i+5]  = sum6;
1892:     y[16*i+6]  = sum7;
1893:     y[16*i+7]  = sum8;
1894:     y[16*i+8]  = sum9;
1895:     y[16*i+9]  = sum10;
1896:     y[16*i+10] = sum11;
1897:     y[16*i+11] = sum12;
1898:     y[16*i+12] = sum13;
1899:     y[16*i+13] = sum14;
1900:     y[16*i+14] = sum15;
1901:     y[16*i+15] = sum16;
1902:   }

1904:   PetscLogFlops(32*a->nz - 16*nonzerorow);
1905:   VecRestoreArray(xx,&x);
1906:   VecRestoreArray(yy,&y);
1907:   return(0);
1908: }

1912: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1913: {
1914:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1915:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1916:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1917:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1919:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1922:   VecSet(yy,zero);
1923:   VecGetArray(xx,&x);
1924:   VecGetArray(yy,&y);

1926:   for (i=0; i<m; i++) {
1927:     idx    = a->j + a->i[i] ;
1928:     v      = a->a + a->i[i] ;
1929:     n      = a->i[i+1] - a->i[i];
1930:     alpha1  = x[16*i];
1931:     alpha2  = x[16*i+1];
1932:     alpha3  = x[16*i+2];
1933:     alpha4  = x[16*i+3];
1934:     alpha5  = x[16*i+4];
1935:     alpha6  = x[16*i+5];
1936:     alpha7  = x[16*i+6];
1937:     alpha8  = x[16*i+7];
1938:     alpha9  = x[16*i+8];
1939:     alpha10 = x[16*i+9];
1940:     alpha11 = x[16*i+10];
1941:     alpha12 = x[16*i+11];
1942:     alpha13 = x[16*i+12];
1943:     alpha14 = x[16*i+13];
1944:     alpha15 = x[16*i+14];
1945:     alpha16 = x[16*i+15];
1946:     while (n-->0) {
1947:       y[16*(*idx)]    += alpha1*(*v);
1948:       y[16*(*idx)+1]  += alpha2*(*v);
1949:       y[16*(*idx)+2]  += alpha3*(*v);
1950:       y[16*(*idx)+3]  += alpha4*(*v);
1951:       y[16*(*idx)+4]  += alpha5*(*v);
1952:       y[16*(*idx)+5]  += alpha6*(*v);
1953:       y[16*(*idx)+6]  += alpha7*(*v);
1954:       y[16*(*idx)+7]  += alpha8*(*v);
1955:       y[16*(*idx)+8]  += alpha9*(*v);
1956:       y[16*(*idx)+9]  += alpha10*(*v);
1957:       y[16*(*idx)+10] += alpha11*(*v);
1958:       y[16*(*idx)+11] += alpha12*(*v);
1959:       y[16*(*idx)+12] += alpha13*(*v);
1960:       y[16*(*idx)+13] += alpha14*(*v);
1961:       y[16*(*idx)+14] += alpha15*(*v);
1962:       y[16*(*idx)+15] += alpha16*(*v);
1963:       idx++; v++;
1964:     }
1965:   }
1966:   PetscLogFlops(32*a->nz);
1967:   VecRestoreArray(xx,&x);
1968:   VecRestoreArray(yy,&y);
1969:   return(0);
1970: }

1974: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1975: {
1976:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1977:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1978:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1979:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1981:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1982:   PetscInt       n,i,jrow,j;

1985:   if (yy != zz) {VecCopy(yy,zz);}
1986:   VecGetArray(xx,&x);
1987:   VecGetArray(zz,&y);
1988:   idx  = a->j;
1989:   v    = a->a;
1990:   ii   = a->i;

1992:   for (i=0; i<m; i++) {
1993:     jrow = ii[i];
1994:     n    = ii[i+1] - jrow;
1995:     sum1  = 0.0;
1996:     sum2  = 0.0;
1997:     sum3  = 0.0;
1998:     sum4  = 0.0;
1999:     sum5  = 0.0;
2000:     sum6  = 0.0;
2001:     sum7  = 0.0;
2002:     sum8  = 0.0;
2003:     sum9  = 0.0;
2004:     sum10 = 0.0;
2005:     sum11 = 0.0;
2006:     sum12 = 0.0;
2007:     sum13 = 0.0;
2008:     sum14 = 0.0;
2009:     sum15 = 0.0;
2010:     sum16 = 0.0;
2011:     for (j=0; j<n; j++) {
2012:       sum1  += v[jrow]*x[16*idx[jrow]];
2013:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2014:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2015:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2016:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2017:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2018:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2019:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2020:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2021:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2022:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2023:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2024:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2025:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2026:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2027:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2028:       jrow++;
2029:      }
2030:     y[16*i]    += sum1;
2031:     y[16*i+1]  += sum2;
2032:     y[16*i+2]  += sum3;
2033:     y[16*i+3]  += sum4;
2034:     y[16*i+4]  += sum5;
2035:     y[16*i+5]  += sum6;
2036:     y[16*i+6]  += sum7;
2037:     y[16*i+7]  += sum8;
2038:     y[16*i+8]  += sum9;
2039:     y[16*i+9]  += sum10;
2040:     y[16*i+10] += sum11;
2041:     y[16*i+11] += sum12;
2042:     y[16*i+12] += sum13;
2043:     y[16*i+13] += sum14;
2044:     y[16*i+14] += sum15;
2045:     y[16*i+15] += sum16;
2046:   }

2048:   PetscLogFlops(32*a->nz);
2049:   VecRestoreArray(xx,&x);
2050:   VecRestoreArray(zz,&y);
2051:   return(0);
2052: }

2056: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2057: {
2058:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2059:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2060:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2061:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2063:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

2066:   if (yy != zz) {VecCopy(yy,zz);}
2067:   VecGetArray(xx,&x);
2068:   VecGetArray(zz,&y);
2069:   for (i=0; i<m; i++) {
2070:     idx    = a->j + a->i[i] ;
2071:     v      = a->a + a->i[i] ;
2072:     n      = a->i[i+1] - a->i[i];
2073:     alpha1 = x[16*i];
2074:     alpha2 = x[16*i+1];
2075:     alpha3 = x[16*i+2];
2076:     alpha4 = x[16*i+3];
2077:     alpha5 = x[16*i+4];
2078:     alpha6 = x[16*i+5];
2079:     alpha7 = x[16*i+6];
2080:     alpha8 = x[16*i+7];
2081:     alpha9  = x[16*i+8];
2082:     alpha10 = x[16*i+9];
2083:     alpha11 = x[16*i+10];
2084:     alpha12 = x[16*i+11];
2085:     alpha13 = x[16*i+12];
2086:     alpha14 = x[16*i+13];
2087:     alpha15 = x[16*i+14];
2088:     alpha16 = x[16*i+15];
2089:     while (n-->0) {
2090:       y[16*(*idx)]   += alpha1*(*v);
2091:       y[16*(*idx)+1] += alpha2*(*v);
2092:       y[16*(*idx)+2] += alpha3*(*v);
2093:       y[16*(*idx)+3] += alpha4*(*v);
2094:       y[16*(*idx)+4] += alpha5*(*v);
2095:       y[16*(*idx)+5] += alpha6*(*v);
2096:       y[16*(*idx)+6] += alpha7*(*v);
2097:       y[16*(*idx)+7] += alpha8*(*v);
2098:       y[16*(*idx)+8]  += alpha9*(*v);
2099:       y[16*(*idx)+9]  += alpha10*(*v);
2100:       y[16*(*idx)+10] += alpha11*(*v);
2101:       y[16*(*idx)+11] += alpha12*(*v);
2102:       y[16*(*idx)+12] += alpha13*(*v);
2103:       y[16*(*idx)+13] += alpha14*(*v);
2104:       y[16*(*idx)+14] += alpha15*(*v);
2105:       y[16*(*idx)+15] += alpha16*(*v);
2106:       idx++; v++;
2107:     }
2108:   }
2109:   PetscLogFlops(32*a->nz);
2110:   VecRestoreArray(xx,&x);
2111:   VecRestoreArray(zz,&y);
2112:   return(0);
2113: }

2115: /*--------------------------------------------------------------------------------------------*/
2118: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2119: {
2120:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2121:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2122:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2123:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2125:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
2126:   PetscInt       n,i,jrow,j;

2129:   VecGetArray(xx,&x);
2130:   VecGetArray(yy,&y);
2131:   idx  = a->j;
2132:   v    = a->a;
2133:   ii   = a->i;

2135:   for (i=0; i<m; i++) {
2136:     jrow = ii[i];
2137:     n    = ii[i+1] - jrow;
2138:     sum1  = 0.0;
2139:     sum2  = 0.0;
2140:     sum3  = 0.0;
2141:     sum4  = 0.0;
2142:     sum5  = 0.0;
2143:     sum6  = 0.0;
2144:     sum7  = 0.0;
2145:     sum8  = 0.0;
2146:     sum9  = 0.0;
2147:     sum10 = 0.0;
2148:     sum11 = 0.0;
2149:     sum12 = 0.0;
2150:     sum13 = 0.0;
2151:     sum14 = 0.0;
2152:     sum15 = 0.0;
2153:     sum16 = 0.0;
2154:     sum17 = 0.0;
2155:     sum18 = 0.0;
2156:     nonzerorow += (n>0);
2157:     for (j=0; j<n; j++) {
2158:       sum1  += v[jrow]*x[18*idx[jrow]];
2159:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2160:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2161:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2162:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2163:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2164:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2165:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2166:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2167:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2168:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2169:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2170:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2171:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2172:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2173:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2174:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2175:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2176:       jrow++;
2177:      }
2178:     y[18*i]    = sum1;
2179:     y[18*i+1]  = sum2;
2180:     y[18*i+2]  = sum3;
2181:     y[18*i+3]  = sum4;
2182:     y[18*i+4]  = sum5;
2183:     y[18*i+5]  = sum6;
2184:     y[18*i+6]  = sum7;
2185:     y[18*i+7]  = sum8;
2186:     y[18*i+8]  = sum9;
2187:     y[18*i+9]  = sum10;
2188:     y[18*i+10] = sum11;
2189:     y[18*i+11] = sum12;
2190:     y[18*i+12] = sum13;
2191:     y[18*i+13] = sum14;
2192:     y[18*i+14] = sum15;
2193:     y[18*i+15] = sum16;
2194:     y[18*i+16] = sum17;
2195:     y[18*i+17] = sum18;
2196:   }

2198:   PetscLogFlops(36*a->nz - 18*nonzerorow);
2199:   VecRestoreArray(xx,&x);
2200:   VecRestoreArray(yy,&y);
2201:   return(0);
2202: }

2206: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2207: {
2208:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2209:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2210:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2211:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2213:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

2216:   VecSet(yy,zero);
2217:   VecGetArray(xx,&x);
2218:   VecGetArray(yy,&y);

2220:   for (i=0; i<m; i++) {
2221:     idx    = a->j + a->i[i] ;
2222:     v      = a->a + a->i[i] ;
2223:     n      = a->i[i+1] - a->i[i];
2224:     alpha1  = x[18*i];
2225:     alpha2  = x[18*i+1];
2226:     alpha3  = x[18*i+2];
2227:     alpha4  = x[18*i+3];
2228:     alpha5  = x[18*i+4];
2229:     alpha6  = x[18*i+5];
2230:     alpha7  = x[18*i+6];
2231:     alpha8  = x[18*i+7];
2232:     alpha9  = x[18*i+8];
2233:     alpha10 = x[18*i+9];
2234:     alpha11 = x[18*i+10];
2235:     alpha12 = x[18*i+11];
2236:     alpha13 = x[18*i+12];
2237:     alpha14 = x[18*i+13];
2238:     alpha15 = x[18*i+14];
2239:     alpha16 = x[18*i+15];
2240:     alpha17 = x[18*i+16];
2241:     alpha18 = x[18*i+17];
2242:     while (n-->0) {
2243:       y[18*(*idx)]    += alpha1*(*v);
2244:       y[18*(*idx)+1]  += alpha2*(*v);
2245:       y[18*(*idx)+2]  += alpha3*(*v);
2246:       y[18*(*idx)+3]  += alpha4*(*v);
2247:       y[18*(*idx)+4]  += alpha5*(*v);
2248:       y[18*(*idx)+5]  += alpha6*(*v);
2249:       y[18*(*idx)+6]  += alpha7*(*v);
2250:       y[18*(*idx)+7]  += alpha8*(*v);
2251:       y[18*(*idx)+8]  += alpha9*(*v);
2252:       y[18*(*idx)+9]  += alpha10*(*v);
2253:       y[18*(*idx)+10] += alpha11*(*v);
2254:       y[18*(*idx)+11] += alpha12*(*v);
2255:       y[18*(*idx)+12] += alpha13*(*v);
2256:       y[18*(*idx)+13] += alpha14*(*v);
2257:       y[18*(*idx)+14] += alpha15*(*v);
2258:       y[18*(*idx)+15] += alpha16*(*v);
2259:       y[18*(*idx)+16] += alpha17*(*v);
2260:       y[18*(*idx)+17] += alpha18*(*v);
2261:       idx++; v++;
2262:     }
2263:   }
2264:   PetscLogFlops(36*a->nz);
2265:   VecRestoreArray(xx,&x);
2266:   VecRestoreArray(yy,&y);
2267:   return(0);
2268: }

2272: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2273: {
2274:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2275:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2276:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2277:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2279:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
2280:   PetscInt       n,i,jrow,j;

2283:   if (yy != zz) {VecCopy(yy,zz);}
2284:   VecGetArray(xx,&x);
2285:   VecGetArray(zz,&y);
2286:   idx  = a->j;
2287:   v    = a->a;
2288:   ii   = a->i;

2290:   for (i=0; i<m; i++) {
2291:     jrow = ii[i];
2292:     n    = ii[i+1] - jrow;
2293:     sum1  = 0.0;
2294:     sum2  = 0.0;
2295:     sum3  = 0.0;
2296:     sum4  = 0.0;
2297:     sum5  = 0.0;
2298:     sum6  = 0.0;
2299:     sum7  = 0.0;
2300:     sum8  = 0.0;
2301:     sum9  = 0.0;
2302:     sum10 = 0.0;
2303:     sum11 = 0.0;
2304:     sum12 = 0.0;
2305:     sum13 = 0.0;
2306:     sum14 = 0.0;
2307:     sum15 = 0.0;
2308:     sum16 = 0.0;
2309:     sum17 = 0.0;
2310:     sum18 = 0.0;
2311:     for (j=0; j<n; j++) {
2312:       sum1  += v[jrow]*x[18*idx[jrow]];
2313:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2314:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2315:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2316:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2317:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2318:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2319:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2320:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2321:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2322:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2323:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2324:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2325:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2326:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2327:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2328:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2329:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2330:       jrow++;
2331:      }
2332:     y[18*i]    += sum1;
2333:     y[18*i+1]  += sum2;
2334:     y[18*i+2]  += sum3;
2335:     y[18*i+3]  += sum4;
2336:     y[18*i+4]  += sum5;
2337:     y[18*i+5]  += sum6;
2338:     y[18*i+6]  += sum7;
2339:     y[18*i+7]  += sum8;
2340:     y[18*i+8]  += sum9;
2341:     y[18*i+9]  += sum10;
2342:     y[18*i+10] += sum11;
2343:     y[18*i+11] += sum12;
2344:     y[18*i+12] += sum13;
2345:     y[18*i+13] += sum14;
2346:     y[18*i+14] += sum15;
2347:     y[18*i+15] += sum16;
2348:     y[18*i+16] += sum17;
2349:     y[18*i+17] += sum18;
2350:   }

2352:   PetscLogFlops(36*a->nz);
2353:   VecRestoreArray(xx,&x);
2354:   VecRestoreArray(zz,&y);
2355:   return(0);
2356: }

2360: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2361: {
2362:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2363:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2364:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2365:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2367:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

2370:   if (yy != zz) {VecCopy(yy,zz);}
2371:   VecGetArray(xx,&x);
2372:   VecGetArray(zz,&y);
2373:   for (i=0; i<m; i++) {
2374:     idx    = a->j + a->i[i] ;
2375:     v      = a->a + a->i[i] ;
2376:     n      = a->i[i+1] - a->i[i];
2377:     alpha1 = x[18*i];
2378:     alpha2 = x[18*i+1];
2379:     alpha3 = x[18*i+2];
2380:     alpha4 = x[18*i+3];
2381:     alpha5 = x[18*i+4];
2382:     alpha6 = x[18*i+5];
2383:     alpha7 = x[18*i+6];
2384:     alpha8 = x[18*i+7];
2385:     alpha9  = x[18*i+8];
2386:     alpha10 = x[18*i+9];
2387:     alpha11 = x[18*i+10];
2388:     alpha12 = x[18*i+11];
2389:     alpha13 = x[18*i+12];
2390:     alpha14 = x[18*i+13];
2391:     alpha15 = x[18*i+14];
2392:     alpha16 = x[18*i+15];
2393:     alpha17 = x[18*i+16];
2394:     alpha18 = x[18*i+17];
2395:     while (n-->0) {
2396:       y[18*(*idx)]   += alpha1*(*v);
2397:       y[18*(*idx)+1] += alpha2*(*v);
2398:       y[18*(*idx)+2] += alpha3*(*v);
2399:       y[18*(*idx)+3] += alpha4*(*v);
2400:       y[18*(*idx)+4] += alpha5*(*v);
2401:       y[18*(*idx)+5] += alpha6*(*v);
2402:       y[18*(*idx)+6] += alpha7*(*v);
2403:       y[18*(*idx)+7] += alpha8*(*v);
2404:       y[18*(*idx)+8]  += alpha9*(*v);
2405:       y[18*(*idx)+9]  += alpha10*(*v);
2406:       y[18*(*idx)+10] += alpha11*(*v);
2407:       y[18*(*idx)+11] += alpha12*(*v);
2408:       y[18*(*idx)+12] += alpha13*(*v);
2409:       y[18*(*idx)+13] += alpha14*(*v);
2410:       y[18*(*idx)+14] += alpha15*(*v);
2411:       y[18*(*idx)+15] += alpha16*(*v);
2412:       y[18*(*idx)+16] += alpha17*(*v);
2413:       y[18*(*idx)+17] += alpha18*(*v);
2414:       idx++; v++;
2415:     }
2416:   }
2417:   PetscLogFlops(36*a->nz);
2418:   VecRestoreArray(xx,&x);
2419:   VecRestoreArray(zz,&y);
2420:   return(0);
2421: }

2423: /*===================================================================================*/
2426: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2427: {
2428:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2432:   /* start the scatter */
2433:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2434:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2435:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2436:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2437:   return(0);
2438: }

2442: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2443: {
2444:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2448:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2449:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2450:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2451:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2452:   return(0);
2453: }

2457: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2458: {
2459:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2463:   /* start the scatter */
2464:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2465:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2466:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2467:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2468:   return(0);
2469: }

2473: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2474: {
2475:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2479:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2480:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2481:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2482:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2483:   return(0);
2484: }

2486: /* ----------------------------------------------------------------*/
2489: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2490: {
2491:   /* This routine requires testing -- but it's getting better. */
2492:   PetscErrorCode     ierr;
2493:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2494:   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2495:   Mat                P=pp->AIJ;
2496:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2497:   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2498:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2499:   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2500:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2501:   MatScalar          *ca;

2504:   /* Start timer */

2507:   /* Get ij structure of P^T */
2508:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2510:   cn = pn*ppdof;
2511:   /* Allocate ci array, arrays for fill computation and */
2512:   /* free space for accumulating nonzero column info */
2513:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2514:   ci[0] = 0;

2516:   /* Work arrays for rows of P^T*A */
2517:   PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2518:   PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2519:   ptasparserow = ptadenserow  + an;
2520:   denserow     = ptasparserow + an;
2521:   sparserow    = denserow     + cn;

2523:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2524:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2525:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2526:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2527:   current_space = free_space;

2529:   /* Determine symbolic info for each row of C: */
2530:   for (i=0;i<pn;i++) {
2531:     ptnzi  = pti[i+1] - pti[i];
2532:     ptJ    = ptj + pti[i];
2533:     for (dof=0;dof<ppdof;dof++) {
2534:       ptanzi = 0;
2535:       /* Determine symbolic row of PtA: */
2536:       for (j=0;j<ptnzi;j++) {
2537:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2538:         arow = ptJ[j]*ppdof + dof;
2539:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2540:         anzj = ai[arow+1] - ai[arow];
2541:         ajj  = aj + ai[arow];
2542:         for (k=0;k<anzj;k++) {
2543:           if (!ptadenserow[ajj[k]]) {
2544:             ptadenserow[ajj[k]]    = -1;
2545:             ptasparserow[ptanzi++] = ajj[k];
2546:           }
2547:         }
2548:       }
2549:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2550:       ptaj = ptasparserow;
2551:       cnzi   = 0;
2552:       for (j=0;j<ptanzi;j++) {
2553:         /* Get offset within block of P */
2554:         pshift = *ptaj%ppdof;
2555:         /* Get block row of P */
2556:         prow = (*ptaj++)/ppdof; /* integer division */
2557:         /* P has same number of nonzeros per row as the compressed form */
2558:         pnzj = pi[prow+1] - pi[prow];
2559:         pjj  = pj + pi[prow];
2560:         for (k=0;k<pnzj;k++) {
2561:           /* Locations in C are shifted by the offset within the block */
2562:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2563:           if (!denserow[pjj[k]*ppdof+pshift]) {
2564:             denserow[pjj[k]*ppdof+pshift] = -1;
2565:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2566:           }
2567:         }
2568:       }

2570:       /* sort sparserow */
2571:       PetscSortInt(cnzi,sparserow);
2572: 
2573:       /* If free space is not available, make more free space */
2574:       /* Double the amount of total space in the list */
2575:       if (current_space->local_remaining<cnzi) {
2576:         PetscFreeSpaceGet(current_space->total_array_size,&current_space);
2577:       }

2579:       /* Copy data into free space, and zero out denserows */
2580:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2581:       current_space->array           += cnzi;
2582:       current_space->local_used      += cnzi;
2583:       current_space->local_remaining -= cnzi;

2585:       for (j=0;j<ptanzi;j++) {
2586:         ptadenserow[ptasparserow[j]] = 0;
2587:       }
2588:       for (j=0;j<cnzi;j++) {
2589:         denserow[sparserow[j]] = 0;
2590:       }
2591:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2592:       /*        For now, we will recompute what is needed. */
2593:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2594:     }
2595:   }
2596:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2597:   /* Allocate space for cj, initialize cj, and */
2598:   /* destroy list of free space and other temporary array(s) */
2599:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2600:   PetscFreeSpaceContiguous(&free_space,cj);
2601:   PetscFree(ptadenserow);
2602: 
2603:   /* Allocate space for ca */
2604:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2605:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2606: 
2607:   /* put together the new matrix */
2608:   MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);

2610:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2611:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2612:   c          = (Mat_SeqAIJ *)((*C)->data);
2613:   c->free_a  = PETSC_TRUE;
2614:   c->free_ij = PETSC_TRUE;
2615:   c->nonew   = 0;

2617:   /* Clean up. */
2618:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2621:   return(0);
2622: }

2626: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2627: {
2628:   /* This routine requires testing -- first draft only */
2630:   PetscInt       flops=0;
2631:   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2632:   Mat            P=pp->AIJ;
2633:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2634:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2635:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2636:   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2637:   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2638:   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2639:   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2640:   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

2643:   /* Allocate temporary array for storage of one row of A*P */
2644:   PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2645:   PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));

2647:   apj      = (PetscInt *)(apa + cn);
2648:   apjdense = apj + cn;

2650:   /* Clear old values in C */
2651:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

2653:   for (i=0;i<am;i++) {
2654:     /* Form sparse row of A*P */
2655:     anzi  = ai[i+1] - ai[i];
2656:     apnzj = 0;
2657:     for (j=0;j<anzi;j++) {
2658:       /* Get offset within block of P */
2659:       pshift = *aj%ppdof;
2660:       /* Get block row of P */
2661:       prow   = *aj++/ppdof; /* integer division */
2662:       pnzj = pi[prow+1] - pi[prow];
2663:       pjj  = pj + pi[prow];
2664:       paj  = pa + pi[prow];
2665:       for (k=0;k<pnzj;k++) {
2666:         poffset = pjj[k]*ppdof+pshift;
2667:         if (!apjdense[poffset]) {
2668:           apjdense[poffset] = -1;
2669:           apj[apnzj++]      = poffset;
2670:         }
2671:         apa[poffset] += (*aa)*paj[k];
2672:       }
2673:       flops += 2*pnzj;
2674:       aa++;
2675:     }

2677:     /* Sort the j index array for quick sparse axpy. */
2678:     /* Note: a array does not need sorting as it is in dense storage locations. */
2679:     PetscSortInt(apnzj,apj);

2681:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2682:     prow    = i/ppdof; /* integer division */
2683:     pshift  = i%ppdof;
2684:     poffset = pi[prow];
2685:     pnzi = pi[prow+1] - poffset;
2686:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2687:     pJ   = pj+poffset;
2688:     pA   = pa+poffset;
2689:     for (j=0;j<pnzi;j++) {
2690:       crow   = (*pJ)*ppdof+pshift;
2691:       cjj    = cj + ci[crow];
2692:       caj    = ca + ci[crow];
2693:       pJ++;
2694:       /* Perform sparse axpy operation.  Note cjj includes apj. */
2695:       for (k=0,nextap=0;nextap<apnzj;k++) {
2696:         if (cjj[k]==apj[nextap]) {
2697:           caj[k] += (*pA)*apa[apj[nextap++]];
2698:         }
2699:       }
2700:       flops += 2*apnzj;
2701:       pA++;
2702:     }

2704:     /* Zero the current row info for A*P */
2705:     for (j=0;j<apnzj;j++) {
2706:       apa[apj[j]]      = 0.;
2707:       apjdense[apj[j]] = 0;
2708:     }
2709:   }

2711:   /* Assemble the final matrix and clean up */
2712:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2713:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2714:   PetscFree(apa);
2715:   PetscLogFlops(flops);
2716:   return(0);
2717: }

2721: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2722: {
2723:   PetscErrorCode    ierr;

2726:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2727:   MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
2728:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
2729:   return(0);
2730: }

2734: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2735: {
2737:   SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2738:   return(0);
2739: }

2744: PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2745: {
2746:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2747:   Mat               a = b->AIJ,B;
2748:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2749:   PetscErrorCode    ierr;
2750:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2751:   PetscInt          *cols;
2752:   PetscScalar       *vals;

2755:   MatGetSize(a,&m,&n);
2756:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2757:   for (i=0; i<m; i++) {
2758:     nmax = PetscMax(nmax,aij->ilen[i]);
2759:     for (j=0; j<dof; j++) {
2760:       ilen[dof*i+j] = aij->ilen[i];
2761:     }
2762:   }
2763:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2764:   PetscFree(ilen);
2765:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
2766:   ii   = 0;
2767:   for (i=0; i<m; i++) {
2768:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2769:     for (j=0; j<dof; j++) {
2770:       for (k=0; k<ncols; k++) {
2771:         icols[k] = dof*cols[k]+j;
2772:       }
2773:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2774:       ii++;
2775:     }
2776:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2777:   }
2778:   PetscFree(icols);
2779:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2780:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2782:   if (reuse == MAT_REUSE_MATRIX) {
2783:     MatHeaderReplace(A,B);
2784:   } else {
2785:     *newmat = B;
2786:   }
2787:   return(0);
2788: }

2791:  #include src/mat/impls/aij/mpi/mpiaij.h

2796: PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2797: {
2798:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2799:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2800:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2801:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2802:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2803:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2804:   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2805:   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2806:   PetscInt          rstart,cstart,*garray,ii,k;
2807:   PetscErrorCode    ierr;
2808:   PetscScalar       *vals,*ovals;

2811:   PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);
2812:   for (i=0; i<A->rmap.n/dof; i++) {
2813:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2814:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2815:     for (j=0; j<dof; j++) {
2816:       dnz[dof*i+j] = AIJ->ilen[i];
2817:       onz[dof*i+j] = OAIJ->ilen[i];
2818:     }
2819:   }
2820:   MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);
2821:   PetscFree2(dnz,onz);

2823:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2824:   rstart = dof*maij->A->rmap.rstart;
2825:   cstart = dof*maij->A->cmap.rstart;
2826:   garray = mpiaij->garray;

2828:   ii = rstart;
2829:   for (i=0; i<A->rmap.n/dof; i++) {
2830:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2831:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2832:     for (j=0; j<dof; j++) {
2833:       for (k=0; k<ncols; k++) {
2834:         icols[k] = cstart + dof*cols[k]+j;
2835:       }
2836:       for (k=0; k<oncols; k++) {
2837:         oicols[k] = dof*garray[ocols[k]]+j;
2838:       }
2839:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2840:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2841:       ii++;
2842:     }
2843:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2844:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2845:   }
2846:   PetscFree2(icols,oicols);

2848:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2849:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2851:   if (reuse == MAT_REUSE_MATRIX) {
2852:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
2853:     ((PetscObject)A)->refct = 1;
2854:     MatHeaderReplace(A,B);
2855:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
2856:   } else {
2857:     *newmat = B;
2858:   }
2859:   return(0);
2860: }


2864: /* ---------------------------------------------------------------------------------- */
2865: /*MC
2866:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
2867:   operations for multicomponent problems.  It interpolates each component the same
2868:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2869:   and MATMPIAIJ for distributed matrices.

2871:   Operations provided:
2872: + MatMult
2873: . MatMultTranspose
2874: . MatMultAdd
2875: . MatMultTransposeAdd
2876: - MatView

2878:   Level: advanced

2880: M*/
2883: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2884: {
2886:   PetscMPIInt    size;
2887:   PetscInt       n;
2888:   Mat_MPIMAIJ    *b;
2889:   Mat            B;

2892:   PetscObjectReference((PetscObject)A);

2894:   if (dof == 1) {
2895:     *maij = A;
2896:   } else {
2897:     MatCreate(((PetscObject)A)->comm,&B);
2898:     MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);
2899:     B->assembled    = PETSC_TRUE;

2901:     MPI_Comm_size(((PetscObject)A)->comm,&size);
2902:     if (size == 1) {
2903:       MatSetType(B,MATSEQMAIJ);
2904:       B->ops->destroy = MatDestroy_SeqMAIJ;
2905:       B->ops->view    = MatView_SeqMAIJ;
2906:       b      = (Mat_MPIMAIJ*)B->data;
2907:       b->dof = dof;
2908:       b->AIJ = A;
2909:       if (dof == 2) {
2910:         B->ops->mult             = MatMult_SeqMAIJ_2;
2911:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2912:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2913:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2914:       } else if (dof == 3) {
2915:         B->ops->mult             = MatMult_SeqMAIJ_3;
2916:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2917:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2918:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2919:       } else if (dof == 4) {
2920:         B->ops->mult             = MatMult_SeqMAIJ_4;
2921:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2922:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2923:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2924:       } else if (dof == 5) {
2925:         B->ops->mult             = MatMult_SeqMAIJ_5;
2926:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2927:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2928:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2929:       } else if (dof == 6) {
2930:         B->ops->mult             = MatMult_SeqMAIJ_6;
2931:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2932:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2933:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2934:       } else if (dof == 7) {
2935:         B->ops->mult             = MatMult_SeqMAIJ_7;
2936:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2937:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2938:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2939:       } else if (dof == 8) {
2940:         B->ops->mult             = MatMult_SeqMAIJ_8;
2941:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2942:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2943:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2944:       } else if (dof == 9) {
2945:         B->ops->mult             = MatMult_SeqMAIJ_9;
2946:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2947:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2948:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2949:       } else if (dof == 10) {
2950:         B->ops->mult             = MatMult_SeqMAIJ_10;
2951:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2952:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2953:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2954:       } else if (dof == 16) {
2955:         B->ops->mult             = MatMult_SeqMAIJ_16;
2956:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2957:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2958:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2959:       } else if (dof == 18) {
2960:         B->ops->mult             = MatMult_SeqMAIJ_18;
2961:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
2962:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
2963:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
2964:       } else {
2965:         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2966:       }
2967:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2968:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2969:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2970:     } else {
2971:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2972:       IS         from,to;
2973:       Vec        gvec;
2974:       PetscInt   *garray,i;

2976:       MatSetType(B,MATMPIMAIJ);
2977:       B->ops->destroy = MatDestroy_MPIMAIJ;
2978:       B->ops->view    = MatView_MPIMAIJ;
2979:       b      = (Mat_MPIMAIJ*)B->data;
2980:       b->dof = dof;
2981:       b->A   = A;
2982:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2983:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

2985:       VecGetSize(mpiaij->lvec,&n);
2986:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2987:       VecSetBlockSize(b->w,dof);

2989:       /* create two temporary Index sets for build scatter gather */
2990:       PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2991:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2992:       ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);
2993:       PetscFree(garray);
2994:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

2996:       /* create temporary global vector to generate scatter context */
2997:       VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap.n,dof*A->cmap.N,PETSC_NULL,&gvec);
2998:       VecSetBlockSize(gvec,dof);

3000:       /* generate the scatter context */
3001:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3003:       ISDestroy(from);
3004:       ISDestroy(to);
3005:       VecDestroy(gvec);

3007:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3008:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3009:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3010:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3011:       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3012:       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3013:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3014:     }
3015:     *maij = B;
3016:     MatView_Private(B);
3017:   }
3018:   return(0);
3019: }