Actual source code: maij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  5:   This format is used for restriction and interpolation operations for 
  6:   multicomponent problems. It interpolates each component the same way
  7:   independently.

  9:      We provide:
 10:          MatMult()
 11:          MatMultTranspose()
 12:          MatMultTransposeAdd()
 13:          MatMultAdd()
 14:           and
 15:          MatCreateMAIJ(Mat,dof,Mat*)

 17:      This single directory handles both the sequential and parallel codes
 18: */

 20:  #include src/mat/impls/maij/maij.h
 21:  #include src/mat/utils/freespace.h
 22:  #include private/vecimpl.h

 26: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 27: {
 29:   PetscTruth     ismpimaij,isseqmaij;

 32:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 33:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 34:   if (ismpimaij) {
 35:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 37:     *B = b->A;
 38:   } else if (isseqmaij) {
 39:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 41:     *B = b->AIJ;
 42:   } else {
 43:     *B = A;
 44:   }
 45:   return(0);
 46: }

 50: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 51: {
 53:   Mat            Aij;

 56:   MatMAIJGetAIJ(A,&Aij);
 57:   MatCreateMAIJ(Aij,dof,B);
 58:   return(0);
 59: }

 63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 64: {
 66:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 69:   if (b->AIJ) {
 70:     MatDestroy(b->AIJ);
 71:   }
 72:   PetscFree(b);
 73:   return(0);
 74: }

 78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
 79: {
 81:   Mat            B;

 84:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
 85:   MatView(B,viewer);
 86:   MatDestroy(B);
 87:   return(0);
 88: }

 92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
 93: {
 95:   Mat            B;

 98:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
 99:   MatView(B,viewer);
100:   MatDestroy(B);
101:   return(0);
102: }

106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

112:   if (b->AIJ) {
113:     MatDestroy(b->AIJ);
114:   }
115:   if (b->OAIJ) {
116:     MatDestroy(b->OAIJ);
117:   }
118:   if (b->A) {
119:     MatDestroy(b->A);
120:   }
121:   if (b->ctx) {
122:     VecScatterDestroy(b->ctx);
123:   }
124:   if (b->w) {
125:     VecDestroy(b->w);
126:   }
127:   PetscFree(b);
128:   PetscObjectChangeTypeName((PetscObject)A,0);
129:   return(0);
130: }

132: /*MC
133:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
134:   multicomponent problems, interpolating or restricting each component the same way independently.
135:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

137:   Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd

143:   Level: advanced

145: .seealso: MatCreateSeqDense
146: M*/

151: PetscErrorCode  MatCreate_MAIJ(Mat A)
152: {
154:   Mat_MPIMAIJ    *b;
155:   PetscMPIInt    size;

158:   PetscNewLog(A,Mat_MPIMAIJ,&b);
159:   A->data  = (void*)b;
160:   PetscMemzero(A->ops,sizeof(struct _MatOps));
161:   A->factor           = 0;
162:   A->mapping          = 0;

164:   b->AIJ  = 0;
165:   b->dof  = 0;
166:   b->OAIJ = 0;
167:   b->ctx  = 0;
168:   b->w    = 0;
169:   MPI_Comm_size(((PetscObject)A)->comm,&size);
170:   if (size == 1){
171:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
172:   } else {
173:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
174:   }
175:   return(0);
176: }

179: /* --------------------------------------------------------------------------------------*/
182: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
185:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
186:   PetscScalar    *x,*y,*v,sum1, sum2;
188:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
189:   PetscInt       n,i,jrow,j;

192:   VecGetArray(xx,&x);
193:   VecGetArray(yy,&y);
194:   idx  = a->j;
195:   v    = a->a;
196:   ii   = a->i;

198:   for (i=0; i<m; i++) {
199:     jrow = ii[i];
200:     n    = ii[i+1] - jrow;
201:     sum1  = 0.0;
202:     sum2  = 0.0;
203:     nonzerorow += (n>0);
204:     for (j=0; j<n; j++) {
205:       sum1 += v[jrow]*x[2*idx[jrow]];
206:       sum2 += v[jrow]*x[2*idx[jrow]+1];
207:       jrow++;
208:      }
209:     y[2*i]   = sum1;
210:     y[2*i+1] = sum2;
211:   }

213:   PetscLogFlops(4*a->nz - 2*nonzerorow);
214:   VecRestoreArray(xx,&x);
215:   VecRestoreArray(yy,&y);
216:   return(0);
217: }

221: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
222: {
223:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
224:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
225:   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
227:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

230:   VecSet(yy,zero);
231:   VecGetArray(xx,&x);
232:   VecGetArray(yy,&y);
233: 
234:   for (i=0; i<m; i++) {
235:     idx    = a->j + a->i[i] ;
236:     v      = a->a + a->i[i] ;
237:     n      = a->i[i+1] - a->i[i];
238:     alpha1 = x[2*i];
239:     alpha2 = x[2*i+1];
240:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
241:   }
242:   PetscLogFlops(4*a->nz);
243:   VecRestoreArray(xx,&x);
244:   VecRestoreArray(yy,&y);
245:   return(0);
246: }

250: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
251: {
252:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
253:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
254:   PetscScalar    *x,*y,*v,sum1, sum2;
256:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
257:   PetscInt       n,i,jrow,j;

260:   if (yy != zz) {VecCopy(yy,zz);}
261:   VecGetArray(xx,&x);
262:   VecGetArray(zz,&y);
263:   idx  = a->j;
264:   v    = a->a;
265:   ii   = a->i;

267:   for (i=0; i<m; i++) {
268:     jrow = ii[i];
269:     n    = ii[i+1] - jrow;
270:     sum1  = 0.0;
271:     sum2  = 0.0;
272:     for (j=0; j<n; j++) {
273:       sum1 += v[jrow]*x[2*idx[jrow]];
274:       sum2 += v[jrow]*x[2*idx[jrow]+1];
275:       jrow++;
276:      }
277:     y[2*i]   += sum1;
278:     y[2*i+1] += sum2;
279:   }

281:   PetscLogFlops(4*a->nz);
282:   VecRestoreArray(xx,&x);
283:   VecRestoreArray(zz,&y);
284:   return(0);
285: }
288: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
289: {
290:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
291:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
292:   PetscScalar    *x,*y,*v,alpha1,alpha2;
294:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

297:   if (yy != zz) {VecCopy(yy,zz);}
298:   VecGetArray(xx,&x);
299:   VecGetArray(zz,&y);
300: 
301:   for (i=0; i<m; i++) {
302:     idx   = a->j + a->i[i] ;
303:     v     = a->a + a->i[i] ;
304:     n     = a->i[i+1] - a->i[i];
305:     alpha1 = x[2*i];
306:     alpha2 = x[2*i+1];
307:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
308:   }
309:   PetscLogFlops(4*a->nz);
310:   VecRestoreArray(xx,&x);
311:   VecRestoreArray(zz,&y);
312:   return(0);
313: }
314: /* --------------------------------------------------------------------------------------*/
317: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
318: {
319:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
320:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
321:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
323:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
324:   PetscInt       n,i,jrow,j;

327:   VecGetArray(xx,&x);
328:   VecGetArray(yy,&y);
329:   idx  = a->j;
330:   v    = a->a;
331:   ii   = a->i;

333:   for (i=0; i<m; i++) {
334:     jrow = ii[i];
335:     n    = ii[i+1] - jrow;
336:     sum1  = 0.0;
337:     sum2  = 0.0;
338:     sum3  = 0.0;
339:     nonzerorow += (n>0);
340:     for (j=0; j<n; j++) {
341:       sum1 += v[jrow]*x[3*idx[jrow]];
342:       sum2 += v[jrow]*x[3*idx[jrow]+1];
343:       sum3 += v[jrow]*x[3*idx[jrow]+2];
344:       jrow++;
345:      }
346:     y[3*i]   = sum1;
347:     y[3*i+1] = sum2;
348:     y[3*i+2] = sum3;
349:   }

351:   PetscLogFlops(6*a->nz - 3*nonzerorow);
352:   VecRestoreArray(xx,&x);
353:   VecRestoreArray(yy,&y);
354:   return(0);
355: }

359: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
360: {
361:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
362:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
363:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
365:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

368:   VecSet(yy,zero);
369:   VecGetArray(xx,&x);
370:   VecGetArray(yy,&y);
371: 
372:   for (i=0; i<m; i++) {
373:     idx    = a->j + a->i[i];
374:     v      = a->a + a->i[i];
375:     n      = a->i[i+1] - a->i[i];
376:     alpha1 = x[3*i];
377:     alpha2 = x[3*i+1];
378:     alpha3 = x[3*i+2];
379:     while (n-->0) {
380:       y[3*(*idx)]   += alpha1*(*v);
381:       y[3*(*idx)+1] += alpha2*(*v);
382:       y[3*(*idx)+2] += alpha3*(*v);
383:       idx++; v++;
384:     }
385:   }
386:   PetscLogFlops(6*a->nz);
387:   VecRestoreArray(xx,&x);
388:   VecRestoreArray(yy,&y);
389:   return(0);
390: }

394: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
395: {
396:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
397:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
398:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
400:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
401:   PetscInt       n,i,jrow,j;

404:   if (yy != zz) {VecCopy(yy,zz);}
405:   VecGetArray(xx,&x);
406:   VecGetArray(zz,&y);
407:   idx  = a->j;
408:   v    = a->a;
409:   ii   = a->i;

411:   for (i=0; i<m; i++) {
412:     jrow = ii[i];
413:     n    = ii[i+1] - jrow;
414:     sum1  = 0.0;
415:     sum2  = 0.0;
416:     sum3  = 0.0;
417:     for (j=0; j<n; j++) {
418:       sum1 += v[jrow]*x[3*idx[jrow]];
419:       sum2 += v[jrow]*x[3*idx[jrow]+1];
420:       sum3 += v[jrow]*x[3*idx[jrow]+2];
421:       jrow++;
422:      }
423:     y[3*i]   += sum1;
424:     y[3*i+1] += sum2;
425:     y[3*i+2] += sum3;
426:   }

428:   PetscLogFlops(6*a->nz);
429:   VecRestoreArray(xx,&x);
430:   VecRestoreArray(zz,&y);
431:   return(0);
432: }
435: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
436: {
437:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
438:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
439:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
441:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

444:   if (yy != zz) {VecCopy(yy,zz);}
445:   VecGetArray(xx,&x);
446:   VecGetArray(zz,&y);
447:   for (i=0; i<m; i++) {
448:     idx    = a->j + a->i[i] ;
449:     v      = a->a + a->i[i] ;
450:     n      = a->i[i+1] - a->i[i];
451:     alpha1 = x[3*i];
452:     alpha2 = x[3*i+1];
453:     alpha3 = x[3*i+2];
454:     while (n-->0) {
455:       y[3*(*idx)]   += alpha1*(*v);
456:       y[3*(*idx)+1] += alpha2*(*v);
457:       y[3*(*idx)+2] += alpha3*(*v);
458:       idx++; v++;
459:     }
460:   }
461:   PetscLogFlops(6*a->nz);
462:   VecRestoreArray(xx,&x);
463:   VecRestoreArray(zz,&y);
464:   return(0);
465: }

467: /* ------------------------------------------------------------------------------*/
470: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
471: {
472:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
473:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
474:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
476:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
477:   PetscInt       n,i,jrow,j;

480:   VecGetArray(xx,&x);
481:   VecGetArray(yy,&y);
482:   idx  = a->j;
483:   v    = a->a;
484:   ii   = a->i;

486:   for (i=0; i<m; i++) {
487:     jrow = ii[i];
488:     n    = ii[i+1] - jrow;
489:     sum1  = 0.0;
490:     sum2  = 0.0;
491:     sum3  = 0.0;
492:     sum4  = 0.0;
493:     nonzerorow += (n>0);
494:     for (j=0; j<n; j++) {
495:       sum1 += v[jrow]*x[4*idx[jrow]];
496:       sum2 += v[jrow]*x[4*idx[jrow]+1];
497:       sum3 += v[jrow]*x[4*idx[jrow]+2];
498:       sum4 += v[jrow]*x[4*idx[jrow]+3];
499:       jrow++;
500:      }
501:     y[4*i]   = sum1;
502:     y[4*i+1] = sum2;
503:     y[4*i+2] = sum3;
504:     y[4*i+3] = sum4;
505:   }

507:   PetscLogFlops(8*a->nz - 4*nonzerorow);
508:   VecRestoreArray(xx,&x);
509:   VecRestoreArray(yy,&y);
510:   return(0);
511: }

515: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
516: {
517:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
518:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
519:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
521:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

524:   VecSet(yy,zero);
525:   VecGetArray(xx,&x);
526:   VecGetArray(yy,&y);
527:   for (i=0; i<m; i++) {
528:     idx    = a->j + a->i[i] ;
529:     v      = a->a + a->i[i] ;
530:     n      = a->i[i+1] - a->i[i];
531:     alpha1 = x[4*i];
532:     alpha2 = x[4*i+1];
533:     alpha3 = x[4*i+2];
534:     alpha4 = x[4*i+3];
535:     while (n-->0) {
536:       y[4*(*idx)]   += alpha1*(*v);
537:       y[4*(*idx)+1] += alpha2*(*v);
538:       y[4*(*idx)+2] += alpha3*(*v);
539:       y[4*(*idx)+3] += alpha4*(*v);
540:       idx++; v++;
541:     }
542:   }
543:   PetscLogFlops(8*a->nz);
544:   VecRestoreArray(xx,&x);
545:   VecRestoreArray(yy,&y);
546:   return(0);
547: }

551: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
552: {
553:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
554:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
555:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
557:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
558:   PetscInt       n,i,jrow,j;

561:   if (yy != zz) {VecCopy(yy,zz);}
562:   VecGetArray(xx,&x);
563:   VecGetArray(zz,&y);
564:   idx  = a->j;
565:   v    = a->a;
566:   ii   = a->i;

568:   for (i=0; i<m; i++) {
569:     jrow = ii[i];
570:     n    = ii[i+1] - jrow;
571:     sum1  = 0.0;
572:     sum2  = 0.0;
573:     sum3  = 0.0;
574:     sum4  = 0.0;
575:     for (j=0; j<n; j++) {
576:       sum1 += v[jrow]*x[4*idx[jrow]];
577:       sum2 += v[jrow]*x[4*idx[jrow]+1];
578:       sum3 += v[jrow]*x[4*idx[jrow]+2];
579:       sum4 += v[jrow]*x[4*idx[jrow]+3];
580:       jrow++;
581:      }
582:     y[4*i]   += sum1;
583:     y[4*i+1] += sum2;
584:     y[4*i+2] += sum3;
585:     y[4*i+3] += sum4;
586:   }

588:   PetscLogFlops(8*a->nz);
589:   VecRestoreArray(xx,&x);
590:   VecRestoreArray(zz,&y);
591:   return(0);
592: }
595: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596: {
597:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
598:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
599:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
601:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

604:   if (yy != zz) {VecCopy(yy,zz);}
605:   VecGetArray(xx,&x);
606:   VecGetArray(zz,&y);
607: 
608:   for (i=0; i<m; i++) {
609:     idx    = a->j + a->i[i] ;
610:     v      = a->a + a->i[i] ;
611:     n      = a->i[i+1] - a->i[i];
612:     alpha1 = x[4*i];
613:     alpha2 = x[4*i+1];
614:     alpha3 = x[4*i+2];
615:     alpha4 = x[4*i+3];
616:     while (n-->0) {
617:       y[4*(*idx)]   += alpha1*(*v);
618:       y[4*(*idx)+1] += alpha2*(*v);
619:       y[4*(*idx)+2] += alpha3*(*v);
620:       y[4*(*idx)+3] += alpha4*(*v);
621:       idx++; v++;
622:     }
623:   }
624:   PetscLogFlops(8*a->nz);
625:   VecRestoreArray(xx,&x);
626:   VecRestoreArray(zz,&y);
627:   return(0);
628: }
629: /* ------------------------------------------------------------------------------*/

633: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
634: {
635:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
636:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
637:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
639:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
640:   PetscInt       n,i,jrow,j;

643:   VecGetArray(xx,&x);
644:   VecGetArray(yy,&y);
645:   idx  = a->j;
646:   v    = a->a;
647:   ii   = a->i;

649:   for (i=0; i<m; i++) {
650:     jrow = ii[i];
651:     n    = ii[i+1] - jrow;
652:     sum1  = 0.0;
653:     sum2  = 0.0;
654:     sum3  = 0.0;
655:     sum4  = 0.0;
656:     sum5  = 0.0;
657:     nonzerorow += (n>0);
658:     for (j=0; j<n; j++) {
659:       sum1 += v[jrow]*x[5*idx[jrow]];
660:       sum2 += v[jrow]*x[5*idx[jrow]+1];
661:       sum3 += v[jrow]*x[5*idx[jrow]+2];
662:       sum4 += v[jrow]*x[5*idx[jrow]+3];
663:       sum5 += v[jrow]*x[5*idx[jrow]+4];
664:       jrow++;
665:      }
666:     y[5*i]   = sum1;
667:     y[5*i+1] = sum2;
668:     y[5*i+2] = sum3;
669:     y[5*i+3] = sum4;
670:     y[5*i+4] = sum5;
671:   }

673:   PetscLogFlops(10*a->nz - 5*nonzerorow);
674:   VecRestoreArray(xx,&x);
675:   VecRestoreArray(yy,&y);
676:   return(0);
677: }

681: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
682: {
683:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
684:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
685:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
687:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

690:   VecSet(yy,zero);
691:   VecGetArray(xx,&x);
692:   VecGetArray(yy,&y);
693: 
694:   for (i=0; i<m; i++) {
695:     idx    = a->j + a->i[i] ;
696:     v      = a->a + a->i[i] ;
697:     n      = a->i[i+1] - a->i[i];
698:     alpha1 = x[5*i];
699:     alpha2 = x[5*i+1];
700:     alpha3 = x[5*i+2];
701:     alpha4 = x[5*i+3];
702:     alpha5 = x[5*i+4];
703:     while (n-->0) {
704:       y[5*(*idx)]   += alpha1*(*v);
705:       y[5*(*idx)+1] += alpha2*(*v);
706:       y[5*(*idx)+2] += alpha3*(*v);
707:       y[5*(*idx)+3] += alpha4*(*v);
708:       y[5*(*idx)+4] += alpha5*(*v);
709:       idx++; v++;
710:     }
711:   }
712:   PetscLogFlops(10*a->nz);
713:   VecRestoreArray(xx,&x);
714:   VecRestoreArray(yy,&y);
715:   return(0);
716: }

720: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
721: {
722:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
723:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
724:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
726:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
727:   PetscInt       n,i,jrow,j;

730:   if (yy != zz) {VecCopy(yy,zz);}
731:   VecGetArray(xx,&x);
732:   VecGetArray(zz,&y);
733:   idx  = a->j;
734:   v    = a->a;
735:   ii   = a->i;

737:   for (i=0; i<m; i++) {
738:     jrow = ii[i];
739:     n    = ii[i+1] - jrow;
740:     sum1  = 0.0;
741:     sum2  = 0.0;
742:     sum3  = 0.0;
743:     sum4  = 0.0;
744:     sum5  = 0.0;
745:     for (j=0; j<n; j++) {
746:       sum1 += v[jrow]*x[5*idx[jrow]];
747:       sum2 += v[jrow]*x[5*idx[jrow]+1];
748:       sum3 += v[jrow]*x[5*idx[jrow]+2];
749:       sum4 += v[jrow]*x[5*idx[jrow]+3];
750:       sum5 += v[jrow]*x[5*idx[jrow]+4];
751:       jrow++;
752:      }
753:     y[5*i]   += sum1;
754:     y[5*i+1] += sum2;
755:     y[5*i+2] += sum3;
756:     y[5*i+3] += sum4;
757:     y[5*i+4] += sum5;
758:   }

760:   PetscLogFlops(10*a->nz);
761:   VecRestoreArray(xx,&x);
762:   VecRestoreArray(zz,&y);
763:   return(0);
764: }

768: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
769: {
770:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
771:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
772:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
774:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

777:   if (yy != zz) {VecCopy(yy,zz);}
778:   VecGetArray(xx,&x);
779:   VecGetArray(zz,&y);
780: 
781:   for (i=0; i<m; i++) {
782:     idx    = a->j + a->i[i] ;
783:     v      = a->a + a->i[i] ;
784:     n      = a->i[i+1] - a->i[i];
785:     alpha1 = x[5*i];
786:     alpha2 = x[5*i+1];
787:     alpha3 = x[5*i+2];
788:     alpha4 = x[5*i+3];
789:     alpha5 = x[5*i+4];
790:     while (n-->0) {
791:       y[5*(*idx)]   += alpha1*(*v);
792:       y[5*(*idx)+1] += alpha2*(*v);
793:       y[5*(*idx)+2] += alpha3*(*v);
794:       y[5*(*idx)+3] += alpha4*(*v);
795:       y[5*(*idx)+4] += alpha5*(*v);
796:       idx++; v++;
797:     }
798:   }
799:   PetscLogFlops(10*a->nz);
800:   VecRestoreArray(xx,&x);
801:   VecRestoreArray(zz,&y);
802:   return(0);
803: }

805: /* ------------------------------------------------------------------------------*/
808: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
809: {
810:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
811:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
812:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
814:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
815:   PetscInt       n,i,jrow,j;

818:   VecGetArray(xx,&x);
819:   VecGetArray(yy,&y);
820:   idx  = a->j;
821:   v    = a->a;
822:   ii   = a->i;

824:   for (i=0; i<m; i++) {
825:     jrow = ii[i];
826:     n    = ii[i+1] - jrow;
827:     sum1  = 0.0;
828:     sum2  = 0.0;
829:     sum3  = 0.0;
830:     sum4  = 0.0;
831:     sum5  = 0.0;
832:     sum6  = 0.0;
833:     nonzerorow += (n>0);
834:     for (j=0; j<n; j++) {
835:       sum1 += v[jrow]*x[6*idx[jrow]];
836:       sum2 += v[jrow]*x[6*idx[jrow]+1];
837:       sum3 += v[jrow]*x[6*idx[jrow]+2];
838:       sum4 += v[jrow]*x[6*idx[jrow]+3];
839:       sum5 += v[jrow]*x[6*idx[jrow]+4];
840:       sum6 += v[jrow]*x[6*idx[jrow]+5];
841:       jrow++;
842:      }
843:     y[6*i]   = sum1;
844:     y[6*i+1] = sum2;
845:     y[6*i+2] = sum3;
846:     y[6*i+3] = sum4;
847:     y[6*i+4] = sum5;
848:     y[6*i+5] = sum6;
849:   }

851:   PetscLogFlops(12*a->nz - 6*nonzerorow);
852:   VecRestoreArray(xx,&x);
853:   VecRestoreArray(yy,&y);
854:   return(0);
855: }

859: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
860: {
861:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
862:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
863:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
865:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

868:   VecSet(yy,zero);
869:   VecGetArray(xx,&x);
870:   VecGetArray(yy,&y);

872:   for (i=0; i<m; i++) {
873:     idx    = a->j + a->i[i] ;
874:     v      = a->a + a->i[i] ;
875:     n      = a->i[i+1] - a->i[i];
876:     alpha1 = x[6*i];
877:     alpha2 = x[6*i+1];
878:     alpha3 = x[6*i+2];
879:     alpha4 = x[6*i+3];
880:     alpha5 = x[6*i+4];
881:     alpha6 = x[6*i+5];
882:     while (n-->0) {
883:       y[6*(*idx)]   += alpha1*(*v);
884:       y[6*(*idx)+1] += alpha2*(*v);
885:       y[6*(*idx)+2] += alpha3*(*v);
886:       y[6*(*idx)+3] += alpha4*(*v);
887:       y[6*(*idx)+4] += alpha5*(*v);
888:       y[6*(*idx)+5] += alpha6*(*v);
889:       idx++; v++;
890:     }
891:   }
892:   PetscLogFlops(12*a->nz);
893:   VecRestoreArray(xx,&x);
894:   VecRestoreArray(yy,&y);
895:   return(0);
896: }

900: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
901: {
902:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
903:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
904:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
906:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
907:   PetscInt       n,i,jrow,j;

910:   if (yy != zz) {VecCopy(yy,zz);}
911:   VecGetArray(xx,&x);
912:   VecGetArray(zz,&y);
913:   idx  = a->j;
914:   v    = a->a;
915:   ii   = a->i;

917:   for (i=0; i<m; i++) {
918:     jrow = ii[i];
919:     n    = ii[i+1] - jrow;
920:     sum1  = 0.0;
921:     sum2  = 0.0;
922:     sum3  = 0.0;
923:     sum4  = 0.0;
924:     sum5  = 0.0;
925:     sum6  = 0.0;
926:     for (j=0; j<n; j++) {
927:       sum1 += v[jrow]*x[6*idx[jrow]];
928:       sum2 += v[jrow]*x[6*idx[jrow]+1];
929:       sum3 += v[jrow]*x[6*idx[jrow]+2];
930:       sum4 += v[jrow]*x[6*idx[jrow]+3];
931:       sum5 += v[jrow]*x[6*idx[jrow]+4];
932:       sum6 += v[jrow]*x[6*idx[jrow]+5];
933:       jrow++;
934:      }
935:     y[6*i]   += sum1;
936:     y[6*i+1] += sum2;
937:     y[6*i+2] += sum3;
938:     y[6*i+3] += sum4;
939:     y[6*i+4] += sum5;
940:     y[6*i+5] += sum6;
941:   }

943:   PetscLogFlops(12*a->nz);
944:   VecRestoreArray(xx,&x);
945:   VecRestoreArray(zz,&y);
946:   return(0);
947: }

951: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
952: {
953:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
954:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
955:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
957:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

960:   if (yy != zz) {VecCopy(yy,zz);}
961:   VecGetArray(xx,&x);
962:   VecGetArray(zz,&y);
963: 
964:   for (i=0; i<m; i++) {
965:     idx    = a->j + a->i[i] ;
966:     v      = a->a + a->i[i] ;
967:     n      = a->i[i+1] - a->i[i];
968:     alpha1 = x[6*i];
969:     alpha2 = x[6*i+1];
970:     alpha3 = x[6*i+2];
971:     alpha4 = x[6*i+3];
972:     alpha5 = x[6*i+4];
973:     alpha6 = x[6*i+5];
974:     while (n-->0) {
975:       y[6*(*idx)]   += alpha1*(*v);
976:       y[6*(*idx)+1] += alpha2*(*v);
977:       y[6*(*idx)+2] += alpha3*(*v);
978:       y[6*(*idx)+3] += alpha4*(*v);
979:       y[6*(*idx)+4] += alpha5*(*v);
980:       y[6*(*idx)+5] += alpha6*(*v);
981:       idx++; v++;
982:     }
983:   }
984:   PetscLogFlops(12*a->nz);
985:   VecRestoreArray(xx,&x);
986:   VecRestoreArray(zz,&y);
987:   return(0);
988: }

990: /* ------------------------------------------------------------------------------*/
993: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
994: {
995:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
996:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
997:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
999:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1000:   PetscInt       n,i,jrow,j;

1003:   VecGetArray(xx,&x);
1004:   VecGetArray(yy,&y);
1005:   idx  = a->j;
1006:   v    = a->a;
1007:   ii   = a->i;

1009:   for (i=0; i<m; i++) {
1010:     jrow = ii[i];
1011:     n    = ii[i+1] - jrow;
1012:     sum1  = 0.0;
1013:     sum2  = 0.0;
1014:     sum3  = 0.0;
1015:     sum4  = 0.0;
1016:     sum5  = 0.0;
1017:     sum6  = 0.0;
1018:     sum7  = 0.0;
1019:     nonzerorow += (n>0);
1020:     for (j=0; j<n; j++) {
1021:       sum1 += v[jrow]*x[7*idx[jrow]];
1022:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1023:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1024:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1025:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1026:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1027:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1028:       jrow++;
1029:      }
1030:     y[7*i]   = sum1;
1031:     y[7*i+1] = sum2;
1032:     y[7*i+2] = sum3;
1033:     y[7*i+3] = sum4;
1034:     y[7*i+4] = sum5;
1035:     y[7*i+5] = sum6;
1036:     y[7*i+6] = sum7;
1037:   }

1039:   PetscLogFlops(14*a->nz - 7*nonzerorow);
1040:   VecRestoreArray(xx,&x);
1041:   VecRestoreArray(yy,&y);
1042:   return(0);
1043: }

1047: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1048: {
1049:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1050:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1051:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1053:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1056:   VecSet(yy,zero);
1057:   VecGetArray(xx,&x);
1058:   VecGetArray(yy,&y);

1060:   for (i=0; i<m; i++) {
1061:     idx    = a->j + a->i[i] ;
1062:     v      = a->a + a->i[i] ;
1063:     n      = a->i[i+1] - a->i[i];
1064:     alpha1 = x[7*i];
1065:     alpha2 = x[7*i+1];
1066:     alpha3 = x[7*i+2];
1067:     alpha4 = x[7*i+3];
1068:     alpha5 = x[7*i+4];
1069:     alpha6 = x[7*i+5];
1070:     alpha7 = x[7*i+6];
1071:     while (n-->0) {
1072:       y[7*(*idx)]   += alpha1*(*v);
1073:       y[7*(*idx)+1] += alpha2*(*v);
1074:       y[7*(*idx)+2] += alpha3*(*v);
1075:       y[7*(*idx)+3] += alpha4*(*v);
1076:       y[7*(*idx)+4] += alpha5*(*v);
1077:       y[7*(*idx)+5] += alpha6*(*v);
1078:       y[7*(*idx)+6] += alpha7*(*v);
1079:       idx++; v++;
1080:     }
1081:   }
1082:   PetscLogFlops(14*a->nz);
1083:   VecRestoreArray(xx,&x);
1084:   VecRestoreArray(yy,&y);
1085:   return(0);
1086: }

1090: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1091: {
1092:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1093:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1094:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1096:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1097:   PetscInt       n,i,jrow,j;

1100:   if (yy != zz) {VecCopy(yy,zz);}
1101:   VecGetArray(xx,&x);
1102:   VecGetArray(zz,&y);
1103:   idx  = a->j;
1104:   v    = a->a;
1105:   ii   = a->i;

1107:   for (i=0; i<m; i++) {
1108:     jrow = ii[i];
1109:     n    = ii[i+1] - jrow;
1110:     sum1  = 0.0;
1111:     sum2  = 0.0;
1112:     sum3  = 0.0;
1113:     sum4  = 0.0;
1114:     sum5  = 0.0;
1115:     sum6  = 0.0;
1116:     sum7  = 0.0;
1117:     for (j=0; j<n; j++) {
1118:       sum1 += v[jrow]*x[7*idx[jrow]];
1119:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1120:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1121:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1122:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1123:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1124:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1125:       jrow++;
1126:      }
1127:     y[7*i]   += sum1;
1128:     y[7*i+1] += sum2;
1129:     y[7*i+2] += sum3;
1130:     y[7*i+3] += sum4;
1131:     y[7*i+4] += sum5;
1132:     y[7*i+5] += sum6;
1133:     y[7*i+6] += sum7;
1134:   }

1136:   PetscLogFlops(14*a->nz);
1137:   VecRestoreArray(xx,&x);
1138:   VecRestoreArray(zz,&y);
1139:   return(0);
1140: }

1144: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1145: {
1146:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1147:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1148:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1150:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1153:   if (yy != zz) {VecCopy(yy,zz);}
1154:   VecGetArray(xx,&x);
1155:   VecGetArray(zz,&y);
1156:   for (i=0; i<m; i++) {
1157:     idx    = a->j + a->i[i] ;
1158:     v      = a->a + a->i[i] ;
1159:     n      = a->i[i+1] - a->i[i];
1160:     alpha1 = x[7*i];
1161:     alpha2 = x[7*i+1];
1162:     alpha3 = x[7*i+2];
1163:     alpha4 = x[7*i+3];
1164:     alpha5 = x[7*i+4];
1165:     alpha6 = x[7*i+5];
1166:     alpha7 = x[7*i+6];
1167:     while (n-->0) {
1168:       y[7*(*idx)]   += alpha1*(*v);
1169:       y[7*(*idx)+1] += alpha2*(*v);
1170:       y[7*(*idx)+2] += alpha3*(*v);
1171:       y[7*(*idx)+3] += alpha4*(*v);
1172:       y[7*(*idx)+4] += alpha5*(*v);
1173:       y[7*(*idx)+5] += alpha6*(*v);
1174:       y[7*(*idx)+6] += alpha7*(*v);
1175:       idx++; v++;
1176:     }
1177:   }
1178:   PetscLogFlops(14*a->nz);
1179:   VecRestoreArray(xx,&x);
1180:   VecRestoreArray(zz,&y);
1181:   return(0);
1182: }

1186: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1187: {
1188:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1189:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1190:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1192:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1193:   PetscInt       n,i,jrow,j;

1196:   VecGetArray(xx,&x);
1197:   VecGetArray(yy,&y);
1198:   idx  = a->j;
1199:   v    = a->a;
1200:   ii   = a->i;

1202:   for (i=0; i<m; i++) {
1203:     jrow = ii[i];
1204:     n    = ii[i+1] - jrow;
1205:     sum1  = 0.0;
1206:     sum2  = 0.0;
1207:     sum3  = 0.0;
1208:     sum4  = 0.0;
1209:     sum5  = 0.0;
1210:     sum6  = 0.0;
1211:     sum7  = 0.0;
1212:     sum8  = 0.0;
1213:     nonzerorow += (n>0);
1214:     for (j=0; j<n; j++) {
1215:       sum1 += v[jrow]*x[8*idx[jrow]];
1216:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1217:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1218:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1219:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1220:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1221:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1222:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1223:       jrow++;
1224:      }
1225:     y[8*i]   = sum1;
1226:     y[8*i+1] = sum2;
1227:     y[8*i+2] = sum3;
1228:     y[8*i+3] = sum4;
1229:     y[8*i+4] = sum5;
1230:     y[8*i+5] = sum6;
1231:     y[8*i+6] = sum7;
1232:     y[8*i+7] = sum8;
1233:   }

1235:   PetscLogFlops(16*a->nz - 8*nonzerorow);
1236:   VecRestoreArray(xx,&x);
1237:   VecRestoreArray(yy,&y);
1238:   return(0);
1239: }

1243: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1244: {
1245:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1246:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1247:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1249:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1252:   VecSet(yy,zero);
1253:   VecGetArray(xx,&x);
1254:   VecGetArray(yy,&y);

1256:   for (i=0; i<m; i++) {
1257:     idx    = a->j + a->i[i] ;
1258:     v      = a->a + a->i[i] ;
1259:     n      = a->i[i+1] - a->i[i];
1260:     alpha1 = x[8*i];
1261:     alpha2 = x[8*i+1];
1262:     alpha3 = x[8*i+2];
1263:     alpha4 = x[8*i+3];
1264:     alpha5 = x[8*i+4];
1265:     alpha6 = x[8*i+5];
1266:     alpha7 = x[8*i+6];
1267:     alpha8 = x[8*i+7];
1268:     while (n-->0) {
1269:       y[8*(*idx)]   += alpha1*(*v);
1270:       y[8*(*idx)+1] += alpha2*(*v);
1271:       y[8*(*idx)+2] += alpha3*(*v);
1272:       y[8*(*idx)+3] += alpha4*(*v);
1273:       y[8*(*idx)+4] += alpha5*(*v);
1274:       y[8*(*idx)+5] += alpha6*(*v);
1275:       y[8*(*idx)+6] += alpha7*(*v);
1276:       y[8*(*idx)+7] += alpha8*(*v);
1277:       idx++; v++;
1278:     }
1279:   }
1280:   PetscLogFlops(16*a->nz);
1281:   VecRestoreArray(xx,&x);
1282:   VecRestoreArray(yy,&y);
1283:   return(0);
1284: }

1288: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1289: {
1290:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1291:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1292:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1294:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1295:   PetscInt       n,i,jrow,j;

1298:   if (yy != zz) {VecCopy(yy,zz);}
1299:   VecGetArray(xx,&x);
1300:   VecGetArray(zz,&y);
1301:   idx  = a->j;
1302:   v    = a->a;
1303:   ii   = a->i;

1305:   for (i=0; i<m; i++) {
1306:     jrow = ii[i];
1307:     n    = ii[i+1] - jrow;
1308:     sum1  = 0.0;
1309:     sum2  = 0.0;
1310:     sum3  = 0.0;
1311:     sum4  = 0.0;
1312:     sum5  = 0.0;
1313:     sum6  = 0.0;
1314:     sum7  = 0.0;
1315:     sum8  = 0.0;
1316:     for (j=0; j<n; j++) {
1317:       sum1 += v[jrow]*x[8*idx[jrow]];
1318:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1319:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1320:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1321:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1322:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1323:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1324:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1325:       jrow++;
1326:      }
1327:     y[8*i]   += sum1;
1328:     y[8*i+1] += sum2;
1329:     y[8*i+2] += sum3;
1330:     y[8*i+3] += sum4;
1331:     y[8*i+4] += sum5;
1332:     y[8*i+5] += sum6;
1333:     y[8*i+6] += sum7;
1334:     y[8*i+7] += sum8;
1335:   }

1337:   PetscLogFlops(16*a->nz);
1338:   VecRestoreArray(xx,&x);
1339:   VecRestoreArray(zz,&y);
1340:   return(0);
1341: }

1345: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1346: {
1347:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1348:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1349:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1351:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1354:   if (yy != zz) {VecCopy(yy,zz);}
1355:   VecGetArray(xx,&x);
1356:   VecGetArray(zz,&y);
1357:   for (i=0; i<m; i++) {
1358:     idx    = a->j + a->i[i] ;
1359:     v      = a->a + a->i[i] ;
1360:     n      = a->i[i+1] - a->i[i];
1361:     alpha1 = x[8*i];
1362:     alpha2 = x[8*i+1];
1363:     alpha3 = x[8*i+2];
1364:     alpha4 = x[8*i+3];
1365:     alpha5 = x[8*i+4];
1366:     alpha6 = x[8*i+5];
1367:     alpha7 = x[8*i+6];
1368:     alpha8 = x[8*i+7];
1369:     while (n-->0) {
1370:       y[8*(*idx)]   += alpha1*(*v);
1371:       y[8*(*idx)+1] += alpha2*(*v);
1372:       y[8*(*idx)+2] += alpha3*(*v);
1373:       y[8*(*idx)+3] += alpha4*(*v);
1374:       y[8*(*idx)+4] += alpha5*(*v);
1375:       y[8*(*idx)+5] += alpha6*(*v);
1376:       y[8*(*idx)+6] += alpha7*(*v);
1377:       y[8*(*idx)+7] += alpha8*(*v);
1378:       idx++; v++;
1379:     }
1380:   }
1381:   PetscLogFlops(16*a->nz);
1382:   VecRestoreArray(xx,&x);
1383:   VecRestoreArray(zz,&y);
1384:   return(0);
1385: }

1387: /* ------------------------------------------------------------------------------*/
1390: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1391: {
1392:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1393:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1394:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1396:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1397:   PetscInt       n,i,jrow,j;

1400:   VecGetArray(xx,&x);
1401:   VecGetArray(yy,&y);
1402:   idx  = a->j;
1403:   v    = a->a;
1404:   ii   = a->i;

1406:   for (i=0; i<m; i++) {
1407:     jrow = ii[i];
1408:     n    = ii[i+1] - jrow;
1409:     sum1  = 0.0;
1410:     sum2  = 0.0;
1411:     sum3  = 0.0;
1412:     sum4  = 0.0;
1413:     sum5  = 0.0;
1414:     sum6  = 0.0;
1415:     sum7  = 0.0;
1416:     sum8  = 0.0;
1417:     sum9  = 0.0;
1418:     nonzerorow += (n>0);
1419:     for (j=0; j<n; j++) {
1420:       sum1 += v[jrow]*x[9*idx[jrow]];
1421:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1422:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1423:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1424:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1425:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1426:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1427:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1428:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1429:       jrow++;
1430:      }
1431:     y[9*i]   = sum1;
1432:     y[9*i+1] = sum2;
1433:     y[9*i+2] = sum3;
1434:     y[9*i+3] = sum4;
1435:     y[9*i+4] = sum5;
1436:     y[9*i+5] = sum6;
1437:     y[9*i+6] = sum7;
1438:     y[9*i+7] = sum8;
1439:     y[9*i+8] = sum9;
1440:   }

1442:   PetscLogFlops(18*a->nz - 9*nonzerorow);
1443:   VecRestoreArray(xx,&x);
1444:   VecRestoreArray(yy,&y);
1445:   return(0);
1446: }

1448: /* ------------------------------------------------------------------------------*/

1452: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1453: {
1454:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1455:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1456:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1458:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1461:   VecSet(yy,zero);
1462:   VecGetArray(xx,&x);
1463:   VecGetArray(yy,&y);

1465:   for (i=0; i<m; i++) {
1466:     idx    = a->j + a->i[i] ;
1467:     v      = a->a + a->i[i] ;
1468:     n      = a->i[i+1] - a->i[i];
1469:     alpha1 = x[9*i];
1470:     alpha2 = x[9*i+1];
1471:     alpha3 = x[9*i+2];
1472:     alpha4 = x[9*i+3];
1473:     alpha5 = x[9*i+4];
1474:     alpha6 = x[9*i+5];
1475:     alpha7 = x[9*i+6];
1476:     alpha8 = x[9*i+7];
1477:     alpha9 = x[9*i+8];
1478:     while (n-->0) {
1479:       y[9*(*idx)]   += alpha1*(*v);
1480:       y[9*(*idx)+1] += alpha2*(*v);
1481:       y[9*(*idx)+2] += alpha3*(*v);
1482:       y[9*(*idx)+3] += alpha4*(*v);
1483:       y[9*(*idx)+4] += alpha5*(*v);
1484:       y[9*(*idx)+5] += alpha6*(*v);
1485:       y[9*(*idx)+6] += alpha7*(*v);
1486:       y[9*(*idx)+7] += alpha8*(*v);
1487:       y[9*(*idx)+8] += alpha9*(*v);
1488:       idx++; v++;
1489:     }
1490:   }
1491:   PetscLogFlops(18*a->nz);
1492:   VecRestoreArray(xx,&x);
1493:   VecRestoreArray(yy,&y);
1494:   return(0);
1495: }

1499: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1500: {
1501:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1502:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1503:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1505:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1506:   PetscInt       n,i,jrow,j;

1509:   if (yy != zz) {VecCopy(yy,zz);}
1510:   VecGetArray(xx,&x);
1511:   VecGetArray(zz,&y);
1512:   idx  = a->j;
1513:   v    = a->a;
1514:   ii   = a->i;

1516:   for (i=0; i<m; i++) {
1517:     jrow = ii[i];
1518:     n    = ii[i+1] - jrow;
1519:     sum1  = 0.0;
1520:     sum2  = 0.0;
1521:     sum3  = 0.0;
1522:     sum4  = 0.0;
1523:     sum5  = 0.0;
1524:     sum6  = 0.0;
1525:     sum7  = 0.0;
1526:     sum8  = 0.0;
1527:     sum9  = 0.0;
1528:     for (j=0; j<n; j++) {
1529:       sum1 += v[jrow]*x[9*idx[jrow]];
1530:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1531:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1532:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1533:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1534:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1535:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1536:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1537:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1538:       jrow++;
1539:      }
1540:     y[9*i]   += sum1;
1541:     y[9*i+1] += sum2;
1542:     y[9*i+2] += sum3;
1543:     y[9*i+3] += sum4;
1544:     y[9*i+4] += sum5;
1545:     y[9*i+5] += sum6;
1546:     y[9*i+6] += sum7;
1547:     y[9*i+7] += sum8;
1548:     y[9*i+8] += sum9;
1549:   }

1551:   PetscLogFlops(18*a->nz);
1552:   VecRestoreArray(xx,&x);
1553:   VecRestoreArray(zz,&y);
1554:   return(0);
1555: }

1559: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1560: {
1561:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1562:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1563:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1565:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1568:   if (yy != zz) {VecCopy(yy,zz);}
1569:   VecGetArray(xx,&x);
1570:   VecGetArray(zz,&y);
1571:   for (i=0; i<m; i++) {
1572:     idx    = a->j + a->i[i] ;
1573:     v      = a->a + a->i[i] ;
1574:     n      = a->i[i+1] - a->i[i];
1575:     alpha1 = x[9*i];
1576:     alpha2 = x[9*i+1];
1577:     alpha3 = x[9*i+2];
1578:     alpha4 = x[9*i+3];
1579:     alpha5 = x[9*i+4];
1580:     alpha6 = x[9*i+5];
1581:     alpha7 = x[9*i+6];
1582:     alpha8 = x[9*i+7];
1583:     alpha9 = x[9*i+8];
1584:     while (n-->0) {
1585:       y[9*(*idx)]   += alpha1*(*v);
1586:       y[9*(*idx)+1] += alpha2*(*v);
1587:       y[9*(*idx)+2] += alpha3*(*v);
1588:       y[9*(*idx)+3] += alpha4*(*v);
1589:       y[9*(*idx)+4] += alpha5*(*v);
1590:       y[9*(*idx)+5] += alpha6*(*v);
1591:       y[9*(*idx)+6] += alpha7*(*v);
1592:       y[9*(*idx)+7] += alpha8*(*v);
1593:       y[9*(*idx)+8] += alpha9*(*v);
1594:       idx++; v++;
1595:     }
1596:   }
1597:   PetscLogFlops(18*a->nz);
1598:   VecRestoreArray(xx,&x);
1599:   VecRestoreArray(zz,&y);
1600:   return(0);
1601: }
1602: /*--------------------------------------------------------------------------------------------*/
1605: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1606: {
1607:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1608:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1609:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1611:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1612:   PetscInt       n,i,jrow,j;

1615:   VecGetArray(xx,&x);
1616:   VecGetArray(yy,&y);
1617:   idx  = a->j;
1618:   v    = a->a;
1619:   ii   = a->i;

1621:   for (i=0; i<m; i++) {
1622:     jrow = ii[i];
1623:     n    = ii[i+1] - jrow;
1624:     sum1  = 0.0;
1625:     sum2  = 0.0;
1626:     sum3  = 0.0;
1627:     sum4  = 0.0;
1628:     sum5  = 0.0;
1629:     sum6  = 0.0;
1630:     sum7  = 0.0;
1631:     sum8  = 0.0;
1632:     sum9  = 0.0;
1633:     sum10 = 0.0;
1634:     nonzerorow += (n>0);
1635:     for (j=0; j<n; j++) {
1636:       sum1  += v[jrow]*x[10*idx[jrow]];
1637:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1638:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1639:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1640:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1641:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1642:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1643:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1644:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1645:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1646:       jrow++;
1647:      }
1648:     y[10*i]   = sum1;
1649:     y[10*i+1] = sum2;
1650:     y[10*i+2] = sum3;
1651:     y[10*i+3] = sum4;
1652:     y[10*i+4] = sum5;
1653:     y[10*i+5] = sum6;
1654:     y[10*i+6] = sum7;
1655:     y[10*i+7] = sum8;
1656:     y[10*i+8] = sum9;
1657:     y[10*i+9] = sum10;
1658:   }

1660:   PetscLogFlops(20*a->nz - 10*nonzerorow);
1661:   VecRestoreArray(xx,&x);
1662:   VecRestoreArray(yy,&y);
1663:   return(0);
1664: }

1668: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1669: {
1670:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1671:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1672:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1674:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1675:   PetscInt       n,i,jrow,j;

1678:   if (yy != zz) {VecCopy(yy,zz);}
1679:   VecGetArray(xx,&x);
1680:   VecGetArray(zz,&y);
1681:   idx  = a->j;
1682:   v    = a->a;
1683:   ii   = a->i;

1685:   for (i=0; i<m; i++) {
1686:     jrow = ii[i];
1687:     n    = ii[i+1] - jrow;
1688:     sum1  = 0.0;
1689:     sum2  = 0.0;
1690:     sum3  = 0.0;
1691:     sum4  = 0.0;
1692:     sum5  = 0.0;
1693:     sum6  = 0.0;
1694:     sum7  = 0.0;
1695:     sum8  = 0.0;
1696:     sum9  = 0.0;
1697:     sum10 = 0.0;
1698:     for (j=0; j<n; j++) {
1699:       sum1  += v[jrow]*x[10*idx[jrow]];
1700:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1701:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1702:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1703:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1704:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1705:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1706:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1707:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1708:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1709:       jrow++;
1710:      }
1711:     y[10*i]   += sum1;
1712:     y[10*i+1] += sum2;
1713:     y[10*i+2] += sum3;
1714:     y[10*i+3] += sum4;
1715:     y[10*i+4] += sum5;
1716:     y[10*i+5] += sum6;
1717:     y[10*i+6] += sum7;
1718:     y[10*i+7] += sum8;
1719:     y[10*i+8] += sum9;
1720:     y[10*i+9] += sum10;
1721:   }

1723:   PetscLogFlops(20*a->nz);
1724:   VecRestoreArray(xx,&x);
1725:   VecRestoreArray(yy,&y);
1726:   return(0);
1727: }

1731: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1732: {
1733:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1734:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1735:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1737:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1740:   VecSet(yy,zero);
1741:   VecGetArray(xx,&x);
1742:   VecGetArray(yy,&y);

1744:   for (i=0; i<m; i++) {
1745:     idx    = a->j + a->i[i] ;
1746:     v      = a->a + a->i[i] ;
1747:     n      = a->i[i+1] - a->i[i];
1748:     alpha1 = x[10*i];
1749:     alpha2 = x[10*i+1];
1750:     alpha3 = x[10*i+2];
1751:     alpha4 = x[10*i+3];
1752:     alpha5 = x[10*i+4];
1753:     alpha6 = x[10*i+5];
1754:     alpha7 = x[10*i+6];
1755:     alpha8 = x[10*i+7];
1756:     alpha9 = x[10*i+8];
1757:     alpha10 = x[10*i+9];
1758:     while (n-->0) {
1759:       y[10*(*idx)]   += alpha1*(*v);
1760:       y[10*(*idx)+1] += alpha2*(*v);
1761:       y[10*(*idx)+2] += alpha3*(*v);
1762:       y[10*(*idx)+3] += alpha4*(*v);
1763:       y[10*(*idx)+4] += alpha5*(*v);
1764:       y[10*(*idx)+5] += alpha6*(*v);
1765:       y[10*(*idx)+6] += alpha7*(*v);
1766:       y[10*(*idx)+7] += alpha8*(*v);
1767:       y[10*(*idx)+8] += alpha9*(*v);
1768:       y[10*(*idx)+9] += alpha10*(*v);
1769:       idx++; v++;
1770:     }
1771:   }
1772:   PetscLogFlops(20*a->nz);
1773:   VecRestoreArray(xx,&x);
1774:   VecRestoreArray(yy,&y);
1775:   return(0);
1776: }

1780: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1781: {
1782:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1783:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1784:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1786:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1789:   if (yy != zz) {VecCopy(yy,zz);}
1790:   VecGetArray(xx,&x);
1791:   VecGetArray(zz,&y);
1792:   for (i=0; i<m; i++) {
1793:     idx    = a->j + a->i[i] ;
1794:     v      = a->a + a->i[i] ;
1795:     n      = a->i[i+1] - a->i[i];
1796:     alpha1 = x[10*i];
1797:     alpha2 = x[10*i+1];
1798:     alpha3 = x[10*i+2];
1799:     alpha4 = x[10*i+3];
1800:     alpha5 = x[10*i+4];
1801:     alpha6 = x[10*i+5];
1802:     alpha7 = x[10*i+6];
1803:     alpha8 = x[10*i+7];
1804:     alpha9 = x[10*i+8];
1805:     alpha10 = x[10*i+9];
1806:     while (n-->0) {
1807:       y[10*(*idx)]   += alpha1*(*v);
1808:       y[10*(*idx)+1] += alpha2*(*v);
1809:       y[10*(*idx)+2] += alpha3*(*v);
1810:       y[10*(*idx)+3] += alpha4*(*v);
1811:       y[10*(*idx)+4] += alpha5*(*v);
1812:       y[10*(*idx)+5] += alpha6*(*v);
1813:       y[10*(*idx)+6] += alpha7*(*v);
1814:       y[10*(*idx)+7] += alpha8*(*v);
1815:       y[10*(*idx)+8] += alpha9*(*v);
1816:       y[10*(*idx)+9] += alpha10*(*v);
1817:       idx++; v++;
1818:     }
1819:   }
1820:   PetscLogFlops(20*a->nz);
1821:   VecRestoreArray(xx,&x);
1822:   VecRestoreArray(zz,&y);
1823:   return(0);
1824: }


1827: /*--------------------------------------------------------------------------------------------*/
1830: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1831: {
1832:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1833:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1834:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1835:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1837:   PetscInt       m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1838:   PetscInt       n,i,jrow,j;

1841:   VecGetArray(xx,&x);
1842:   VecGetArray(yy,&y);
1843:   idx  = a->j;
1844:   v    = a->a;
1845:   ii   = a->i;

1847:   for (i=0; i<m; i++) {
1848:     jrow = ii[i];
1849:     n    = ii[i+1] - jrow;
1850:     sum1  = 0.0;
1851:     sum2  = 0.0;
1852:     sum3  = 0.0;
1853:     sum4  = 0.0;
1854:     sum5  = 0.0;
1855:     sum6  = 0.0;
1856:     sum7  = 0.0;
1857:     sum8  = 0.0;
1858:     sum9  = 0.0;
1859:     sum10 = 0.0;
1860:     sum11 = 0.0;
1861:     sum12 = 0.0;
1862:     sum13 = 0.0;
1863:     sum14 = 0.0;
1864:     sum15 = 0.0;
1865:     sum16 = 0.0;
1866:     nonzerorow += (n>0);
1867:     for (j=0; j<n; j++) {
1868:       sum1  += v[jrow]*x[16*idx[jrow]];
1869:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1870:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1871:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1872:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1873:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1874:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1875:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1876:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1877:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1878:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1879:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1880:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1881:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1882:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1883:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1884:       jrow++;
1885:      }
1886:     y[16*i]    = sum1;
1887:     y[16*i+1]  = sum2;
1888:     y[16*i+2]  = sum3;
1889:     y[16*i+3]  = sum4;
1890:     y[16*i+4]  = sum5;
1891:     y[16*i+5]  = sum6;
1892:     y[16*i+6]  = sum7;
1893:     y[16*i+7]  = sum8;
1894:     y[16*i+8]  = sum9;
1895:     y[16*i+9]  = sum10;
1896:     y[16*i+10] = sum11;
1897:     y[16*i+11] = sum12;
1898:     y[16*i+12] = sum13;
1899:     y[16*i+13] = sum14;
1900:     y[16*i+14] = sum15;
1901:     y[16*i+15] = sum16;
1902:   }

1904:   PetscLogFlops(32*a->nz - 16*nonzerorow);
1905:   VecRestoreArray(xx,&x);
1906:   VecRestoreArray(yy,&y);
1907:   return(0);
1908: }

1912: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1913: {
1914:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1915:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1916:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1917:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1919:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

1922:   VecSet(yy,zero);
1923:   VecGetArray(xx,&x);
1924:   VecGetArray(yy,&y);

1926:   for (i=0; i<m; i++) {
1927:     idx    = a->j + a->i[i] ;
1928:     v      = a->a + a->i[i] ;
1929:     n      = a->i[i+1] - a->i[i];
1930:     alpha1  = x[16*i];
1931:     alpha2  = x[16*i+1];
1932:     alpha3  = x[16*i+2];
1933:     alpha4  = x[16*i+3];
1934:     alpha5  = x[16*i+4];
1935:     alpha6  = x[16*i+5];
1936:     alpha7  = x[16*i+6];
1937:     alpha8  = x[16*i+7];
1938:     alpha9  = x[16*i+8];
1939:     alpha10 = x[16*i+9];
1940:     alpha11 = x[16*i+10];
1941:     alpha12 = x[16*i+11];
1942:     alpha13 = x[16*i+12];
1943:     alpha14 = x[16*i+13];
1944:     alpha15 = x[16*i+14];
1945:     alpha16 = x[16*i+15];
1946:     while (n-->0) {
1947:       y[16*(*idx)]    += alpha1*(*v);
1948:       y[16*(*idx)+1]  += alpha2*(*v);
1949:       y[16*(*idx)+2]  += alpha3*(*v);
1950:       y[16*(*idx)+3]  += alpha4*(*v);
1951:       y[16*(*idx)+4]  += alpha5*(*v);
1952:       y[16*(*idx)+5]  += alpha6*(*v);
1953:       y[16*(*idx)+6]  += alpha7*(*v);
1954:       y[16*(*idx)+7]  += alpha8*(*v);
1955:       y[16*(*idx)+8]  += alpha9*(*v);
1956:       y[16*(*idx)+9]  += alpha10*(*v);
1957:       y[16*(*idx)+10] += alpha11*(*v);
1958:       y[16*(*idx)+11] += alpha12*(*v);
1959:       y[16*(*idx)+12] += alpha13*(*v);
1960:       y[16*(*idx)+13] += alpha14*(*v);
1961:       y[16*(*idx)+14] += alpha15*(*v);
1962:       y[16*(*idx)+15] += alpha16*(*v);
1963:       idx++; v++;
1964:     }
1965:   }
1966:   PetscLogFlops(32*a->nz);
1967:   VecRestoreArray(xx,&x);
1968:   VecRestoreArray(yy,&y);
1969:   return(0);
1970: }

1974: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1975: {
1976:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1977:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1978:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1979:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1981:   PetscInt       m = b->AIJ->rmap.n,*idx,*ii;
1982:   PetscInt       n,i,jrow,j;

1985:   if (yy != zz) {VecCopy(yy,zz);}
1986:   VecGetArray(xx,&x);
1987:   VecGetArray(zz,&y);
1988:   idx  = a->j;
1989:   v    = a->a;
1990:   ii   = a->i;

1992:   for (i=0; i<m; i++) {
1993:     jrow = ii[i];
1994:     n    = ii[i+1] - jrow;
1995:     sum1  = 0.0;
1996:     sum2  = 0.0;
1997:     sum3  = 0.0;
1998:     sum4  = 0.0;
1999:     sum5  = 0.0;
2000:     sum6  = 0.0;
2001:     sum7  = 0.0;
2002:     sum8  = 0.0;
2003:     sum9  = 0.0;
2004:     sum10 = 0.0;
2005:     sum11 = 0.0;
2006:     sum12 = 0.0;
2007:     sum13 = 0.0;
2008:     sum14 = 0.0;
2009:     sum15 = 0.0;
2010:     sum16 = 0.0;
2011:     for (j=0; j<n; j++) {
2012:       sum1  += v[jrow]*x[16*idx[jrow]];
2013:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2014:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2015:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2016:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2017:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2018:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2019:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2020:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2021:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2022:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2023:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2024:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2025:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2026:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2027:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2028:       jrow++;
2029:      }
2030:     y[16*i]    += sum1;
2031:     y[16*i+1]  += sum2;
2032:     y[16*i+2]  += sum3;
2033:     y[16*i+3]  += sum4;
2034:     y[16*i+4]  += sum5;
2035:     y[16*i+5]  += sum6;
2036:     y[16*i+6]  += sum7;
2037:     y[16*i+7]  += sum8;
2038:     y[16*i+8]  += sum9;
2039:     y[16*i+9]  += sum10;
2040:     y[16*i+10] += sum11;
2041:     y[16*i+11] += sum12;
2042:     y[16*i+12] += sum13;
2043:     y[16*i+13] += sum14;
2044:     y[16*i+14] += sum15;
2045:     y[16*i+15] += sum16;
2046:   }

2048:   PetscLogFlops(32*a->nz);
2049:   VecRestoreArray(xx,&x);
2050:   VecRestoreArray(zz,&y);
2051:   return(0);
2052: }

2056: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2057: {
2058:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
2059:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
2060:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2061:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2063:   PetscInt       m = b->AIJ->rmap.n,n,i,*idx;

2066:   if (yy != zz) {VecCopy(yy,zz);}
2067:   VecGetArray(xx,&x);
2068:   VecGetArray(zz,&y);
2069:   for (i=0; i<m; i++) {
2070:     idx    = a->j + a->i[i] ;
2071:     v      = a->a + a->i[i] ;
2072:     n      = a->i[i+1] - a->i[i];
2073:     alpha1 = x[16*i];
2074:     alpha2 = x[16*i+1];
2075:     alpha3 = x[16*i+2];
2076:     alpha4 = x[16*i+3];
2077:     alpha5 = x[16*i+4];
2078:     alpha6 = x[16*i+5];
2079:     alpha7 = x[16*i+6];
2080:     alpha8 = x[16*i+7];
2081:     alpha9  = x[16*i+8];
2082:     alpha10 = x[16*i+9];
2083:     alpha11 = x[16*i+10];
2084:     alpha12 = x[16*i+11];
2085:     alpha13 = x[16*i+12];
2086:     alpha14 = x[16*i+13];
2087:     alpha15 = x[16*i+14];
2088:     alpha16 = x[16*i+15];
2089:     while (n-->0) {
2090:       y[16*(*idx)]   += alpha1*(*v);
2091:       y[16*(*idx)+1] += alpha2*(*v);
2092:       y[16*(*idx)+2] += alpha3*(*v);
2093:       y[16*(*idx)+3] += alpha4*(*v);
2094:       y[16*(*idx)+4] += alpha5*(*v);
2095:       y[16*(*idx)+5] += alpha6*(*v);
2096:       y[16*(*idx)+6] += alpha7*(*v);
2097:       y[16*(*idx)+7] += alpha8*(*v);
2098:       y[16*(*idx)+8]  += alpha9*(*v);
2099:       y[16*(*idx)+9]  += alpha10*(*v);
2100:       y[16*(*idx)+10] += alpha11*(*v);
2101:       y[16*(*idx)+11] += alpha12*(*v);
2102:       y[16*(*idx)+12] += alpha13*(*v);
2103:       y[16*(*idx)+13] += alpha14*(*v);
2104:       y[16*(*idx)+14] += alpha15*(*v);
2105:       y[16*(*idx)+15] += alpha16*(*v);
2106:       idx++; v++;
2107:     }
2108:   }
2109:   PetscLogFlops(32*a->nz);
2110:   VecRestoreArray(xx,&x);
2111:   VecRestoreArray(zz,&y);
2112:   return(0);
2113: }

2115: /*===================================================================================*/
2118: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2119: {
2120:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2124:   /* start the scatter */
2125:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2126:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2127:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2128:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2129:   return(0);
2130: }

2134: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2135: {
2136:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2140:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2141:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2142:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2143:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2144:   return(0);
2145: }

2149: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2150: {
2151:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2155:   /* start the scatter */
2156:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2157:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2158:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2159:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2160:   return(0);
2161: }

2165: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2166: {
2167:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2171:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2172:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2173:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2174:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2175:   return(0);
2176: }

2178: /* ----------------------------------------------------------------*/
2181: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2182: {
2183:   /* This routine requires testing -- but it's getting better. */
2184:   PetscErrorCode     ierr;
2185:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2186:   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2187:   Mat                P=pp->AIJ;
2188:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2189:   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2190:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2191:   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2192:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2193:   MatScalar          *ca;

2196:   /* Start timer */

2199:   /* Get ij structure of P^T */
2200:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2202:   cn = pn*ppdof;
2203:   /* Allocate ci array, arrays for fill computation and */
2204:   /* free space for accumulating nonzero column info */
2205:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2206:   ci[0] = 0;

2208:   /* Work arrays for rows of P^T*A */
2209:   PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2210:   PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2211:   ptasparserow = ptadenserow  + an;
2212:   denserow     = ptasparserow + an;
2213:   sparserow    = denserow     + cn;

2215:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2216:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2217:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2218:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2219:   current_space = free_space;

2221:   /* Determine symbolic info for each row of C: */
2222:   for (i=0;i<pn;i++) {
2223:     ptnzi  = pti[i+1] - pti[i];
2224:     ptJ    = ptj + pti[i];
2225:     for (dof=0;dof<ppdof;dof++) {
2226:       ptanzi = 0;
2227:       /* Determine symbolic row of PtA: */
2228:       for (j=0;j<ptnzi;j++) {
2229:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2230:         arow = ptJ[j]*ppdof + dof;
2231:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2232:         anzj = ai[arow+1] - ai[arow];
2233:         ajj  = aj + ai[arow];
2234:         for (k=0;k<anzj;k++) {
2235:           if (!ptadenserow[ajj[k]]) {
2236:             ptadenserow[ajj[k]]    = -1;
2237:             ptasparserow[ptanzi++] = ajj[k];
2238:           }
2239:         }
2240:       }
2241:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2242:       ptaj = ptasparserow;
2243:       cnzi   = 0;
2244:       for (j=0;j<ptanzi;j++) {
2245:         /* Get offset within block of P */
2246:         pshift = *ptaj%ppdof;
2247:         /* Get block row of P */
2248:         prow = (*ptaj++)/ppdof; /* integer division */
2249:         /* P has same number of nonzeros per row as the compressed form */
2250:         pnzj = pi[prow+1] - pi[prow];
2251:         pjj  = pj + pi[prow];
2252:         for (k=0;k<pnzj;k++) {
2253:           /* Locations in C are shifted by the offset within the block */
2254:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2255:           if (!denserow[pjj[k]*ppdof+pshift]) {
2256:             denserow[pjj[k]*ppdof+pshift] = -1;
2257:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2258:           }
2259:         }
2260:       }

2262:       /* sort sparserow */
2263:       PetscSortInt(cnzi,sparserow);
2264: 
2265:       /* If free space is not available, make more free space */
2266:       /* Double the amount of total space in the list */
2267:       if (current_space->local_remaining<cnzi) {
2268:         PetscFreeSpaceGet(current_space->total_array_size,&current_space);
2269:       }

2271:       /* Copy data into free space, and zero out denserows */
2272:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2273:       current_space->array           += cnzi;
2274:       current_space->local_used      += cnzi;
2275:       current_space->local_remaining -= cnzi;

2277:       for (j=0;j<ptanzi;j++) {
2278:         ptadenserow[ptasparserow[j]] = 0;
2279:       }
2280:       for (j=0;j<cnzi;j++) {
2281:         denserow[sparserow[j]] = 0;
2282:       }
2283:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2284:       /*        For now, we will recompute what is needed. */
2285:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2286:     }
2287:   }
2288:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2289:   /* Allocate space for cj, initialize cj, and */
2290:   /* destroy list of free space and other temporary array(s) */
2291:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2292:   PetscFreeSpaceContiguous(&free_space,cj);
2293:   PetscFree(ptadenserow);
2294: 
2295:   /* Allocate space for ca */
2296:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2297:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2298: 
2299:   /* put together the new matrix */
2300:   MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);

2302:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2303:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2304:   c          = (Mat_SeqAIJ *)((*C)->data);
2305:   c->free_a  = PETSC_TRUE;
2306:   c->free_ij = PETSC_TRUE;
2307:   c->nonew   = 0;

2309:   /* Clean up. */
2310:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2313:   return(0);
2314: }

2318: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2319: {
2320:   /* This routine requires testing -- first draft only */
2322:   PetscInt       flops=0;
2323:   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2324:   Mat            P=pp->AIJ;
2325:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2326:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2327:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2328:   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2329:   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2330:   PetscInt       am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2331:   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2332:   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

2335:   /* Allocate temporary array for storage of one row of A*P */
2336:   PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2337:   PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));

2339:   apj      = (PetscInt *)(apa + cn);
2340:   apjdense = apj + cn;

2342:   /* Clear old values in C */
2343:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

2345:   for (i=0;i<am;i++) {
2346:     /* Form sparse row of A*P */
2347:     anzi  = ai[i+1] - ai[i];
2348:     apnzj = 0;
2349:     for (j=0;j<anzi;j++) {
2350:       /* Get offset within block of P */
2351:       pshift = *aj%ppdof;
2352:       /* Get block row of P */
2353:       prow   = *aj++/ppdof; /* integer division */
2354:       pnzj = pi[prow+1] - pi[prow];
2355:       pjj  = pj + pi[prow];
2356:       paj  = pa + pi[prow];
2357:       for (k=0;k<pnzj;k++) {
2358:         poffset = pjj[k]*ppdof+pshift;
2359:         if (!apjdense[poffset]) {
2360:           apjdense[poffset] = -1;
2361:           apj[apnzj++]      = poffset;
2362:         }
2363:         apa[poffset] += (*aa)*paj[k];
2364:       }
2365:       flops += 2*pnzj;
2366:       aa++;
2367:     }

2369:     /* Sort the j index array for quick sparse axpy. */
2370:     /* Note: a array does not need sorting as it is in dense storage locations. */
2371:     PetscSortInt(apnzj,apj);

2373:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2374:     prow    = i/ppdof; /* integer division */
2375:     pshift  = i%ppdof;
2376:     poffset = pi[prow];
2377:     pnzi = pi[prow+1] - poffset;
2378:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2379:     pJ   = pj+poffset;
2380:     pA   = pa+poffset;
2381:     for (j=0;j<pnzi;j++) {
2382:       crow   = (*pJ)*ppdof+pshift;
2383:       cjj    = cj + ci[crow];
2384:       caj    = ca + ci[crow];
2385:       pJ++;
2386:       /* Perform sparse axpy operation.  Note cjj includes apj. */
2387:       for (k=0,nextap=0;nextap<apnzj;k++) {
2388:         if (cjj[k]==apj[nextap]) {
2389:           caj[k] += (*pA)*apa[apj[nextap++]];
2390:         }
2391:       }
2392:       flops += 2*apnzj;
2393:       pA++;
2394:     }

2396:     /* Zero the current row info for A*P */
2397:     for (j=0;j<apnzj;j++) {
2398:       apa[apj[j]]      = 0.;
2399:       apjdense[apj[j]] = 0;
2400:     }
2401:   }

2403:   /* Assemble the final matrix and clean up */
2404:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2405:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2406:   PetscFree(apa);
2407:   PetscLogFlops(flops);
2408:   return(0);
2409: }

2413: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2414: {
2415:   PetscErrorCode    ierr;

2418:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2419:   MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
2420:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
2421:   return(0);
2422: }

2426: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2427: {
2429:   SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2430:   return(0);
2431: }

2436: PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2437: {
2438:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2439:   Mat               a = b->AIJ,B;
2440:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2441:   PetscErrorCode    ierr;
2442:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2443:   PetscInt          *cols;
2444:   PetscScalar       *vals;

2447:   MatGetSize(a,&m,&n);
2448:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2449:   for (i=0; i<m; i++) {
2450:     nmax = PetscMax(nmax,aij->ilen[i]);
2451:     for (j=0; j<dof; j++) {
2452:       ilen[dof*i+j] = aij->ilen[i];
2453:     }
2454:   }
2455:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2456:   PetscFree(ilen);
2457:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
2458:   ii   = 0;
2459:   for (i=0; i<m; i++) {
2460:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2461:     for (j=0; j<dof; j++) {
2462:       for (k=0; k<ncols; k++) {
2463:         icols[k] = dof*cols[k]+j;
2464:       }
2465:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2466:       ii++;
2467:     }
2468:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2469:   }
2470:   PetscFree(icols);
2471:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2472:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2474:   if (reuse == MAT_REUSE_MATRIX) {
2475:     MatHeaderReplace(A,B);
2476:   } else {
2477:     *newmat = B;
2478:   }
2479:   return(0);
2480: }

2483:  #include src/mat/impls/aij/mpi/mpiaij.h

2488: PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2489: {
2490:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2491:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2492:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2493:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2494:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2495:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2496:   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2497:   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2498:   PetscInt          rstart,cstart,*garray,ii,k;
2499:   PetscErrorCode    ierr;
2500:   PetscScalar       *vals,*ovals;

2503:   PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);
2504:   for (i=0; i<A->rmap.n/dof; i++) {
2505:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2506:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2507:     for (j=0; j<dof; j++) {
2508:       dnz[dof*i+j] = AIJ->ilen[i];
2509:       onz[dof*i+j] = OAIJ->ilen[i];
2510:     }
2511:   }
2512:   MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);
2513:   PetscFree2(dnz,onz);

2515:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2516:   rstart = dof*maij->A->rmap.rstart;
2517:   cstart = dof*maij->A->cmap.rstart;
2518:   garray = mpiaij->garray;

2520:   ii = rstart;
2521:   for (i=0; i<A->rmap.n/dof; i++) {
2522:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2523:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2524:     for (j=0; j<dof; j++) {
2525:       for (k=0; k<ncols; k++) {
2526:         icols[k] = cstart + dof*cols[k]+j;
2527:       }
2528:       for (k=0; k<oncols; k++) {
2529:         oicols[k] = dof*garray[ocols[k]]+j;
2530:       }
2531:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2532:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2533:       ii++;
2534:     }
2535:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2536:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2537:   }
2538:   PetscFree2(icols,oicols);

2540:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2541:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2543:   if (reuse == MAT_REUSE_MATRIX) {
2544:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
2545:     ((PetscObject)A)->refct = 1;
2546:     MatHeaderReplace(A,B);
2547:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
2548:   } else {
2549:     *newmat = B;
2550:   }
2551:   return(0);
2552: }


2556: /* ---------------------------------------------------------------------------------- */
2557: /*MC
2558:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
2559:   operations for multicomponent problems.  It interpolates each component the same
2560:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2561:   and MATMPIAIJ for distributed matrices.

2563:   Operations provided:
2564: + MatMult
2565: . MatMultTranspose
2566: . MatMultAdd
2567: . MatMultTransposeAdd
2568: - MatView

2570:   Level: advanced

2572: M*/
2575: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2576: {
2578:   PetscMPIInt    size;
2579:   PetscInt       n;
2580:   Mat_MPIMAIJ    *b;
2581:   Mat            B;

2584:   PetscObjectReference((PetscObject)A);

2586:   if (dof == 1) {
2587:     *maij = A;
2588:   } else {
2589:     MatCreate(((PetscObject)A)->comm,&B);
2590:     MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);
2591:     B->assembled    = PETSC_TRUE;

2593:     MPI_Comm_size(((PetscObject)A)->comm,&size);
2594:     if (size == 1) {
2595:       MatSetType(B,MATSEQMAIJ);
2596:       B->ops->destroy = MatDestroy_SeqMAIJ;
2597:       B->ops->view    = MatView_SeqMAIJ;
2598:       b      = (Mat_MPIMAIJ*)B->data;
2599:       b->dof = dof;
2600:       b->AIJ = A;
2601:       if (dof == 2) {
2602:         B->ops->mult             = MatMult_SeqMAIJ_2;
2603:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2604:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2605:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2606:       } else if (dof == 3) {
2607:         B->ops->mult             = MatMult_SeqMAIJ_3;
2608:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2609:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2610:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2611:       } else if (dof == 4) {
2612:         B->ops->mult             = MatMult_SeqMAIJ_4;
2613:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2614:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2615:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2616:       } else if (dof == 5) {
2617:         B->ops->mult             = MatMult_SeqMAIJ_5;
2618:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2619:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2620:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2621:       } else if (dof == 6) {
2622:         B->ops->mult             = MatMult_SeqMAIJ_6;
2623:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2624:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2625:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2626:       } else if (dof == 7) {
2627:         B->ops->mult             = MatMult_SeqMAIJ_7;
2628:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2629:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2630:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2631:       } else if (dof == 8) {
2632:         B->ops->mult             = MatMult_SeqMAIJ_8;
2633:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2634:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2635:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2636:       } else if (dof == 9) {
2637:         B->ops->mult             = MatMult_SeqMAIJ_9;
2638:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2639:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2640:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2641:       } else if (dof == 10) {
2642:         B->ops->mult             = MatMult_SeqMAIJ_10;
2643:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
2644:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
2645:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2646:       } else if (dof == 16) {
2647:         B->ops->mult             = MatMult_SeqMAIJ_16;
2648:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2649:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2650:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2651:       } else {
2652:         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2653:       }
2654:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2655:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2656:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2657:     } else {
2658:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2659:       IS         from,to;
2660:       Vec        gvec;
2661:       PetscInt   *garray,i;

2663:       MatSetType(B,MATMPIMAIJ);
2664:       B->ops->destroy = MatDestroy_MPIMAIJ;
2665:       B->ops->view    = MatView_MPIMAIJ;
2666:       b      = (Mat_MPIMAIJ*)B->data;
2667:       b->dof = dof;
2668:       b->A   = A;
2669:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2670:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

2672:       VecGetSize(mpiaij->lvec,&n);
2673:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2674:       VecSetBlockSize(b->w,dof);

2676:       /* create two temporary Index sets for build scatter gather */
2677:       PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2678:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2679:       ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);
2680:       PetscFree(garray);
2681:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

2683:       /* create temporary global vector to generate scatter context */
2684:       VecCreateMPI(((PetscObject)A)->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);
2685:       VecSetBlockSize(gvec,dof);

2687:       /* generate the scatter context */
2688:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

2690:       ISDestroy(from);
2691:       ISDestroy(to);
2692:       VecDestroy(gvec);

2694:       B->ops->mult             = MatMult_MPIMAIJ_dof;
2695:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2696:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2697:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2698:       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
2699:       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
2700:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
2701:     }
2702:     *maij = B;
2703:     MatView_Private(B);
2704:   }
2705:   return(0);
2706: }