Actual source code: maij.c

  1: /*$Id: maij.c,v 1.16 2001/04/10 19:35:37 bsmith Exp $*/
  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for 
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19: #include "src/mat/impls/aij/mpi/mpiaij.h"

 21: typedef struct {
 22:   int        dof;         /* number of components */
 23:   Mat        AIJ;        /* representation of interpolation for one component */
 24: } Mat_SeqMAIJ;

 26: typedef struct {
 27:   int        dof;         /* number of components */
 28:   Mat        AIJ,OAIJ;    /* representation of interpolation for one component */
 29:   Mat        A;
 30:   VecScatter ctx;         /* update ghost points for parallel case */
 31:   Vec        w;           /* work space for ghost values for parallel case */
 32: } Mat_MPIMAIJ;

 34: int MatMAIJGetAIJ(Mat A,Mat *B)
 35: {
 36:   int         ierr;
 37:   PetscTruth  ismpimaij,isseqmaij;

 40:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 41:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 42:   if (ismpimaij) {
 43:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 45:     *B = b->A;
 46:   } else if (isseqmaij) {
 47:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 49:     *B = b->AIJ;
 50:   } else {
 51:     *B = A;
 52:   }
 53:   return(0);
 54: }

 56: int MatMAIJRedimension(Mat A,int dof,Mat *B)
 57: {
 59:   Mat Aij;

 62:   MatMAIJGetAIJ(A,&Aij);
 63:   MatCreateMAIJ(Aij,dof,B);
 64:   return(0);
 65: }

 67: int MatDestroy_SeqMAIJ(Mat A)
 68: {
 69:   int         ierr;
 70:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 73:   if (b->AIJ) {
 74:     MatDestroy(b->AIJ);
 75:   }
 76:   PetscFree(b);
 77:   return(0);
 78: }

 80: int MatDestroy_MPIMAIJ(Mat A)
 81: {
 82:   int         ierr;
 83:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 86:   if (b->AIJ) {
 87:     MatDestroy(b->AIJ);
 88:   }
 89:   if (b->OAIJ) {
 90:     MatDestroy(b->OAIJ);
 91:   }
 92:   if (b->A) {
 93:     MatDestroy(b->A);
 94:   }
 95:   if (b->ctx) {
 96:     VecScatterDestroy(b->ctx);
 97:   }
 98:   if (b->w) {
 99:     VecDestroy(b->w);
100:   }
101:   PetscFree(b);
102:   return(0);
103: }

105: EXTERN_C_BEGIN
106: int MatCreate_MAIJ(Mat A)
107: {
108:   int         ierr;
109:   Mat_MPIMAIJ *b;

112:   ierr     = PetscNew(Mat_MPIMAIJ,&b);
113:   A->data  = (void*)b;
114:   PetscMemzero(b,sizeof(Mat_MPIMAIJ));
115:   PetscMemzero(A->ops,sizeof(struct _MatOps));
116:   A->factor           = 0;
117:   A->mapping          = 0;

119:   b->AIJ  = 0;
120:   b->dof  = 0;
121:   b->OAIJ = 0;
122:   b->ctx  = 0;
123:   b->w    = 0;
124:   return(0);
125: }
126: EXTERN_C_END

128: /* --------------------------------------------------------------------------------------*/
129: int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
130: {
131:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
132:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
133:   Scalar      *x,*y,*v,sum1, sum2;
134:   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
135:   int         n,i,jrow,j;

138:   VecGetArray(xx,&x);
139:   VecGetArray(yy,&y);
140:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
141:   idx  = a->j;
142:   v    = a->a;
143:   ii   = a->i;

145:   v    += shift; /* shift for Fortran start by 1 indexing */
146:   idx  += shift;
147:   for (i=0; i<m; i++) {
148:     jrow = ii[i];
149:     n    = ii[i+1] - jrow;
150:     sum1  = 0.0;
151:     sum2  = 0.0;
152:     for (j=0; j<n; j++) {
153:       sum1 += v[jrow]*x[2*idx[jrow]];
154:       sum2 += v[jrow]*x[2*idx[jrow]+1];
155:       jrow++;
156:      }
157:     y[2*i]   = sum1;
158:     y[2*i+1] = sum2;
159:   }

161:   PetscLogFlops(4*a->nz - 2*m);
162:   VecRestoreArray(xx,&x);
163:   VecRestoreArray(yy,&y);
164:   return(0);
165: }

167: int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
168: {
169:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
170:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
171:   Scalar      *x,*y,*v,alpha1,alpha2,zero = 0.0;
172:   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

175:   VecSet(&zero,yy);
176:   VecGetArray(xx,&x);
177:   VecGetArray(yy,&y);
178:   y = y + shift; /* shift for Fortran start by 1 indexing */
179:   for (i=0; i<m; i++) {
180:     idx    = a->j + a->i[i] + shift;
181:     v      = a->a + a->i[i] + shift;
182:     n      = a->i[i+1] - a->i[i];
183:     alpha1 = x[2*i];
184:     alpha2 = x[2*i+1];
185:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
186:   }
187:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
188:   VecRestoreArray(xx,&x);
189:   VecRestoreArray(yy,&y);
190:   return(0);
191: }

193: int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
194: {
195:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
196:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
197:   Scalar      *x,*y,*v,sum1, sum2;
198:   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
199:   int         n,i,jrow,j;

202:   if (yy != zz) {VecCopy(yy,zz);}
203:   VecGetArray(xx,&x);
204:   VecGetArray(zz,&y);
205:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
206:   idx  = a->j;
207:   v    = a->a;
208:   ii   = a->i;

210:   v    += shift; /* shift for Fortran start by 1 indexing */
211:   idx  += shift;
212:   for (i=0; i<m; i++) {
213:     jrow = ii[i];
214:     n    = ii[i+1] - jrow;
215:     sum1  = 0.0;
216:     sum2  = 0.0;
217:     for (j=0; j<n; j++) {
218:       sum1 += v[jrow]*x[2*idx[jrow]];
219:       sum2 += v[jrow]*x[2*idx[jrow]+1];
220:       jrow++;
221:      }
222:     y[2*i]   += sum1;
223:     y[2*i+1] += sum2;
224:   }

226:   PetscLogFlops(4*a->nz - 2*m);
227:   VecRestoreArray(xx,&x);
228:   VecRestoreArray(zz,&y);
229:   return(0);
230: }
231: int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
232: {
233:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
234:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
235:   Scalar      *x,*y,*v,alpha1,alpha2;
236:   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

239:   if (yy != zz) {VecCopy(yy,zz);}
240:   VecGetArray(xx,&x);
241:   VecGetArray(zz,&y);
242:   y = y + shift; /* shift for Fortran start by 1 indexing */
243:   for (i=0; i<m; i++) {
244:     idx   = a->j + a->i[i] + shift;
245:     v     = a->a + a->i[i] + shift;
246:     n     = a->i[i+1] - a->i[i];
247:     alpha1 = x[2*i];
248:     alpha2 = x[2*i+1];
249:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
250:   }
251:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
252:   VecRestoreArray(xx,&x);
253:   VecRestoreArray(zz,&y);
254:   return(0);
255: }
256: /* --------------------------------------------------------------------------------------*/
257: int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
258: {
259:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
260:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
261:   Scalar      *x,*y,*v,sum1, sum2, sum3;
262:   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
263:   int         n,i,jrow,j;

266:   VecGetArray(xx,&x);
267:   VecGetArray(yy,&y);
268:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
269:   idx  = a->j;
270:   v    = a->a;
271:   ii   = a->i;

273:   v    += shift; /* shift for Fortran start by 1 indexing */
274:   idx  += shift;
275:   for (i=0; i<m; i++) {
276:     jrow = ii[i];
277:     n    = ii[i+1] - jrow;
278:     sum1  = 0.0;
279:     sum2  = 0.0;
280:     sum3  = 0.0;
281:     for (j=0; j<n; j++) {
282:       sum1 += v[jrow]*x[3*idx[jrow]];
283:       sum2 += v[jrow]*x[3*idx[jrow]+1];
284:       sum3 += v[jrow]*x[3*idx[jrow]+2];
285:       jrow++;
286:      }
287:     y[3*i]   = sum1;
288:     y[3*i+1] = sum2;
289:     y[3*i+2] = sum3;
290:   }

292:   PetscLogFlops(6*a->nz - 3*m);
293:   VecRestoreArray(xx,&x);
294:   VecRestoreArray(yy,&y);
295:   return(0);
296: }

298: int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
299: {
300:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
301:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
302:   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
303:   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

306:   VecSet(&zero,yy);
307:   VecGetArray(xx,&x);
308:   VecGetArray(yy,&y);
309:   y = y + shift; /* shift for Fortran start by 1 indexing */
310:   for (i=0; i<m; i++) {
311:     idx    = a->j + a->i[i] + shift;
312:     v      = a->a + a->i[i] + shift;
313:     n      = a->i[i+1] - a->i[i];
314:     alpha1 = x[3*i];
315:     alpha2 = x[3*i+1];
316:     alpha3 = x[3*i+2];
317:     while (n-->0) {
318:       y[3*(*idx)]   += alpha1*(*v);
319:       y[3*(*idx)+1] += alpha2*(*v);
320:       y[3*(*idx)+2] += alpha3*(*v);
321:       idx++; v++;
322:     }
323:   }
324:   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
325:   VecRestoreArray(xx,&x);
326:   VecRestoreArray(yy,&y);
327:   return(0);
328: }

330: int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
331: {
332:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
333:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
334:   Scalar      *x,*y,*v,sum1, sum2, sum3;
335:   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
336:   int         n,i,jrow,j;

339:   if (yy != zz) {VecCopy(yy,zz);}
340:   VecGetArray(xx,&x);
341:   VecGetArray(zz,&y);
342:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
343:   idx  = a->j;
344:   v    = a->a;
345:   ii   = a->i;

347:   v    += shift; /* shift for Fortran start by 1 indexing */
348:   idx  += shift;
349:   for (i=0; i<m; i++) {
350:     jrow = ii[i];
351:     n    = ii[i+1] - jrow;
352:     sum1  = 0.0;
353:     sum2  = 0.0;
354:     sum3  = 0.0;
355:     for (j=0; j<n; j++) {
356:       sum1 += v[jrow]*x[3*idx[jrow]];
357:       sum2 += v[jrow]*x[3*idx[jrow]+1];
358:       sum3 += v[jrow]*x[3*idx[jrow]+2];
359:       jrow++;
360:      }
361:     y[3*i]   += sum1;
362:     y[3*i+1] += sum2;
363:     y[3*i+2] += sum3;
364:   }

366:   PetscLogFlops(6*a->nz);
367:   VecRestoreArray(xx,&x);
368:   VecRestoreArray(zz,&y);
369:   return(0);
370: }
371: int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
372: {
373:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
374:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
375:   Scalar      *x,*y,*v,alpha1,alpha2,alpha3;
376:   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

379:   if (yy != zz) {VecCopy(yy,zz);}
380:   VecGetArray(xx,&x);
381:   VecGetArray(zz,&y);
382:   y = y + shift; /* shift for Fortran start by 1 indexing */
383:   for (i=0; i<m; i++) {
384:     idx    = a->j + a->i[i] + shift;
385:     v      = a->a + a->i[i] + shift;
386:     n      = a->i[i+1] - a->i[i];
387:     alpha1 = x[3*i];
388:     alpha2 = x[3*i+1];
389:     alpha3 = x[3*i+2];
390:     while (n-->0) {
391:       y[3*(*idx)]   += alpha1*(*v);
392:       y[3*(*idx)+1] += alpha2*(*v);
393:       y[3*(*idx)+2] += alpha3*(*v);
394:       idx++; v++;
395:     }
396:   }
397:   PetscLogFlops(6*a->nz);
398:   VecRestoreArray(xx,&x);
399:   VecRestoreArray(zz,&y);
400:   return(0);
401: }

403: /* ------------------------------------------------------------------------------*/
404: int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
405: {
406:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
407:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
408:   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
409:   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
410:   int         n,i,jrow,j;

413:   VecGetArray(xx,&x);
414:   VecGetArray(yy,&y);
415:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
416:   idx  = a->j;
417:   v    = a->a;
418:   ii   = a->i;

420:   v    += shift; /* shift for Fortran start by 1 indexing */
421:   idx  += shift;
422:   for (i=0; i<m; i++) {
423:     jrow = ii[i];
424:     n    = ii[i+1] - jrow;
425:     sum1  = 0.0;
426:     sum2  = 0.0;
427:     sum3  = 0.0;
428:     sum4  = 0.0;
429:     for (j=0; j<n; j++) {
430:       sum1 += v[jrow]*x[4*idx[jrow]];
431:       sum2 += v[jrow]*x[4*idx[jrow]+1];
432:       sum3 += v[jrow]*x[4*idx[jrow]+2];
433:       sum4 += v[jrow]*x[4*idx[jrow]+3];
434:       jrow++;
435:      }
436:     y[4*i]   = sum1;
437:     y[4*i+1] = sum2;
438:     y[4*i+2] = sum3;
439:     y[4*i+3] = sum4;
440:   }

442:   PetscLogFlops(8*a->nz - 4*m);
443:   VecRestoreArray(xx,&x);
444:   VecRestoreArray(yy,&y);
445:   return(0);
446: }

448: int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
449: {
450:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
451:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
452:   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
453:   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

456:   VecSet(&zero,yy);
457:   VecGetArray(xx,&x);
458:   VecGetArray(yy,&y);
459:   y = y + shift; /* shift for Fortran start by 1 indexing */
460:   for (i=0; i<m; i++) {
461:     idx    = a->j + a->i[i] + shift;
462:     v      = a->a + a->i[i] + shift;
463:     n      = a->i[i+1] - a->i[i];
464:     alpha1 = x[4*i];
465:     alpha2 = x[4*i+1];
466:     alpha3 = x[4*i+2];
467:     alpha4 = x[4*i+3];
468:     while (n-->0) {
469:       y[4*(*idx)]   += alpha1*(*v);
470:       y[4*(*idx)+1] += alpha2*(*v);
471:       y[4*(*idx)+2] += alpha3*(*v);
472:       y[4*(*idx)+3] += alpha4*(*v);
473:       idx++; v++;
474:     }
475:   }
476:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
477:   VecRestoreArray(xx,&x);
478:   VecRestoreArray(yy,&y);
479:   return(0);
480: }

482: int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
483: {
484:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
485:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
486:   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4;
487:   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
488:   int         n,i,jrow,j;

491:   if (yy != zz) {VecCopy(yy,zz);}
492:   VecGetArray(xx,&x);
493:   VecGetArray(zz,&y);
494:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
495:   idx  = a->j;
496:   v    = a->a;
497:   ii   = a->i;

499:   v    += shift; /* shift for Fortran start by 1 indexing */
500:   idx  += shift;
501:   for (i=0; i<m; i++) {
502:     jrow = ii[i];
503:     n    = ii[i+1] - jrow;
504:     sum1  = 0.0;
505:     sum2  = 0.0;
506:     sum3  = 0.0;
507:     sum4  = 0.0;
508:     for (j=0; j<n; j++) {
509:       sum1 += v[jrow]*x[4*idx[jrow]];
510:       sum2 += v[jrow]*x[4*idx[jrow]+1];
511:       sum3 += v[jrow]*x[4*idx[jrow]+2];
512:       sum4 += v[jrow]*x[4*idx[jrow]+3];
513:       jrow++;
514:      }
515:     y[4*i]   += sum1;
516:     y[4*i+1] += sum2;
517:     y[4*i+2] += sum3;
518:     y[4*i+3] += sum4;
519:   }

521:   PetscLogFlops(8*a->nz - 4*m);
522:   VecRestoreArray(xx,&x);
523:   VecRestoreArray(zz,&y);
524:   return(0);
525: }
526: int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
527: {
528:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
529:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
530:   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
531:   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

534:   if (yy != zz) {VecCopy(yy,zz);}
535:   VecGetArray(xx,&x);
536:   VecGetArray(zz,&y);
537:   y = y + shift; /* shift for Fortran start by 1 indexing */
538:   for (i=0; i<m; i++) {
539:     idx    = a->j + a->i[i] + shift;
540:     v      = a->a + a->i[i] + shift;
541:     n      = a->i[i+1] - a->i[i];
542:     alpha1 = x[4*i];
543:     alpha2 = x[4*i+1];
544:     alpha3 = x[4*i+2];
545:     alpha4 = x[4*i+3];
546:     while (n-->0) {
547:       y[4*(*idx)]   += alpha1*(*v);
548:       y[4*(*idx)+1] += alpha2*(*v);
549:       y[4*(*idx)+2] += alpha3*(*v);
550:       y[4*(*idx)+3] += alpha4*(*v);
551:       idx++; v++;
552:     }
553:   }
554:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
555:   VecRestoreArray(xx,&x);
556:   VecRestoreArray(zz,&y);
557:   return(0);
558: }
559: /* ------------------------------------------------------------------------------*/

561: int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
562: {
563:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
564:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
565:   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
566:   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
567:   int         n,i,jrow,j;

570:   VecGetArray(xx,&x);
571:   VecGetArray(yy,&y);
572:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
573:   idx  = a->j;
574:   v    = a->a;
575:   ii   = a->i;

577:   v    += shift; /* shift for Fortran start by 1 indexing */
578:   idx  += shift;
579:   for (i=0; i<m; i++) {
580:     jrow = ii[i];
581:     n    = ii[i+1] - jrow;
582:     sum1  = 0.0;
583:     sum2  = 0.0;
584:     sum3  = 0.0;
585:     sum4  = 0.0;
586:     sum5  = 0.0;
587:     for (j=0; j<n; j++) {
588:       sum1 += v[jrow]*x[5*idx[jrow]];
589:       sum2 += v[jrow]*x[5*idx[jrow]+1];
590:       sum3 += v[jrow]*x[5*idx[jrow]+2];
591:       sum4 += v[jrow]*x[5*idx[jrow]+3];
592:       sum5 += v[jrow]*x[5*idx[jrow]+4];
593:       jrow++;
594:      }
595:     y[5*i]   = sum1;
596:     y[5*i+1] = sum2;
597:     y[5*i+2] = sum3;
598:     y[5*i+3] = sum4;
599:     y[5*i+4] = sum5;
600:   }

602:   PetscLogFlops(10*a->nz - 5*m);
603:   VecRestoreArray(xx,&x);
604:   VecRestoreArray(yy,&y);
605:   return(0);
606: }

608: int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
609: {
610:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
611:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
612:   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
613:   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

616:   VecSet(&zero,yy);
617:   VecGetArray(xx,&x);
618:   VecGetArray(yy,&y);
619:   y = y + shift; /* shift for Fortran start by 1 indexing */
620:   for (i=0; i<m; i++) {
621:     idx    = a->j + a->i[i] + shift;
622:     v      = a->a + a->i[i] + shift;
623:     n      = a->i[i+1] - a->i[i];
624:     alpha1 = x[5*i];
625:     alpha2 = x[5*i+1];
626:     alpha3 = x[5*i+2];
627:     alpha4 = x[5*i+3];
628:     alpha5 = x[5*i+4];
629:     while (n-->0) {
630:       y[5*(*idx)]   += alpha1*(*v);
631:       y[5*(*idx)+1] += alpha2*(*v);
632:       y[5*(*idx)+2] += alpha3*(*v);
633:       y[5*(*idx)+3] += alpha4*(*v);
634:       y[5*(*idx)+4] += alpha5*(*v);
635:       idx++; v++;
636:     }
637:   }
638:   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
639:   VecRestoreArray(xx,&x);
640:   VecRestoreArray(yy,&y);
641:   return(0);
642: }

644: int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
645: {
646:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
647:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
648:   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
649:   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
650:   int         n,i,jrow,j;

653:   if (yy != zz) {VecCopy(yy,zz);}
654:   VecGetArray(xx,&x);
655:   VecGetArray(zz,&y);
656:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
657:   idx  = a->j;
658:   v    = a->a;
659:   ii   = a->i;

661:   v    += shift; /* shift for Fortran start by 1 indexing */
662:   idx  += shift;
663:   for (i=0; i<m; i++) {
664:     jrow = ii[i];
665:     n    = ii[i+1] - jrow;
666:     sum1  = 0.0;
667:     sum2  = 0.0;
668:     sum3  = 0.0;
669:     sum4  = 0.0;
670:     sum5  = 0.0;
671:     for (j=0; j<n; j++) {
672:       sum1 += v[jrow]*x[5*idx[jrow]];
673:       sum2 += v[jrow]*x[5*idx[jrow]+1];
674:       sum3 += v[jrow]*x[5*idx[jrow]+2];
675:       sum4 += v[jrow]*x[5*idx[jrow]+3];
676:       sum5 += v[jrow]*x[5*idx[jrow]+4];
677:       jrow++;
678:      }
679:     y[5*i]   += sum1;
680:     y[5*i+1] += sum2;
681:     y[5*i+2] += sum3;
682:     y[5*i+3] += sum4;
683:     y[5*i+4] += sum5;
684:   }

686:   PetscLogFlops(10*a->nz);
687:   VecRestoreArray(xx,&x);
688:   VecRestoreArray(zz,&y);
689:   return(0);
690: }

692: int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
693: {
694:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
695:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
696:   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
697:   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

700:   if (yy != zz) {VecCopy(yy,zz);}
701:   VecGetArray(xx,&x);
702:   VecGetArray(zz,&y);
703:   y = y + shift; /* shift for Fortran start by 1 indexing */
704:   for (i=0; i<m; i++) {
705:     idx    = a->j + a->i[i] + shift;
706:     v      = a->a + a->i[i] + shift;
707:     n      = a->i[i+1] - a->i[i];
708:     alpha1 = x[5*i];
709:     alpha2 = x[5*i+1];
710:     alpha3 = x[5*i+2];
711:     alpha4 = x[5*i+3];
712:     alpha5 = x[5*i+4];
713:     while (n-->0) {
714:       y[5*(*idx)]   += alpha1*(*v);
715:       y[5*(*idx)+1] += alpha2*(*v);
716:       y[5*(*idx)+2] += alpha3*(*v);
717:       y[5*(*idx)+3] += alpha4*(*v);
718:       y[5*(*idx)+4] += alpha5*(*v);
719:       idx++; v++;
720:     }
721:   }
722:   PetscLogFlops(10*a->nz);
723:   VecRestoreArray(xx,&x);
724:   VecRestoreArray(zz,&y);
725:   return(0);
726: }

728: /* ------------------------------------------------------------------------------*/
729: int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
730: {
731:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
732:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
733:   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
734:   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
735:   int         n,i,jrow,j;

738:   VecGetArray(xx,&x);
739:   VecGetArray(yy,&y);
740:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
741:   idx  = a->j;
742:   v    = a->a;
743:   ii   = a->i;

745:   v    += shift; /* shift for Fortran start by 1 indexing */
746:   idx  += shift;
747:   for (i=0; i<m; i++) {
748:     jrow = ii[i];
749:     n    = ii[i+1] - jrow;
750:     sum1  = 0.0;
751:     sum2  = 0.0;
752:     sum3  = 0.0;
753:     sum4  = 0.0;
754:     sum5  = 0.0;
755:     sum6  = 0.0;
756:     for (j=0; j<n; j++) {
757:       sum1 += v[jrow]*x[6*idx[jrow]];
758:       sum2 += v[jrow]*x[6*idx[jrow]+1];
759:       sum3 += v[jrow]*x[6*idx[jrow]+2];
760:       sum4 += v[jrow]*x[6*idx[jrow]+3];
761:       sum5 += v[jrow]*x[6*idx[jrow]+4];
762:       sum6 += v[jrow]*x[6*idx[jrow]+5];
763:       jrow++;
764:      }
765:     y[6*i]   = sum1;
766:     y[6*i+1] = sum2;
767:     y[6*i+2] = sum3;
768:     y[6*i+3] = sum4;
769:     y[6*i+4] = sum5;
770:     y[6*i+5] = sum6;
771:   }

773:   PetscLogFlops(12*a->nz - 6*m);
774:   VecRestoreArray(xx,&x);
775:   VecRestoreArray(yy,&y);
776:   return(0);
777: }

779: int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
780: {
781:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
782:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
783:   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
784:   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

787:   VecSet(&zero,yy);
788:   VecGetArray(xx,&x);
789:   VecGetArray(yy,&y);
790:   y = y + shift; /* shift for Fortran start by 1 indexing */
791:   for (i=0; i<m; i++) {
792:     idx    = a->j + a->i[i] + shift;
793:     v      = a->a + a->i[i] + shift;
794:     n      = a->i[i+1] - a->i[i];
795:     alpha1 = x[6*i];
796:     alpha2 = x[6*i+1];
797:     alpha3 = x[6*i+2];
798:     alpha4 = x[6*i+3];
799:     alpha5 = x[6*i+4];
800:     alpha6 = x[6*i+5];
801:     while (n-->0) {
802:       y[6*(*idx)]   += alpha1*(*v);
803:       y[6*(*idx)+1] += alpha2*(*v);
804:       y[6*(*idx)+2] += alpha3*(*v);
805:       y[6*(*idx)+3] += alpha4*(*v);
806:       y[6*(*idx)+4] += alpha5*(*v);
807:       y[6*(*idx)+5] += alpha6*(*v);
808:       idx++; v++;
809:     }
810:   }
811:   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
812:   VecRestoreArray(xx,&x);
813:   VecRestoreArray(yy,&y);
814:   return(0);
815: }

817: int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
818: {
819:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
820:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
821:   Scalar      *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
822:   int         ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
823:   int         n,i,jrow,j;

826:   if (yy != zz) {VecCopy(yy,zz);}
827:   VecGetArray(xx,&x);
828:   VecGetArray(zz,&y);
829:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
830:   idx  = a->j;
831:   v    = a->a;
832:   ii   = a->i;

834:   v    += shift; /* shift for Fortran start by 1 indexing */
835:   idx  += shift;
836:   for (i=0; i<m; i++) {
837:     jrow = ii[i];
838:     n    = ii[i+1] - jrow;
839:     sum1  = 0.0;
840:     sum2  = 0.0;
841:     sum3  = 0.0;
842:     sum4  = 0.0;
843:     sum5  = 0.0;
844:     sum6  = 0.0;
845:     for (j=0; j<n; j++) {
846:       sum1 += v[jrow]*x[6*idx[jrow]];
847:       sum2 += v[jrow]*x[6*idx[jrow]+1];
848:       sum3 += v[jrow]*x[6*idx[jrow]+2];
849:       sum4 += v[jrow]*x[6*idx[jrow]+3];
850:       sum5 += v[jrow]*x[6*idx[jrow]+4];
851:       sum6 += v[jrow]*x[6*idx[jrow]+5];
852:       jrow++;
853:      }
854:     y[6*i]   += sum1;
855:     y[6*i+1] += sum2;
856:     y[6*i+2] += sum3;
857:     y[6*i+3] += sum4;
858:     y[6*i+4] += sum5;
859:     y[6*i+5] += sum6;
860:   }

862:   PetscLogFlops(12*a->nz);
863:   VecRestoreArray(xx,&x);
864:   VecRestoreArray(zz,&y);
865:   return(0);
866: }

868: int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
869: {
870:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
871:   Mat_SeqAIJ  *a = (Mat_SeqAIJ*)b->AIJ->data;
872:   Scalar      *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
873:   int         ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

876:   if (yy != zz) {VecCopy(yy,zz);}
877:   VecGetArray(xx,&x);
878:   VecGetArray(zz,&y);
879:   y = y + shift; /* shift for Fortran start by 1 indexing */
880:   for (i=0; i<m; i++) {
881:     idx    = a->j + a->i[i] + shift;
882:     v      = a->a + a->i[i] + shift;
883:     n      = a->i[i+1] - a->i[i];
884:     alpha1 = x[6*i];
885:     alpha2 = x[6*i+1];
886:     alpha3 = x[6*i+2];
887:     alpha4 = x[6*i+3];
888:     alpha5 = x[6*i+4];
889:     alpha6 = x[6*i+5];
890:     while (n-->0) {
891:       y[6*(*idx)]   += alpha1*(*v);
892:       y[6*(*idx)+1] += alpha2*(*v);
893:       y[6*(*idx)+2] += alpha3*(*v);
894:       y[6*(*idx)+3] += alpha4*(*v);
895:       y[6*(*idx)+4] += alpha5*(*v);
896:       y[6*(*idx)+5] += alpha6*(*v);
897:       idx++; v++;
898:     }
899:   }
900:   PetscLogFlops(12*a->nz);
901:   VecRestoreArray(xx,&x);
902:   VecRestoreArray(zz,&y);
903:   return(0);
904: }

906: /*===================================================================================*/
907: int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
908: {
909:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
910:   int         ierr;

913:   /* start the scatter */
914:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
915:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
916:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
917:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
918:   return(0);
919: }

921: int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
922: {
923:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
924:   int         ierr;
926:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
927:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
928:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
929:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
930:   return(0);
931: }

933: int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
934: {
935:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
936:   int         ierr;

939:   /* start the scatter */
940:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
941:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
942:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
943:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);
944:   return(0);
945: }

947: int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
948: {
949:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
950:   int         ierr;
952:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
953:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
954:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
955:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
956:   return(0);
957: }

959: /* ---------------------------------------------------------------------------------- */
960: int MatCreateMAIJ(Mat A,int dof,Mat *maij)
961: {
962:   int         ierr,size,n;
963:   Mat_MPIMAIJ *b;
964:   Mat         B;

967:   ierr   = PetscObjectReference((PetscObject)A);

969:   if (dof == 1) {
970:     *maij = A;
971:   } else {
972:     MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);
973:     B->assembled    = PETSC_TRUE;

975:     MPI_Comm_size(A->comm,&size);
976:     if (size == 1) {
977:       MatSetType(B,MATSEQMAIJ);
978:       B->ops->destroy = MatDestroy_SeqMAIJ;
979:       b      = (Mat_MPIMAIJ*)B->data;
980:       b->dof = dof;
981:       b->AIJ = A;
982:       if (dof == 2) {
983:         B->ops->mult             = MatMult_SeqMAIJ_2;
984:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
985:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
986:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
987:       } else if (dof == 3) {
988:         B->ops->mult             = MatMult_SeqMAIJ_3;
989:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
990:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
991:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
992:       } else if (dof == 4) {
993:         B->ops->mult             = MatMult_SeqMAIJ_4;
994:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
995:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
996:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
997:       } else if (dof == 5) {
998:         B->ops->mult             = MatMult_SeqMAIJ_5;
999:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1000:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1001:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1002:       } else if (dof == 6) {
1003:         B->ops->mult             = MatMult_SeqMAIJ_6;
1004:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1005:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1006:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1007:       } else {
1008:         SETERRQ1(1,"Cannot handle a dof of %dn",dof);
1009:       }
1010:     } else {
1011:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1012:       IS         from,to;
1013:       Vec        gvec;
1014:       int        *garray,i;

1016:       MatSetType(B,MATMPIMAIJ);
1017:       B->ops->destroy = MatDestroy_MPIMAIJ;
1018:       b      = (Mat_MPIMAIJ*)B->data;
1019:       b->dof = dof;
1020:       b->A   = A;
1021:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
1022:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

1024:       VecGetSize(mpiaij->lvec,&n);
1025:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);

1027:       /* create two temporary Index sets for build scatter gather */
1028:       PetscMalloc((n+1)*sizeof(int),&garray);
1029:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1030:       ISCreateBlock(A->comm,dof,n,garray,&from);
1031:       PetscFree(garray);
1032:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

1034:       /* create temporary global vector to generate scatter context */
1035:       VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);

1037:       /* generate the scatter context */
1038:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

1040:       ISDestroy(from);
1041:       ISDestroy(to);
1042:       VecDestroy(gvec);

1044:       B->ops->mult             = MatMult_MPIMAIJ_dof;
1045:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1046:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1047:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1048:     }
1049:     *maij = B;
1050:   }
1051:   return(0);
1052: }