Actual source code: maij.c
1: /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include src/mat/impls/maij/maij.h
20: #include src/vec/vecimpl.h
24: int MatMAIJGetAIJ(Mat A,Mat *B)
25: {
26: int ierr;
27: PetscTruth ismpimaij,isseqmaij;
30: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
31: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
32: if (ismpimaij) {
33: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
35: *B = b->A;
36: } else if (isseqmaij) {
37: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
39: *B = b->AIJ;
40: } else {
41: *B = A;
42: }
43: return(0);
44: }
48: int MatMAIJRedimension(Mat A,int dof,Mat *B)
49: {
51: Mat Aij;
54: MatMAIJGetAIJ(A,&Aij);
55: MatCreateMAIJ(Aij,dof,B);
56: return(0);
57: }
61: int MatDestroy_SeqMAIJ(Mat A)
62: {
63: int ierr;
64: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
67: if (b->AIJ) {
68: MatDestroy(b->AIJ);
69: }
70: PetscFree(b);
71: return(0);
72: }
76: int MatDestroy_MPIMAIJ(Mat A)
77: {
78: int ierr;
79: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
82: if (b->AIJ) {
83: MatDestroy(b->AIJ);
84: }
85: if (b->OAIJ) {
86: MatDestroy(b->OAIJ);
87: }
88: if (b->A) {
89: MatDestroy(b->A);
90: }
91: if (b->ctx) {
92: VecScatterDestroy(b->ctx);
93: }
94: if (b->w) {
95: VecDestroy(b->w);
96: }
97: PetscFree(b);
98: return(0);
99: }
101: /*MC
102: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
103: multicomponent problems, interpolating or restricting each component the same way independently.
104: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
106: Operations provided:
107: . MatMult
108: . MatMultTranspose
109: . MatMultAdd
110: . MatMultTransposeAdd
112: Level: advanced
114: .seealso: MatCreateSeqDense
115: M*/
117: EXTERN_C_BEGIN
120: int MatCreate_MAIJ(Mat A)
121: {
122: int ierr;
123: Mat_MPIMAIJ *b;
126: PetscNew(Mat_MPIMAIJ,&b);
127: A->data = (void*)b;
128: PetscMemzero(b,sizeof(Mat_MPIMAIJ));
129: PetscMemzero(A->ops,sizeof(struct _MatOps));
130: A->factor = 0;
131: A->mapping = 0;
133: b->AIJ = 0;
134: b->dof = 0;
135: b->OAIJ = 0;
136: b->ctx = 0;
137: b->w = 0;
138: return(0);
139: }
140: EXTERN_C_END
142: /* --------------------------------------------------------------------------------------*/
145: int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
146: {
147: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
148: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
149: PetscScalar *x,*y,*v,sum1, sum2;
150: int ierr,m = b->AIJ->m,*idx,*ii;
151: int n,i,jrow,j;
154: VecGetArrayFast(xx,&x);
155: VecGetArrayFast(yy,&y);
156: idx = a->j;
157: v = a->a;
158: ii = a->i;
160: for (i=0; i<m; i++) {
161: jrow = ii[i];
162: n = ii[i+1] - jrow;
163: sum1 = 0.0;
164: sum2 = 0.0;
165: for (j=0; j<n; j++) {
166: sum1 += v[jrow]*x[2*idx[jrow]];
167: sum2 += v[jrow]*x[2*idx[jrow]+1];
168: jrow++;
169: }
170: y[2*i] = sum1;
171: y[2*i+1] = sum2;
172: }
174: PetscLogFlops(4*a->nz - 2*m);
175: VecRestoreArrayFast(xx,&x);
176: VecRestoreArrayFast(yy,&y);
177: return(0);
178: }
182: int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
185: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
186: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
187: int ierr,m = b->AIJ->m,n,i,*idx;
190: VecSet(&zero,yy);
191: VecGetArrayFast(xx,&x);
192: VecGetArrayFast(yy,&y);
193:
194: for (i=0; i<m; i++) {
195: idx = a->j + a->i[i] ;
196: v = a->a + a->i[i] ;
197: n = a->i[i+1] - a->i[i];
198: alpha1 = x[2*i];
199: alpha2 = x[2*i+1];
200: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
201: }
202: PetscLogFlops(4*a->nz - 2*b->AIJ->n);
203: VecRestoreArrayFast(xx,&x);
204: VecRestoreArrayFast(yy,&y);
205: return(0);
206: }
210: int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
211: {
212: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
213: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
214: PetscScalar *x,*y,*v,sum1, sum2;
215: int ierr,m = b->AIJ->m,*idx,*ii;
216: int n,i,jrow,j;
219: if (yy != zz) {VecCopy(yy,zz);}
220: VecGetArrayFast(xx,&x);
221: VecGetArrayFast(zz,&y);
222: idx = a->j;
223: v = a->a;
224: ii = a->i;
226: for (i=0; i<m; i++) {
227: jrow = ii[i];
228: n = ii[i+1] - jrow;
229: sum1 = 0.0;
230: sum2 = 0.0;
231: for (j=0; j<n; j++) {
232: sum1 += v[jrow]*x[2*idx[jrow]];
233: sum2 += v[jrow]*x[2*idx[jrow]+1];
234: jrow++;
235: }
236: y[2*i] += sum1;
237: y[2*i+1] += sum2;
238: }
240: PetscLogFlops(4*a->nz - 2*m);
241: VecRestoreArrayFast(xx,&x);
242: VecRestoreArrayFast(zz,&y);
243: return(0);
244: }
247: int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
248: {
249: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
250: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
251: PetscScalar *x,*y,*v,alpha1,alpha2;
252: int ierr,m = b->AIJ->m,n,i,*idx;
255: if (yy != zz) {VecCopy(yy,zz);}
256: VecGetArrayFast(xx,&x);
257: VecGetArrayFast(zz,&y);
258:
259: for (i=0; i<m; i++) {
260: idx = a->j + a->i[i] ;
261: v = a->a + a->i[i] ;
262: n = a->i[i+1] - a->i[i];
263: alpha1 = x[2*i];
264: alpha2 = x[2*i+1];
265: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
266: }
267: PetscLogFlops(4*a->nz - 2*b->AIJ->n);
268: VecRestoreArrayFast(xx,&x);
269: VecRestoreArrayFast(zz,&y);
270: return(0);
271: }
272: /* --------------------------------------------------------------------------------------*/
275: int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
276: {
277: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
278: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
279: PetscScalar *x,*y,*v,sum1, sum2, sum3;
280: int ierr,m = b->AIJ->m,*idx,*ii;
281: int n,i,jrow,j;
284: VecGetArrayFast(xx,&x);
285: VecGetArrayFast(yy,&y);
286: idx = a->j;
287: v = a->a;
288: ii = a->i;
290: for (i=0; i<m; i++) {
291: jrow = ii[i];
292: n = ii[i+1] - jrow;
293: sum1 = 0.0;
294: sum2 = 0.0;
295: sum3 = 0.0;
296: for (j=0; j<n; j++) {
297: sum1 += v[jrow]*x[3*idx[jrow]];
298: sum2 += v[jrow]*x[3*idx[jrow]+1];
299: sum3 += v[jrow]*x[3*idx[jrow]+2];
300: jrow++;
301: }
302: y[3*i] = sum1;
303: y[3*i+1] = sum2;
304: y[3*i+2] = sum3;
305: }
307: PetscLogFlops(6*a->nz - 3*m);
308: VecRestoreArrayFast(xx,&x);
309: VecRestoreArrayFast(yy,&y);
310: return(0);
311: }
315: int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
316: {
317: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
318: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
319: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
320: int ierr,m = b->AIJ->m,n,i,*idx;
323: VecSet(&zero,yy);
324: VecGetArrayFast(xx,&x);
325: VecGetArrayFast(yy,&y);
326:
327: for (i=0; i<m; i++) {
328: idx = a->j + a->i[i];
329: v = a->a + a->i[i];
330: n = a->i[i+1] - a->i[i];
331: alpha1 = x[3*i];
332: alpha2 = x[3*i+1];
333: alpha3 = x[3*i+2];
334: while (n-->0) {
335: y[3*(*idx)] += alpha1*(*v);
336: y[3*(*idx)+1] += alpha2*(*v);
337: y[3*(*idx)+2] += alpha3*(*v);
338: idx++; v++;
339: }
340: }
341: PetscLogFlops(6*a->nz - 3*b->AIJ->n);
342: VecRestoreArrayFast(xx,&x);
343: VecRestoreArrayFast(yy,&y);
344: return(0);
345: }
349: int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
350: {
351: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
352: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
353: PetscScalar *x,*y,*v,sum1, sum2, sum3;
354: int ierr,m = b->AIJ->m,*idx,*ii;
355: int n,i,jrow,j;
358: if (yy != zz) {VecCopy(yy,zz);}
359: VecGetArrayFast(xx,&x);
360: VecGetArrayFast(zz,&y);
361: idx = a->j;
362: v = a->a;
363: ii = a->i;
365: for (i=0; i<m; i++) {
366: jrow = ii[i];
367: n = ii[i+1] - jrow;
368: sum1 = 0.0;
369: sum2 = 0.0;
370: sum3 = 0.0;
371: for (j=0; j<n; j++) {
372: sum1 += v[jrow]*x[3*idx[jrow]];
373: sum2 += v[jrow]*x[3*idx[jrow]+1];
374: sum3 += v[jrow]*x[3*idx[jrow]+2];
375: jrow++;
376: }
377: y[3*i] += sum1;
378: y[3*i+1] += sum2;
379: y[3*i+2] += sum3;
380: }
382: PetscLogFlops(6*a->nz);
383: VecRestoreArrayFast(xx,&x);
384: VecRestoreArrayFast(zz,&y);
385: return(0);
386: }
389: int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
390: {
391: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
392: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
393: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
394: int ierr,m = b->AIJ->m,n,i,*idx;
397: if (yy != zz) {VecCopy(yy,zz);}
398: VecGetArrayFast(xx,&x);
399: VecGetArrayFast(zz,&y);
400: for (i=0; i<m; i++) {
401: idx = a->j + a->i[i] ;
402: v = a->a + a->i[i] ;
403: n = a->i[i+1] - a->i[i];
404: alpha1 = x[3*i];
405: alpha2 = x[3*i+1];
406: alpha3 = x[3*i+2];
407: while (n-->0) {
408: y[3*(*idx)] += alpha1*(*v);
409: y[3*(*idx)+1] += alpha2*(*v);
410: y[3*(*idx)+2] += alpha3*(*v);
411: idx++; v++;
412: }
413: }
414: PetscLogFlops(6*a->nz);
415: VecRestoreArrayFast(xx,&x);
416: VecRestoreArrayFast(zz,&y);
417: return(0);
418: }
420: /* ------------------------------------------------------------------------------*/
423: int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
424: {
425: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
426: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
427: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
428: int ierr,m = b->AIJ->m,*idx,*ii;
429: int n,i,jrow,j;
432: VecGetArrayFast(xx,&x);
433: VecGetArrayFast(yy,&y);
434: idx = a->j;
435: v = a->a;
436: ii = a->i;
438: for (i=0; i<m; i++) {
439: jrow = ii[i];
440: n = ii[i+1] - jrow;
441: sum1 = 0.0;
442: sum2 = 0.0;
443: sum3 = 0.0;
444: sum4 = 0.0;
445: for (j=0; j<n; j++) {
446: sum1 += v[jrow]*x[4*idx[jrow]];
447: sum2 += v[jrow]*x[4*idx[jrow]+1];
448: sum3 += v[jrow]*x[4*idx[jrow]+2];
449: sum4 += v[jrow]*x[4*idx[jrow]+3];
450: jrow++;
451: }
452: y[4*i] = sum1;
453: y[4*i+1] = sum2;
454: y[4*i+2] = sum3;
455: y[4*i+3] = sum4;
456: }
458: PetscLogFlops(8*a->nz - 4*m);
459: VecRestoreArrayFast(xx,&x);
460: VecRestoreArrayFast(yy,&y);
461: return(0);
462: }
466: int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
467: {
468: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
469: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
470: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
471: int ierr,m = b->AIJ->m,n,i,*idx;
474: VecSet(&zero,yy);
475: VecGetArrayFast(xx,&x);
476: VecGetArrayFast(yy,&y);
477: for (i=0; i<m; i++) {
478: idx = a->j + a->i[i] ;
479: v = a->a + a->i[i] ;
480: n = a->i[i+1] - a->i[i];
481: alpha1 = x[4*i];
482: alpha2 = x[4*i+1];
483: alpha3 = x[4*i+2];
484: alpha4 = x[4*i+3];
485: while (n-->0) {
486: y[4*(*idx)] += alpha1*(*v);
487: y[4*(*idx)+1] += alpha2*(*v);
488: y[4*(*idx)+2] += alpha3*(*v);
489: y[4*(*idx)+3] += alpha4*(*v);
490: idx++; v++;
491: }
492: }
493: PetscLogFlops(8*a->nz - 4*b->AIJ->n);
494: VecRestoreArrayFast(xx,&x);
495: VecRestoreArrayFast(yy,&y);
496: return(0);
497: }
501: int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
502: {
503: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
504: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
505: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
506: int ierr,m = b->AIJ->m,*idx,*ii;
507: int n,i,jrow,j;
510: if (yy != zz) {VecCopy(yy,zz);}
511: VecGetArrayFast(xx,&x);
512: VecGetArrayFast(zz,&y);
513: idx = a->j;
514: v = a->a;
515: ii = a->i;
517: for (i=0; i<m; i++) {
518: jrow = ii[i];
519: n = ii[i+1] - jrow;
520: sum1 = 0.0;
521: sum2 = 0.0;
522: sum3 = 0.0;
523: sum4 = 0.0;
524: for (j=0; j<n; j++) {
525: sum1 += v[jrow]*x[4*idx[jrow]];
526: sum2 += v[jrow]*x[4*idx[jrow]+1];
527: sum3 += v[jrow]*x[4*idx[jrow]+2];
528: sum4 += v[jrow]*x[4*idx[jrow]+3];
529: jrow++;
530: }
531: y[4*i] += sum1;
532: y[4*i+1] += sum2;
533: y[4*i+2] += sum3;
534: y[4*i+3] += sum4;
535: }
537: PetscLogFlops(8*a->nz - 4*m);
538: VecRestoreArrayFast(xx,&x);
539: VecRestoreArrayFast(zz,&y);
540: return(0);
541: }
544: int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
545: {
546: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
547: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
548: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
549: int ierr,m = b->AIJ->m,n,i,*idx;
552: if (yy != zz) {VecCopy(yy,zz);}
553: VecGetArrayFast(xx,&x);
554: VecGetArrayFast(zz,&y);
555:
556: for (i=0; i<m; i++) {
557: idx = a->j + a->i[i] ;
558: v = a->a + a->i[i] ;
559: n = a->i[i+1] - a->i[i];
560: alpha1 = x[4*i];
561: alpha2 = x[4*i+1];
562: alpha3 = x[4*i+2];
563: alpha4 = x[4*i+3];
564: while (n-->0) {
565: y[4*(*idx)] += alpha1*(*v);
566: y[4*(*idx)+1] += alpha2*(*v);
567: y[4*(*idx)+2] += alpha3*(*v);
568: y[4*(*idx)+3] += alpha4*(*v);
569: idx++; v++;
570: }
571: }
572: PetscLogFlops(8*a->nz - 4*b->AIJ->n);
573: VecRestoreArrayFast(xx,&x);
574: VecRestoreArrayFast(zz,&y);
575: return(0);
576: }
577: /* ------------------------------------------------------------------------------*/
581: int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
582: {
583: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
584: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
585: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
586: int ierr,m = b->AIJ->m,*idx,*ii;
587: int n,i,jrow,j;
590: VecGetArrayFast(xx,&x);
591: VecGetArrayFast(yy,&y);
592: idx = a->j;
593: v = a->a;
594: ii = a->i;
596: for (i=0; i<m; i++) {
597: jrow = ii[i];
598: n = ii[i+1] - jrow;
599: sum1 = 0.0;
600: sum2 = 0.0;
601: sum3 = 0.0;
602: sum4 = 0.0;
603: sum5 = 0.0;
604: for (j=0; j<n; j++) {
605: sum1 += v[jrow]*x[5*idx[jrow]];
606: sum2 += v[jrow]*x[5*idx[jrow]+1];
607: sum3 += v[jrow]*x[5*idx[jrow]+2];
608: sum4 += v[jrow]*x[5*idx[jrow]+3];
609: sum5 += v[jrow]*x[5*idx[jrow]+4];
610: jrow++;
611: }
612: y[5*i] = sum1;
613: y[5*i+1] = sum2;
614: y[5*i+2] = sum3;
615: y[5*i+3] = sum4;
616: y[5*i+4] = sum5;
617: }
619: PetscLogFlops(10*a->nz - 5*m);
620: VecRestoreArrayFast(xx,&x);
621: VecRestoreArrayFast(yy,&y);
622: return(0);
623: }
627: int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
628: {
629: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
630: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
631: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
632: int ierr,m = b->AIJ->m,n,i,*idx;
635: VecSet(&zero,yy);
636: VecGetArrayFast(xx,&x);
637: VecGetArrayFast(yy,&y);
638:
639: for (i=0; i<m; i++) {
640: idx = a->j + a->i[i] ;
641: v = a->a + a->i[i] ;
642: n = a->i[i+1] - a->i[i];
643: alpha1 = x[5*i];
644: alpha2 = x[5*i+1];
645: alpha3 = x[5*i+2];
646: alpha4 = x[5*i+3];
647: alpha5 = x[5*i+4];
648: while (n-->0) {
649: y[5*(*idx)] += alpha1*(*v);
650: y[5*(*idx)+1] += alpha2*(*v);
651: y[5*(*idx)+2] += alpha3*(*v);
652: y[5*(*idx)+3] += alpha4*(*v);
653: y[5*(*idx)+4] += alpha5*(*v);
654: idx++; v++;
655: }
656: }
657: PetscLogFlops(10*a->nz - 5*b->AIJ->n);
658: VecRestoreArrayFast(xx,&x);
659: VecRestoreArrayFast(yy,&y);
660: return(0);
661: }
665: int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
666: {
667: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
668: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
669: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
670: int ierr,m = b->AIJ->m,*idx,*ii;
671: int n,i,jrow,j;
674: if (yy != zz) {VecCopy(yy,zz);}
675: VecGetArrayFast(xx,&x);
676: VecGetArrayFast(zz,&y);
677: idx = a->j;
678: v = a->a;
679: ii = a->i;
681: for (i=0; i<m; i++) {
682: jrow = ii[i];
683: n = ii[i+1] - jrow;
684: sum1 = 0.0;
685: sum2 = 0.0;
686: sum3 = 0.0;
687: sum4 = 0.0;
688: sum5 = 0.0;
689: for (j=0; j<n; j++) {
690: sum1 += v[jrow]*x[5*idx[jrow]];
691: sum2 += v[jrow]*x[5*idx[jrow]+1];
692: sum3 += v[jrow]*x[5*idx[jrow]+2];
693: sum4 += v[jrow]*x[5*idx[jrow]+3];
694: sum5 += v[jrow]*x[5*idx[jrow]+4];
695: jrow++;
696: }
697: y[5*i] += sum1;
698: y[5*i+1] += sum2;
699: y[5*i+2] += sum3;
700: y[5*i+3] += sum4;
701: y[5*i+4] += sum5;
702: }
704: PetscLogFlops(10*a->nz);
705: VecRestoreArrayFast(xx,&x);
706: VecRestoreArrayFast(zz,&y);
707: return(0);
708: }
712: int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
713: {
714: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
715: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
716: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
717: int ierr,m = b->AIJ->m,n,i,*idx;
720: if (yy != zz) {VecCopy(yy,zz);}
721: VecGetArrayFast(xx,&x);
722: VecGetArrayFast(zz,&y);
723:
724: for (i=0; i<m; i++) {
725: idx = a->j + a->i[i] ;
726: v = a->a + a->i[i] ;
727: n = a->i[i+1] - a->i[i];
728: alpha1 = x[5*i];
729: alpha2 = x[5*i+1];
730: alpha3 = x[5*i+2];
731: alpha4 = x[5*i+3];
732: alpha5 = x[5*i+4];
733: while (n-->0) {
734: y[5*(*idx)] += alpha1*(*v);
735: y[5*(*idx)+1] += alpha2*(*v);
736: y[5*(*idx)+2] += alpha3*(*v);
737: y[5*(*idx)+3] += alpha4*(*v);
738: y[5*(*idx)+4] += alpha5*(*v);
739: idx++; v++;
740: }
741: }
742: PetscLogFlops(10*a->nz);
743: VecRestoreArrayFast(xx,&x);
744: VecRestoreArrayFast(zz,&y);
745: return(0);
746: }
748: /* ------------------------------------------------------------------------------*/
751: int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
752: {
753: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
754: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
755: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
756: int ierr,m = b->AIJ->m,*idx,*ii;
757: int n,i,jrow,j;
760: VecGetArrayFast(xx,&x);
761: VecGetArrayFast(yy,&y);
762: idx = a->j;
763: v = a->a;
764: ii = a->i;
766: for (i=0; i<m; i++) {
767: jrow = ii[i];
768: n = ii[i+1] - jrow;
769: sum1 = 0.0;
770: sum2 = 0.0;
771: sum3 = 0.0;
772: sum4 = 0.0;
773: sum5 = 0.0;
774: sum6 = 0.0;
775: for (j=0; j<n; j++) {
776: sum1 += v[jrow]*x[6*idx[jrow]];
777: sum2 += v[jrow]*x[6*idx[jrow]+1];
778: sum3 += v[jrow]*x[6*idx[jrow]+2];
779: sum4 += v[jrow]*x[6*idx[jrow]+3];
780: sum5 += v[jrow]*x[6*idx[jrow]+4];
781: sum6 += v[jrow]*x[6*idx[jrow]+5];
782: jrow++;
783: }
784: y[6*i] = sum1;
785: y[6*i+1] = sum2;
786: y[6*i+2] = sum3;
787: y[6*i+3] = sum4;
788: y[6*i+4] = sum5;
789: y[6*i+5] = sum6;
790: }
792: PetscLogFlops(12*a->nz - 6*m);
793: VecRestoreArrayFast(xx,&x);
794: VecRestoreArrayFast(yy,&y);
795: return(0);
796: }
800: int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
801: {
802: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
803: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
804: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
805: int ierr,m = b->AIJ->m,n,i,*idx;
808: VecSet(&zero,yy);
809: VecGetArrayFast(xx,&x);
810: VecGetArrayFast(yy,&y);
812: for (i=0; i<m; i++) {
813: idx = a->j + a->i[i] ;
814: v = a->a + a->i[i] ;
815: n = a->i[i+1] - a->i[i];
816: alpha1 = x[6*i];
817: alpha2 = x[6*i+1];
818: alpha3 = x[6*i+2];
819: alpha4 = x[6*i+3];
820: alpha5 = x[6*i+4];
821: alpha6 = x[6*i+5];
822: while (n-->0) {
823: y[6*(*idx)] += alpha1*(*v);
824: y[6*(*idx)+1] += alpha2*(*v);
825: y[6*(*idx)+2] += alpha3*(*v);
826: y[6*(*idx)+3] += alpha4*(*v);
827: y[6*(*idx)+4] += alpha5*(*v);
828: y[6*(*idx)+5] += alpha6*(*v);
829: idx++; v++;
830: }
831: }
832: PetscLogFlops(12*a->nz - 6*b->AIJ->n);
833: VecRestoreArrayFast(xx,&x);
834: VecRestoreArrayFast(yy,&y);
835: return(0);
836: }
840: int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
841: {
842: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
843: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
844: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
845: int ierr,m = b->AIJ->m,*idx,*ii;
846: int n,i,jrow,j;
849: if (yy != zz) {VecCopy(yy,zz);}
850: VecGetArrayFast(xx,&x);
851: VecGetArrayFast(zz,&y);
852: idx = a->j;
853: v = a->a;
854: ii = a->i;
856: for (i=0; i<m; i++) {
857: jrow = ii[i];
858: n = ii[i+1] - jrow;
859: sum1 = 0.0;
860: sum2 = 0.0;
861: sum3 = 0.0;
862: sum4 = 0.0;
863: sum5 = 0.0;
864: sum6 = 0.0;
865: for (j=0; j<n; j++) {
866: sum1 += v[jrow]*x[6*idx[jrow]];
867: sum2 += v[jrow]*x[6*idx[jrow]+1];
868: sum3 += v[jrow]*x[6*idx[jrow]+2];
869: sum4 += v[jrow]*x[6*idx[jrow]+3];
870: sum5 += v[jrow]*x[6*idx[jrow]+4];
871: sum6 += v[jrow]*x[6*idx[jrow]+5];
872: jrow++;
873: }
874: y[6*i] += sum1;
875: y[6*i+1] += sum2;
876: y[6*i+2] += sum3;
877: y[6*i+3] += sum4;
878: y[6*i+4] += sum5;
879: y[6*i+5] += sum6;
880: }
882: PetscLogFlops(12*a->nz);
883: VecRestoreArrayFast(xx,&x);
884: VecRestoreArrayFast(zz,&y);
885: return(0);
886: }
890: int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
891: {
892: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
893: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
894: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
895: int ierr,m = b->AIJ->m,n,i,*idx;
898: if (yy != zz) {VecCopy(yy,zz);}
899: VecGetArrayFast(xx,&x);
900: VecGetArrayFast(zz,&y);
901:
902: for (i=0; i<m; i++) {
903: idx = a->j + a->i[i] ;
904: v = a->a + a->i[i] ;
905: n = a->i[i+1] - a->i[i];
906: alpha1 = x[6*i];
907: alpha2 = x[6*i+1];
908: alpha3 = x[6*i+2];
909: alpha4 = x[6*i+3];
910: alpha5 = x[6*i+4];
911: alpha6 = x[6*i+5];
912: while (n-->0) {
913: y[6*(*idx)] += alpha1*(*v);
914: y[6*(*idx)+1] += alpha2*(*v);
915: y[6*(*idx)+2] += alpha3*(*v);
916: y[6*(*idx)+3] += alpha4*(*v);
917: y[6*(*idx)+4] += alpha5*(*v);
918: y[6*(*idx)+5] += alpha6*(*v);
919: idx++; v++;
920: }
921: }
922: PetscLogFlops(12*a->nz);
923: VecRestoreArrayFast(xx,&x);
924: VecRestoreArrayFast(zz,&y);
925: return(0);
926: }
928: /* ------------------------------------------------------------------------------*/
931: int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
932: {
933: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
934: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
935: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
936: int ierr,m = b->AIJ->m,*idx,*ii;
937: int n,i,jrow,j;
940: VecGetArrayFast(xx,&x);
941: VecGetArrayFast(yy,&y);
942: idx = a->j;
943: v = a->a;
944: ii = a->i;
946: for (i=0; i<m; i++) {
947: jrow = ii[i];
948: n = ii[i+1] - jrow;
949: sum1 = 0.0;
950: sum2 = 0.0;
951: sum3 = 0.0;
952: sum4 = 0.0;
953: sum5 = 0.0;
954: sum6 = 0.0;
955: sum7 = 0.0;
956: sum8 = 0.0;
957: for (j=0; j<n; j++) {
958: sum1 += v[jrow]*x[8*idx[jrow]];
959: sum2 += v[jrow]*x[8*idx[jrow]+1];
960: sum3 += v[jrow]*x[8*idx[jrow]+2];
961: sum4 += v[jrow]*x[8*idx[jrow]+3];
962: sum5 += v[jrow]*x[8*idx[jrow]+4];
963: sum6 += v[jrow]*x[8*idx[jrow]+5];
964: sum7 += v[jrow]*x[8*idx[jrow]+6];
965: sum8 += v[jrow]*x[8*idx[jrow]+7];
966: jrow++;
967: }
968: y[8*i] = sum1;
969: y[8*i+1] = sum2;
970: y[8*i+2] = sum3;
971: y[8*i+3] = sum4;
972: y[8*i+4] = sum5;
973: y[8*i+5] = sum6;
974: y[8*i+6] = sum7;
975: y[8*i+7] = sum8;
976: }
978: PetscLogFlops(16*a->nz - 8*m);
979: VecRestoreArrayFast(xx,&x);
980: VecRestoreArrayFast(yy,&y);
981: return(0);
982: }
986: int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
987: {
988: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
989: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
990: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
991: int ierr,m = b->AIJ->m,n,i,*idx;
994: VecSet(&zero,yy);
995: VecGetArrayFast(xx,&x);
996: VecGetArrayFast(yy,&y);
998: for (i=0; i<m; i++) {
999: idx = a->j + a->i[i] ;
1000: v = a->a + a->i[i] ;
1001: n = a->i[i+1] - a->i[i];
1002: alpha1 = x[8*i];
1003: alpha2 = x[8*i+1];
1004: alpha3 = x[8*i+2];
1005: alpha4 = x[8*i+3];
1006: alpha5 = x[8*i+4];
1007: alpha6 = x[8*i+5];
1008: alpha7 = x[8*i+6];
1009: alpha8 = x[8*i+7];
1010: while (n-->0) {
1011: y[8*(*idx)] += alpha1*(*v);
1012: y[8*(*idx)+1] += alpha2*(*v);
1013: y[8*(*idx)+2] += alpha3*(*v);
1014: y[8*(*idx)+3] += alpha4*(*v);
1015: y[8*(*idx)+4] += alpha5*(*v);
1016: y[8*(*idx)+5] += alpha6*(*v);
1017: y[8*(*idx)+6] += alpha7*(*v);
1018: y[8*(*idx)+7] += alpha8*(*v);
1019: idx++; v++;
1020: }
1021: }
1022: PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1023: VecRestoreArrayFast(xx,&x);
1024: VecRestoreArrayFast(yy,&y);
1025: return(0);
1026: }
1030: int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1031: {
1032: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1033: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1034: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1035: int ierr,m = b->AIJ->m,*idx,*ii;
1036: int n,i,jrow,j;
1039: if (yy != zz) {VecCopy(yy,zz);}
1040: VecGetArrayFast(xx,&x);
1041: VecGetArrayFast(zz,&y);
1042: idx = a->j;
1043: v = a->a;
1044: ii = a->i;
1046: for (i=0; i<m; i++) {
1047: jrow = ii[i];
1048: n = ii[i+1] - jrow;
1049: sum1 = 0.0;
1050: sum2 = 0.0;
1051: sum3 = 0.0;
1052: sum4 = 0.0;
1053: sum5 = 0.0;
1054: sum6 = 0.0;
1055: sum7 = 0.0;
1056: sum8 = 0.0;
1057: for (j=0; j<n; j++) {
1058: sum1 += v[jrow]*x[8*idx[jrow]];
1059: sum2 += v[jrow]*x[8*idx[jrow]+1];
1060: sum3 += v[jrow]*x[8*idx[jrow]+2];
1061: sum4 += v[jrow]*x[8*idx[jrow]+3];
1062: sum5 += v[jrow]*x[8*idx[jrow]+4];
1063: sum6 += v[jrow]*x[8*idx[jrow]+5];
1064: sum7 += v[jrow]*x[8*idx[jrow]+6];
1065: sum8 += v[jrow]*x[8*idx[jrow]+7];
1066: jrow++;
1067: }
1068: y[8*i] += sum1;
1069: y[8*i+1] += sum2;
1070: y[8*i+2] += sum3;
1071: y[8*i+3] += sum4;
1072: y[8*i+4] += sum5;
1073: y[8*i+5] += sum6;
1074: y[8*i+6] += sum7;
1075: y[8*i+7] += sum8;
1076: }
1078: PetscLogFlops(16*a->nz);
1079: VecRestoreArrayFast(xx,&x);
1080: VecRestoreArrayFast(zz,&y);
1081: return(0);
1082: }
1086: int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1087: {
1088: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1089: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1090: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1091: int ierr,m = b->AIJ->m,n,i,*idx;
1094: if (yy != zz) {VecCopy(yy,zz);}
1095: VecGetArrayFast(xx,&x);
1096: VecGetArrayFast(zz,&y);
1097: for (i=0; i<m; i++) {
1098: idx = a->j + a->i[i] ;
1099: v = a->a + a->i[i] ;
1100: n = a->i[i+1] - a->i[i];
1101: alpha1 = x[8*i];
1102: alpha2 = x[8*i+1];
1103: alpha3 = x[8*i+2];
1104: alpha4 = x[8*i+3];
1105: alpha5 = x[8*i+4];
1106: alpha6 = x[8*i+5];
1107: alpha7 = x[8*i+6];
1108: alpha8 = x[8*i+7];
1109: while (n-->0) {
1110: y[8*(*idx)] += alpha1*(*v);
1111: y[8*(*idx)+1] += alpha2*(*v);
1112: y[8*(*idx)+2] += alpha3*(*v);
1113: y[8*(*idx)+3] += alpha4*(*v);
1114: y[8*(*idx)+4] += alpha5*(*v);
1115: y[8*(*idx)+5] += alpha6*(*v);
1116: y[8*(*idx)+6] += alpha7*(*v);
1117: y[8*(*idx)+7] += alpha8*(*v);
1118: idx++; v++;
1119: }
1120: }
1121: PetscLogFlops(16*a->nz);
1122: VecRestoreArrayFast(xx,&x);
1123: VecRestoreArrayFast(zz,&y);
1124: return(0);
1125: }
1127: /*--------------------------------------------------------------------------------------------*/
1130: int MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1131: {
1132: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1133: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1134: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1135: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1136: int ierr,m = b->AIJ->m,*idx,*ii;
1137: int n,i,jrow,j;
1140: VecGetArrayFast(xx,&x);
1141: VecGetArrayFast(yy,&y);
1142: idx = a->j;
1143: v = a->a;
1144: ii = a->i;
1146: for (i=0; i<m; i++) {
1147: jrow = ii[i];
1148: n = ii[i+1] - jrow;
1149: sum1 = 0.0;
1150: sum2 = 0.0;
1151: sum3 = 0.0;
1152: sum4 = 0.0;
1153: sum5 = 0.0;
1154: sum6 = 0.0;
1155: sum7 = 0.0;
1156: sum8 = 0.0;
1157: sum9 = 0.0;
1158: sum10 = 0.0;
1159: sum11 = 0.0;
1160: sum12 = 0.0;
1161: sum13 = 0.0;
1162: sum14 = 0.0;
1163: sum15 = 0.0;
1164: sum16 = 0.0;
1165: for (j=0; j<n; j++) {
1166: sum1 += v[jrow]*x[16*idx[jrow]];
1167: sum2 += v[jrow]*x[16*idx[jrow]+1];
1168: sum3 += v[jrow]*x[16*idx[jrow]+2];
1169: sum4 += v[jrow]*x[16*idx[jrow]+3];
1170: sum5 += v[jrow]*x[16*idx[jrow]+4];
1171: sum6 += v[jrow]*x[16*idx[jrow]+5];
1172: sum7 += v[jrow]*x[16*idx[jrow]+6];
1173: sum8 += v[jrow]*x[16*idx[jrow]+7];
1174: sum9 += v[jrow]*x[16*idx[jrow]+8];
1175: sum10 += v[jrow]*x[16*idx[jrow]+9];
1176: sum11 += v[jrow]*x[16*idx[jrow]+10];
1177: sum12 += v[jrow]*x[16*idx[jrow]+11];
1178: sum13 += v[jrow]*x[16*idx[jrow]+12];
1179: sum14 += v[jrow]*x[16*idx[jrow]+13];
1180: sum15 += v[jrow]*x[16*idx[jrow]+14];
1181: sum16 += v[jrow]*x[16*idx[jrow]+15];
1182: jrow++;
1183: }
1184: y[16*i] = sum1;
1185: y[16*i+1] = sum2;
1186: y[16*i+2] = sum3;
1187: y[16*i+3] = sum4;
1188: y[16*i+4] = sum5;
1189: y[16*i+5] = sum6;
1190: y[16*i+6] = sum7;
1191: y[16*i+7] = sum8;
1192: y[16*i+8] = sum9;
1193: y[16*i+9] = sum10;
1194: y[16*i+10] = sum11;
1195: y[16*i+11] = sum12;
1196: y[16*i+12] = sum13;
1197: y[16*i+13] = sum14;
1198: y[16*i+14] = sum15;
1199: y[16*i+15] = sum16;
1200: }
1202: PetscLogFlops(32*a->nz - 16*m);
1203: VecRestoreArrayFast(xx,&x);
1204: VecRestoreArrayFast(yy,&y);
1205: return(0);
1206: }
1210: int MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1211: {
1212: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1213: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1214: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1215: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1216: int ierr,m = b->AIJ->m,n,i,*idx;
1219: VecSet(&zero,yy);
1220: VecGetArrayFast(xx,&x);
1221: VecGetArrayFast(yy,&y);
1223: for (i=0; i<m; i++) {
1224: idx = a->j + a->i[i] ;
1225: v = a->a + a->i[i] ;
1226: n = a->i[i+1] - a->i[i];
1227: alpha1 = x[16*i];
1228: alpha2 = x[16*i+1];
1229: alpha3 = x[16*i+2];
1230: alpha4 = x[16*i+3];
1231: alpha5 = x[16*i+4];
1232: alpha6 = x[16*i+5];
1233: alpha7 = x[16*i+6];
1234: alpha8 = x[16*i+7];
1235: alpha9 = x[16*i+8];
1236: alpha10 = x[16*i+9];
1237: alpha11 = x[16*i+10];
1238: alpha12 = x[16*i+11];
1239: alpha13 = x[16*i+12];
1240: alpha14 = x[16*i+13];
1241: alpha15 = x[16*i+14];
1242: alpha16 = x[16*i+15];
1243: while (n-->0) {
1244: y[16*(*idx)] += alpha1*(*v);
1245: y[16*(*idx)+1] += alpha2*(*v);
1246: y[16*(*idx)+2] += alpha3*(*v);
1247: y[16*(*idx)+3] += alpha4*(*v);
1248: y[16*(*idx)+4] += alpha5*(*v);
1249: y[16*(*idx)+5] += alpha6*(*v);
1250: y[16*(*idx)+6] += alpha7*(*v);
1251: y[16*(*idx)+7] += alpha8*(*v);
1252: y[16*(*idx)+8] += alpha9*(*v);
1253: y[16*(*idx)+9] += alpha10*(*v);
1254: y[16*(*idx)+10] += alpha11*(*v);
1255: y[16*(*idx)+11] += alpha12*(*v);
1256: y[16*(*idx)+12] += alpha13*(*v);
1257: y[16*(*idx)+13] += alpha14*(*v);
1258: y[16*(*idx)+14] += alpha15*(*v);
1259: y[16*(*idx)+15] += alpha16*(*v);
1260: idx++; v++;
1261: }
1262: }
1263: PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1264: VecRestoreArrayFast(xx,&x);
1265: VecRestoreArrayFast(yy,&y);
1266: return(0);
1267: }
1271: int MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1272: {
1273: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1274: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1275: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1276: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1277: int ierr,m = b->AIJ->m,*idx,*ii;
1278: int n,i,jrow,j;
1281: if (yy != zz) {VecCopy(yy,zz);}
1282: VecGetArrayFast(xx,&x);
1283: VecGetArrayFast(zz,&y);
1284: idx = a->j;
1285: v = a->a;
1286: ii = a->i;
1288: for (i=0; i<m; i++) {
1289: jrow = ii[i];
1290: n = ii[i+1] - jrow;
1291: sum1 = 0.0;
1292: sum2 = 0.0;
1293: sum3 = 0.0;
1294: sum4 = 0.0;
1295: sum5 = 0.0;
1296: sum6 = 0.0;
1297: sum7 = 0.0;
1298: sum8 = 0.0;
1299: sum9 = 0.0;
1300: sum10 = 0.0;
1301: sum11 = 0.0;
1302: sum12 = 0.0;
1303: sum13 = 0.0;
1304: sum14 = 0.0;
1305: sum15 = 0.0;
1306: sum16 = 0.0;
1307: for (j=0; j<n; j++) {
1308: sum1 += v[jrow]*x[16*idx[jrow]];
1309: sum2 += v[jrow]*x[16*idx[jrow]+1];
1310: sum3 += v[jrow]*x[16*idx[jrow]+2];
1311: sum4 += v[jrow]*x[16*idx[jrow]+3];
1312: sum5 += v[jrow]*x[16*idx[jrow]+4];
1313: sum6 += v[jrow]*x[16*idx[jrow]+5];
1314: sum7 += v[jrow]*x[16*idx[jrow]+6];
1315: sum8 += v[jrow]*x[16*idx[jrow]+7];
1316: sum9 += v[jrow]*x[16*idx[jrow]+8];
1317: sum10 += v[jrow]*x[16*idx[jrow]+9];
1318: sum11 += v[jrow]*x[16*idx[jrow]+10];
1319: sum12 += v[jrow]*x[16*idx[jrow]+11];
1320: sum13 += v[jrow]*x[16*idx[jrow]+12];
1321: sum14 += v[jrow]*x[16*idx[jrow]+13];
1322: sum15 += v[jrow]*x[16*idx[jrow]+14];
1323: sum16 += v[jrow]*x[16*idx[jrow]+15];
1324: jrow++;
1325: }
1326: y[16*i] += sum1;
1327: y[16*i+1] += sum2;
1328: y[16*i+2] += sum3;
1329: y[16*i+3] += sum4;
1330: y[16*i+4] += sum5;
1331: y[16*i+5] += sum6;
1332: y[16*i+6] += sum7;
1333: y[16*i+7] += sum8;
1334: y[16*i+8] += sum9;
1335: y[16*i+9] += sum10;
1336: y[16*i+10] += sum11;
1337: y[16*i+11] += sum12;
1338: y[16*i+12] += sum13;
1339: y[16*i+13] += sum14;
1340: y[16*i+14] += sum15;
1341: y[16*i+15] += sum16;
1342: }
1344: PetscLogFlops(32*a->nz);
1345: VecRestoreArrayFast(xx,&x);
1346: VecRestoreArrayFast(zz,&y);
1347: return(0);
1348: }
1352: int MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1353: {
1354: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1355: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1356: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1357: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1358: int ierr,m = b->AIJ->m,n,i,*idx;
1361: if (yy != zz) {VecCopy(yy,zz);}
1362: VecGetArrayFast(xx,&x);
1363: VecGetArrayFast(zz,&y);
1364: for (i=0; i<m; i++) {
1365: idx = a->j + a->i[i] ;
1366: v = a->a + a->i[i] ;
1367: n = a->i[i+1] - a->i[i];
1368: alpha1 = x[16*i];
1369: alpha2 = x[16*i+1];
1370: alpha3 = x[16*i+2];
1371: alpha4 = x[16*i+3];
1372: alpha5 = x[16*i+4];
1373: alpha6 = x[16*i+5];
1374: alpha7 = x[16*i+6];
1375: alpha8 = x[16*i+7];
1376: alpha9 = x[16*i+8];
1377: alpha10 = x[16*i+9];
1378: alpha11 = x[16*i+10];
1379: alpha12 = x[16*i+11];
1380: alpha13 = x[16*i+12];
1381: alpha14 = x[16*i+13];
1382: alpha15 = x[16*i+14];
1383: alpha16 = x[16*i+15];
1384: while (n-->0) {
1385: y[16*(*idx)] += alpha1*(*v);
1386: y[16*(*idx)+1] += alpha2*(*v);
1387: y[16*(*idx)+2] += alpha3*(*v);
1388: y[16*(*idx)+3] += alpha4*(*v);
1389: y[16*(*idx)+4] += alpha5*(*v);
1390: y[16*(*idx)+5] += alpha6*(*v);
1391: y[16*(*idx)+6] += alpha7*(*v);
1392: y[16*(*idx)+7] += alpha8*(*v);
1393: y[16*(*idx)+8] += alpha9*(*v);
1394: y[16*(*idx)+9] += alpha10*(*v);
1395: y[16*(*idx)+10] += alpha11*(*v);
1396: y[16*(*idx)+11] += alpha12*(*v);
1397: y[16*(*idx)+12] += alpha13*(*v);
1398: y[16*(*idx)+13] += alpha14*(*v);
1399: y[16*(*idx)+14] += alpha15*(*v);
1400: y[16*(*idx)+15] += alpha16*(*v);
1401: idx++; v++;
1402: }
1403: }
1404: PetscLogFlops(32*a->nz);
1405: VecRestoreArrayFast(xx,&x);
1406: VecRestoreArrayFast(zz,&y);
1407: return(0);
1408: }
1410: /*===================================================================================*/
1413: int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1414: {
1415: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1416: int ierr;
1419: /* start the scatter */
1420: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1421: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
1422: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1423: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
1424: return(0);
1425: }
1429: int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1430: {
1431: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1432: int ierr;
1434: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1435: VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1436: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
1437: VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1438: return(0);
1439: }
1443: int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1444: {
1445: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1446: int ierr;
1449: /* start the scatter */
1450: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1451: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
1452: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1453: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);
1454: return(0);
1455: }
1459: int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1460: {
1461: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1462: int ierr;
1464: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1465: VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1466: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
1467: VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1468: return(0);
1469: }
1471: /* ---------------------------------------------------------------------------------- */
1472: /*MC
1473: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1474: operations for multicomponent problems. It interpolates each component the same
1475: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
1476: and MATMPIAIJ for distributed matrices.
1478: Operations provided:
1479: . MatMult
1480: . MatMultTranspose
1481: . MatMultAdd
1482: . MatMultTransposeAdd
1484: Level: advanced
1486: M*/
1489: int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1490: {
1491: int ierr,size,n;
1492: Mat_MPIMAIJ *b;
1493: Mat B;
1496: PetscObjectReference((PetscObject)A);
1498: if (dof == 1) {
1499: *maij = A;
1500: } else {
1501: MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);
1502: B->assembled = PETSC_TRUE;
1504: MPI_Comm_size(A->comm,&size);
1505: if (size == 1) {
1506: MatSetType(B,MATSEQMAIJ);
1507: B->ops->destroy = MatDestroy_SeqMAIJ;
1508: b = (Mat_MPIMAIJ*)B->data;
1509: b->dof = dof;
1510: b->AIJ = A;
1511: if (dof == 2) {
1512: B->ops->mult = MatMult_SeqMAIJ_2;
1513: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
1514: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
1515: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1516: } else if (dof == 3) {
1517: B->ops->mult = MatMult_SeqMAIJ_3;
1518: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
1519: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
1520: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1521: } else if (dof == 4) {
1522: B->ops->mult = MatMult_SeqMAIJ_4;
1523: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
1524: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
1525: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1526: } else if (dof == 5) {
1527: B->ops->mult = MatMult_SeqMAIJ_5;
1528: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
1529: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
1530: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1531: } else if (dof == 6) {
1532: B->ops->mult = MatMult_SeqMAIJ_6;
1533: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
1534: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
1535: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1536: } else if (dof == 8) {
1537: B->ops->mult = MatMult_SeqMAIJ_8;
1538: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
1539: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
1540: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1541: } else if (dof == 16) {
1542: B->ops->mult = MatMult_SeqMAIJ_16;
1543: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
1544: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
1545: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1546: } else {
1547: SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
1548: }
1549: } else {
1550: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1551: IS from,to;
1552: Vec gvec;
1553: int *garray,i;
1555: MatSetType(B,MATMPIMAIJ);
1556: B->ops->destroy = MatDestroy_MPIMAIJ;
1557: b = (Mat_MPIMAIJ*)B->data;
1558: b->dof = dof;
1559: b->A = A;
1560: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
1561: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
1563: VecGetSize(mpiaij->lvec,&n);
1564: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
1566: /* create two temporary Index sets for build scatter gather */
1567: PetscMalloc((n+1)*sizeof(int),&garray);
1568: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1569: ISCreateBlock(A->comm,dof,n,garray,&from);
1570: PetscFree(garray);
1571: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
1573: /* create temporary global vector to generate scatter context */
1574: VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);
1576: /* generate the scatter context */
1577: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
1579: ISDestroy(from);
1580: ISDestroy(to);
1581: VecDestroy(gvec);
1583: B->ops->mult = MatMult_MPIMAIJ_dof;
1584: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
1585: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
1586: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1587: }
1588: *maij = B;
1589: }
1590: return(0);
1591: }