Actual source code: maij.c
1: /*$Id: maij.c,v 1.16 2001/04/10 19:35:37 bsmith Exp $*/
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include src/mat/impls/aij/mpi/mpiaij.h
21: typedef struct {
22: int dof; /* number of components */
23: Mat AIJ; /* representation of interpolation for one component */
24: } Mat_SeqMAIJ;
26: typedef struct {
27: int dof; /* number of components */
28: Mat AIJ,OAIJ; /* representation of interpolation for one component */
29: Mat A;
30: VecScatter ctx; /* update ghost points for parallel case */
31: Vec w; /* work space for ghost values for parallel case */
32: } Mat_MPIMAIJ;
34: int MatMAIJGetAIJ(Mat A,Mat *B)
35: {
36: int ierr;
37: PetscTruth ismpimaij,isseqmaij;
40: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
41: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
42: if (ismpimaij) {
43: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
45: *B = b->A;
46: } else if (isseqmaij) {
47: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
49: *B = b->AIJ;
50: } else {
51: *B = A;
52: }
53: return(0);
54: }
56: int MatMAIJRedimension(Mat A,int dof,Mat *B)
57: {
59: Mat Aij;
62: MatMAIJGetAIJ(A,&Aij);
63: MatCreateMAIJ(Aij,dof,B);
64: return(0);
65: }
67: int MatDestroy_SeqMAIJ(Mat A)
68: {
69: int ierr;
70: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
73: if (b->AIJ) {
74: MatDestroy(b->AIJ);
75: }
76: PetscFree(b);
77: return(0);
78: }
80: int MatDestroy_MPIMAIJ(Mat A)
81: {
82: int ierr;
83: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
86: if (b->AIJ) {
87: MatDestroy(b->AIJ);
88: }
89: if (b->OAIJ) {
90: MatDestroy(b->OAIJ);
91: }
92: if (b->A) {
93: MatDestroy(b->A);
94: }
95: if (b->ctx) {
96: VecScatterDestroy(b->ctx);
97: }
98: if (b->w) {
99: VecDestroy(b->w);
100: }
101: PetscFree(b);
102: return(0);
103: }
105: EXTERN_C_BEGIN
106: int MatCreate_MAIJ(Mat A)
107: {
108: int ierr;
109: Mat_MPIMAIJ *b;
112: ierr = PetscNew(Mat_MPIMAIJ,&b);
113: A->data = (void*)b;
114: PetscMemzero(b,sizeof(Mat_MPIMAIJ));
115: PetscMemzero(A->ops,sizeof(struct _MatOps));
116: A->factor = 0;
117: A->mapping = 0;
119: b->AIJ = 0;
120: b->dof = 0;
121: b->OAIJ = 0;
122: b->ctx = 0;
123: b->w = 0;
124: return(0);
125: }
126: EXTERN_C_END
128: /* --------------------------------------------------------------------------------------*/
129: int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
130: {
131: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
132: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
133: Scalar *x,*y,*v,sum1, sum2;
134: int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
135: int n,i,jrow,j;
138: VecGetArray(xx,&x);
139: VecGetArray(yy,&y);
140: x = x + shift; /* shift for Fortran start by 1 indexing */
141: idx = a->j;
142: v = a->a;
143: ii = a->i;
145: v += shift; /* shift for Fortran start by 1 indexing */
146: idx += shift;
147: for (i=0; i<m; i++) {
148: jrow = ii[i];
149: n = ii[i+1] - jrow;
150: sum1 = 0.0;
151: sum2 = 0.0;
152: for (j=0; j<n; j++) {
153: sum1 += v[jrow]*x[2*idx[jrow]];
154: sum2 += v[jrow]*x[2*idx[jrow]+1];
155: jrow++;
156: }
157: y[2*i] = sum1;
158: y[2*i+1] = sum2;
159: }
161: PetscLogFlops(4*a->nz - 2*m);
162: VecRestoreArray(xx,&x);
163: VecRestoreArray(yy,&y);
164: return(0);
165: }
167: int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
168: {
169: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
170: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
171: Scalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
172: int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
175: VecSet(&zero,yy);
176: VecGetArray(xx,&x);
177: VecGetArray(yy,&y);
178: y = y + shift; /* shift for Fortran start by 1 indexing */
179: for (i=0; i<m; i++) {
180: idx = a->j + a->i[i] + shift;
181: v = a->a + a->i[i] + shift;
182: n = a->i[i+1] - a->i[i];
183: alpha1 = x[2*i];
184: alpha2 = x[2*i+1];
185: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
186: }
187: PetscLogFlops(4*a->nz - 2*b->AIJ->n);
188: VecRestoreArray(xx,&x);
189: VecRestoreArray(yy,&y);
190: return(0);
191: }
193: int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
194: {
195: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
196: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
197: Scalar *x,*y,*v,sum1, sum2;
198: int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
199: int n,i,jrow,j;
202: if (yy != zz) {VecCopy(yy,zz);}
203: VecGetArray(xx,&x);
204: VecGetArray(zz,&y);
205: x = x + shift; /* shift for Fortran start by 1 indexing */
206: idx = a->j;
207: v = a->a;
208: ii = a->i;
210: v += shift; /* shift for Fortran start by 1 indexing */
211: idx += shift;
212: for (i=0; i<m; i++) {
213: jrow = ii[i];
214: n = ii[i+1] - jrow;
215: sum1 = 0.0;
216: sum2 = 0.0;
217: for (j=0; j<n; j++) {
218: sum1 += v[jrow]*x[2*idx[jrow]];
219: sum2 += v[jrow]*x[2*idx[jrow]+1];
220: jrow++;
221: }
222: y[2*i] += sum1;
223: y[2*i+1] += sum2;
224: }
226: PetscLogFlops(4*a->nz - 2*m);
227: VecRestoreArray(xx,&x);
228: VecRestoreArray(zz,&y);
229: return(0);
230: }
231: int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
232: {
233: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
234: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
235: Scalar *x,*y,*v,alpha1,alpha2;
236: int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
239: if (yy != zz) {VecCopy(yy,zz);}
240: VecGetArray(xx,&x);
241: VecGetArray(zz,&y);
242: y = y + shift; /* shift for Fortran start by 1 indexing */
243: for (i=0; i<m; i++) {
244: idx = a->j + a->i[i] + shift;
245: v = a->a + a->i[i] + shift;
246: n = a->i[i+1] - a->i[i];
247: alpha1 = x[2*i];
248: alpha2 = x[2*i+1];
249: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
250: }
251: PetscLogFlops(4*a->nz - 2*b->AIJ->n);
252: VecRestoreArray(xx,&x);
253: VecRestoreArray(zz,&y);
254: return(0);
255: }
256: /* --------------------------------------------------------------------------------------*/
257: int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
258: {
259: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
260: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
261: Scalar *x,*y,*v,sum1, sum2, sum3;
262: int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
263: int n,i,jrow,j;
266: VecGetArray(xx,&x);
267: VecGetArray(yy,&y);
268: x = x + shift; /* shift for Fortran start by 1 indexing */
269: idx = a->j;
270: v = a->a;
271: ii = a->i;
273: v += shift; /* shift for Fortran start by 1 indexing */
274: idx += shift;
275: for (i=0; i<m; i++) {
276: jrow = ii[i];
277: n = ii[i+1] - jrow;
278: sum1 = 0.0;
279: sum2 = 0.0;
280: sum3 = 0.0;
281: for (j=0; j<n; j++) {
282: sum1 += v[jrow]*x[3*idx[jrow]];
283: sum2 += v[jrow]*x[3*idx[jrow]+1];
284: sum3 += v[jrow]*x[3*idx[jrow]+2];
285: jrow++;
286: }
287: y[3*i] = sum1;
288: y[3*i+1] = sum2;
289: y[3*i+2] = sum3;
290: }
292: PetscLogFlops(6*a->nz - 3*m);
293: VecRestoreArray(xx,&x);
294: VecRestoreArray(yy,&y);
295: return(0);
296: }
298: int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
299: {
300: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
301: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
302: Scalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
303: int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
306: VecSet(&zero,yy);
307: VecGetArray(xx,&x);
308: VecGetArray(yy,&y);
309: y = y + shift; /* shift for Fortran start by 1 indexing */
310: for (i=0; i<m; i++) {
311: idx = a->j + a->i[i] + shift;
312: v = a->a + a->i[i] + shift;
313: n = a->i[i+1] - a->i[i];
314: alpha1 = x[3*i];
315: alpha2 = x[3*i+1];
316: alpha3 = x[3*i+2];
317: while (n-->0) {
318: y[3*(*idx)] += alpha1*(*v);
319: y[3*(*idx)+1] += alpha2*(*v);
320: y[3*(*idx)+2] += alpha3*(*v);
321: idx++; v++;
322: }
323: }
324: PetscLogFlops(6*a->nz - 3*b->AIJ->n);
325: VecRestoreArray(xx,&x);
326: VecRestoreArray(yy,&y);
327: return(0);
328: }
330: int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
331: {
332: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
333: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
334: Scalar *x,*y,*v,sum1, sum2, sum3;
335: int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
336: int n,i,jrow,j;
339: if (yy != zz) {VecCopy(yy,zz);}
340: VecGetArray(xx,&x);
341: VecGetArray(zz,&y);
342: x = x + shift; /* shift for Fortran start by 1 indexing */
343: idx = a->j;
344: v = a->a;
345: ii = a->i;
347: v += shift; /* shift for Fortran start by 1 indexing */
348: idx += shift;
349: for (i=0; i<m; i++) {
350: jrow = ii[i];
351: n = ii[i+1] - jrow;
352: sum1 = 0.0;
353: sum2 = 0.0;
354: sum3 = 0.0;
355: for (j=0; j<n; j++) {
356: sum1 += v[jrow]*x[3*idx[jrow]];
357: sum2 += v[jrow]*x[3*idx[jrow]+1];
358: sum3 += v[jrow]*x[3*idx[jrow]+2];
359: jrow++;
360: }
361: y[3*i] += sum1;
362: y[3*i+1] += sum2;
363: y[3*i+2] += sum3;
364: }
366: PetscLogFlops(6*a->nz);
367: VecRestoreArray(xx,&x);
368: VecRestoreArray(zz,&y);
369: return(0);
370: }
371: int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
372: {
373: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
374: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
375: Scalar *x,*y,*v,alpha1,alpha2,alpha3;
376: int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
379: if (yy != zz) {VecCopy(yy,zz);}
380: VecGetArray(xx,&x);
381: VecGetArray(zz,&y);
382: y = y + shift; /* shift for Fortran start by 1 indexing */
383: for (i=0; i<m; i++) {
384: idx = a->j + a->i[i] + shift;
385: v = a->a + a->i[i] + shift;
386: n = a->i[i+1] - a->i[i];
387: alpha1 = x[3*i];
388: alpha2 = x[3*i+1];
389: alpha3 = x[3*i+2];
390: while (n-->0) {
391: y[3*(*idx)] += alpha1*(*v);
392: y[3*(*idx)+1] += alpha2*(*v);
393: y[3*(*idx)+2] += alpha3*(*v);
394: idx++; v++;
395: }
396: }
397: PetscLogFlops(6*a->nz);
398: VecRestoreArray(xx,&x);
399: VecRestoreArray(zz,&y);
400: return(0);
401: }
403: /* ------------------------------------------------------------------------------*/
404: int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
405: {
406: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
407: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
408: Scalar *x,*y,*v,sum1, sum2, sum3, sum4;
409: int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
410: int n,i,jrow,j;
413: VecGetArray(xx,&x);
414: VecGetArray(yy,&y);
415: x = x + shift; /* shift for Fortran start by 1 indexing */
416: idx = a->j;
417: v = a->a;
418: ii = a->i;
420: v += shift; /* shift for Fortran start by 1 indexing */
421: idx += shift;
422: for (i=0; i<m; i++) {
423: jrow = ii[i];
424: n = ii[i+1] - jrow;
425: sum1 = 0.0;
426: sum2 = 0.0;
427: sum3 = 0.0;
428: sum4 = 0.0;
429: for (j=0; j<n; j++) {
430: sum1 += v[jrow]*x[4*idx[jrow]];
431: sum2 += v[jrow]*x[4*idx[jrow]+1];
432: sum3 += v[jrow]*x[4*idx[jrow]+2];
433: sum4 += v[jrow]*x[4*idx[jrow]+3];
434: jrow++;
435: }
436: y[4*i] = sum1;
437: y[4*i+1] = sum2;
438: y[4*i+2] = sum3;
439: y[4*i+3] = sum4;
440: }
442: PetscLogFlops(8*a->nz - 4*m);
443: VecRestoreArray(xx,&x);
444: VecRestoreArray(yy,&y);
445: return(0);
446: }
448: int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
449: {
450: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
451: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
452: Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
453: int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
456: VecSet(&zero,yy);
457: VecGetArray(xx,&x);
458: VecGetArray(yy,&y);
459: y = y + shift; /* shift for Fortran start by 1 indexing */
460: for (i=0; i<m; i++) {
461: idx = a->j + a->i[i] + shift;
462: v = a->a + a->i[i] + shift;
463: n = a->i[i+1] - a->i[i];
464: alpha1 = x[4*i];
465: alpha2 = x[4*i+1];
466: alpha3 = x[4*i+2];
467: alpha4 = x[4*i+3];
468: while (n-->0) {
469: y[4*(*idx)] += alpha1*(*v);
470: y[4*(*idx)+1] += alpha2*(*v);
471: y[4*(*idx)+2] += alpha3*(*v);
472: y[4*(*idx)+3] += alpha4*(*v);
473: idx++; v++;
474: }
475: }
476: PetscLogFlops(8*a->nz - 4*b->AIJ->n);
477: VecRestoreArray(xx,&x);
478: VecRestoreArray(yy,&y);
479: return(0);
480: }
482: int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
483: {
484: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
485: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
486: Scalar *x,*y,*v,sum1, sum2, sum3, sum4;
487: int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
488: int n,i,jrow,j;
491: if (yy != zz) {VecCopy(yy,zz);}
492: VecGetArray(xx,&x);
493: VecGetArray(zz,&y);
494: x = x + shift; /* shift for Fortran start by 1 indexing */
495: idx = a->j;
496: v = a->a;
497: ii = a->i;
499: v += shift; /* shift for Fortran start by 1 indexing */
500: idx += shift;
501: for (i=0; i<m; i++) {
502: jrow = ii[i];
503: n = ii[i+1] - jrow;
504: sum1 = 0.0;
505: sum2 = 0.0;
506: sum3 = 0.0;
507: sum4 = 0.0;
508: for (j=0; j<n; j++) {
509: sum1 += v[jrow]*x[4*idx[jrow]];
510: sum2 += v[jrow]*x[4*idx[jrow]+1];
511: sum3 += v[jrow]*x[4*idx[jrow]+2];
512: sum4 += v[jrow]*x[4*idx[jrow]+3];
513: jrow++;
514: }
515: y[4*i] += sum1;
516: y[4*i+1] += sum2;
517: y[4*i+2] += sum3;
518: y[4*i+3] += sum4;
519: }
521: PetscLogFlops(8*a->nz - 4*m);
522: VecRestoreArray(xx,&x);
523: VecRestoreArray(zz,&y);
524: return(0);
525: }
526: int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
527: {
528: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
529: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
530: Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
531: int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
534: if (yy != zz) {VecCopy(yy,zz);}
535: VecGetArray(xx,&x);
536: VecGetArray(zz,&y);
537: y = y + shift; /* shift for Fortran start by 1 indexing */
538: for (i=0; i<m; i++) {
539: idx = a->j + a->i[i] + shift;
540: v = a->a + a->i[i] + shift;
541: n = a->i[i+1] - a->i[i];
542: alpha1 = x[4*i];
543: alpha2 = x[4*i+1];
544: alpha3 = x[4*i+2];
545: alpha4 = x[4*i+3];
546: while (n-->0) {
547: y[4*(*idx)] += alpha1*(*v);
548: y[4*(*idx)+1] += alpha2*(*v);
549: y[4*(*idx)+2] += alpha3*(*v);
550: y[4*(*idx)+3] += alpha4*(*v);
551: idx++; v++;
552: }
553: }
554: PetscLogFlops(8*a->nz - 4*b->AIJ->n);
555: VecRestoreArray(xx,&x);
556: VecRestoreArray(zz,&y);
557: return(0);
558: }
559: /* ------------------------------------------------------------------------------*/
561: int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
562: {
563: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
564: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
565: Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
566: int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
567: int n,i,jrow,j;
570: VecGetArray(xx,&x);
571: VecGetArray(yy,&y);
572: x = x + shift; /* shift for Fortran start by 1 indexing */
573: idx = a->j;
574: v = a->a;
575: ii = a->i;
577: v += shift; /* shift for Fortran start by 1 indexing */
578: idx += shift;
579: for (i=0; i<m; i++) {
580: jrow = ii[i];
581: n = ii[i+1] - jrow;
582: sum1 = 0.0;
583: sum2 = 0.0;
584: sum3 = 0.0;
585: sum4 = 0.0;
586: sum5 = 0.0;
587: for (j=0; j<n; j++) {
588: sum1 += v[jrow]*x[5*idx[jrow]];
589: sum2 += v[jrow]*x[5*idx[jrow]+1];
590: sum3 += v[jrow]*x[5*idx[jrow]+2];
591: sum4 += v[jrow]*x[5*idx[jrow]+3];
592: sum5 += v[jrow]*x[5*idx[jrow]+4];
593: jrow++;
594: }
595: y[5*i] = sum1;
596: y[5*i+1] = sum2;
597: y[5*i+2] = sum3;
598: y[5*i+3] = sum4;
599: y[5*i+4] = sum5;
600: }
602: PetscLogFlops(10*a->nz - 5*m);
603: VecRestoreArray(xx,&x);
604: VecRestoreArray(yy,&y);
605: return(0);
606: }
608: int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
609: {
610: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
611: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
612: Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
613: int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
616: VecSet(&zero,yy);
617: VecGetArray(xx,&x);
618: VecGetArray(yy,&y);
619: y = y + shift; /* shift for Fortran start by 1 indexing */
620: for (i=0; i<m; i++) {
621: idx = a->j + a->i[i] + shift;
622: v = a->a + a->i[i] + shift;
623: n = a->i[i+1] - a->i[i];
624: alpha1 = x[5*i];
625: alpha2 = x[5*i+1];
626: alpha3 = x[5*i+2];
627: alpha4 = x[5*i+3];
628: alpha5 = x[5*i+4];
629: while (n-->0) {
630: y[5*(*idx)] += alpha1*(*v);
631: y[5*(*idx)+1] += alpha2*(*v);
632: y[5*(*idx)+2] += alpha3*(*v);
633: y[5*(*idx)+3] += alpha4*(*v);
634: y[5*(*idx)+4] += alpha5*(*v);
635: idx++; v++;
636: }
637: }
638: PetscLogFlops(10*a->nz - 5*b->AIJ->n);
639: VecRestoreArray(xx,&x);
640: VecRestoreArray(yy,&y);
641: return(0);
642: }
644: int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
645: {
646: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
647: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
648: Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
649: int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
650: int n,i,jrow,j;
653: if (yy != zz) {VecCopy(yy,zz);}
654: VecGetArray(xx,&x);
655: VecGetArray(zz,&y);
656: x = x + shift; /* shift for Fortran start by 1 indexing */
657: idx = a->j;
658: v = a->a;
659: ii = a->i;
661: v += shift; /* shift for Fortran start by 1 indexing */
662: idx += shift;
663: for (i=0; i<m; i++) {
664: jrow = ii[i];
665: n = ii[i+1] - jrow;
666: sum1 = 0.0;
667: sum2 = 0.0;
668: sum3 = 0.0;
669: sum4 = 0.0;
670: sum5 = 0.0;
671: for (j=0; j<n; j++) {
672: sum1 += v[jrow]*x[5*idx[jrow]];
673: sum2 += v[jrow]*x[5*idx[jrow]+1];
674: sum3 += v[jrow]*x[5*idx[jrow]+2];
675: sum4 += v[jrow]*x[5*idx[jrow]+3];
676: sum5 += v[jrow]*x[5*idx[jrow]+4];
677: jrow++;
678: }
679: y[5*i] += sum1;
680: y[5*i+1] += sum2;
681: y[5*i+2] += sum3;
682: y[5*i+3] += sum4;
683: y[5*i+4] += sum5;
684: }
686: PetscLogFlops(10*a->nz);
687: VecRestoreArray(xx,&x);
688: VecRestoreArray(zz,&y);
689: return(0);
690: }
692: int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
693: {
694: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
695: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
696: Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
697: int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
700: if (yy != zz) {VecCopy(yy,zz);}
701: VecGetArray(xx,&x);
702: VecGetArray(zz,&y);
703: y = y + shift; /* shift for Fortran start by 1 indexing */
704: for (i=0; i<m; i++) {
705: idx = a->j + a->i[i] + shift;
706: v = a->a + a->i[i] + shift;
707: n = a->i[i+1] - a->i[i];
708: alpha1 = x[5*i];
709: alpha2 = x[5*i+1];
710: alpha3 = x[5*i+2];
711: alpha4 = x[5*i+3];
712: alpha5 = x[5*i+4];
713: while (n-->0) {
714: y[5*(*idx)] += alpha1*(*v);
715: y[5*(*idx)+1] += alpha2*(*v);
716: y[5*(*idx)+2] += alpha3*(*v);
717: y[5*(*idx)+3] += alpha4*(*v);
718: y[5*(*idx)+4] += alpha5*(*v);
719: idx++; v++;
720: }
721: }
722: PetscLogFlops(10*a->nz);
723: VecRestoreArray(xx,&x);
724: VecRestoreArray(zz,&y);
725: return(0);
726: }
728: /* ------------------------------------------------------------------------------*/
729: int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
730: {
731: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
732: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
733: Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
734: int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
735: int n,i,jrow,j;
738: VecGetArray(xx,&x);
739: VecGetArray(yy,&y);
740: x = x + shift; /* shift for Fortran start by 1 indexing */
741: idx = a->j;
742: v = a->a;
743: ii = a->i;
745: v += shift; /* shift for Fortran start by 1 indexing */
746: idx += shift;
747: for (i=0; i<m; i++) {
748: jrow = ii[i];
749: n = ii[i+1] - jrow;
750: sum1 = 0.0;
751: sum2 = 0.0;
752: sum3 = 0.0;
753: sum4 = 0.0;
754: sum5 = 0.0;
755: sum6 = 0.0;
756: for (j=0; j<n; j++) {
757: sum1 += v[jrow]*x[6*idx[jrow]];
758: sum2 += v[jrow]*x[6*idx[jrow]+1];
759: sum3 += v[jrow]*x[6*idx[jrow]+2];
760: sum4 += v[jrow]*x[6*idx[jrow]+3];
761: sum5 += v[jrow]*x[6*idx[jrow]+4];
762: sum6 += v[jrow]*x[6*idx[jrow]+5];
763: jrow++;
764: }
765: y[6*i] = sum1;
766: y[6*i+1] = sum2;
767: y[6*i+2] = sum3;
768: y[6*i+3] = sum4;
769: y[6*i+4] = sum5;
770: y[6*i+5] = sum6;
771: }
773: PetscLogFlops(12*a->nz - 6*m);
774: VecRestoreArray(xx,&x);
775: VecRestoreArray(yy,&y);
776: return(0);
777: }
779: int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
780: {
781: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
782: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
783: Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
784: int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
787: VecSet(&zero,yy);
788: VecGetArray(xx,&x);
789: VecGetArray(yy,&y);
790: y = y + shift; /* shift for Fortran start by 1 indexing */
791: for (i=0; i<m; i++) {
792: idx = a->j + a->i[i] + shift;
793: v = a->a + a->i[i] + shift;
794: n = a->i[i+1] - a->i[i];
795: alpha1 = x[6*i];
796: alpha2 = x[6*i+1];
797: alpha3 = x[6*i+2];
798: alpha4 = x[6*i+3];
799: alpha5 = x[6*i+4];
800: alpha6 = x[6*i+5];
801: while (n-->0) {
802: y[6*(*idx)] += alpha1*(*v);
803: y[6*(*idx)+1] += alpha2*(*v);
804: y[6*(*idx)+2] += alpha3*(*v);
805: y[6*(*idx)+3] += alpha4*(*v);
806: y[6*(*idx)+4] += alpha5*(*v);
807: y[6*(*idx)+5] += alpha6*(*v);
808: idx++; v++;
809: }
810: }
811: PetscLogFlops(12*a->nz - 6*b->AIJ->n);
812: VecRestoreArray(xx,&x);
813: VecRestoreArray(yy,&y);
814: return(0);
815: }
817: int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
818: {
819: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
820: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
821: Scalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
822: int ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
823: int n,i,jrow,j;
826: if (yy != zz) {VecCopy(yy,zz);}
827: VecGetArray(xx,&x);
828: VecGetArray(zz,&y);
829: x = x + shift; /* shift for Fortran start by 1 indexing */
830: idx = a->j;
831: v = a->a;
832: ii = a->i;
834: v += shift; /* shift for Fortran start by 1 indexing */
835: idx += shift;
836: for (i=0; i<m; i++) {
837: jrow = ii[i];
838: n = ii[i+1] - jrow;
839: sum1 = 0.0;
840: sum2 = 0.0;
841: sum3 = 0.0;
842: sum4 = 0.0;
843: sum5 = 0.0;
844: sum6 = 0.0;
845: for (j=0; j<n; j++) {
846: sum1 += v[jrow]*x[6*idx[jrow]];
847: sum2 += v[jrow]*x[6*idx[jrow]+1];
848: sum3 += v[jrow]*x[6*idx[jrow]+2];
849: sum4 += v[jrow]*x[6*idx[jrow]+3];
850: sum5 += v[jrow]*x[6*idx[jrow]+4];
851: sum6 += v[jrow]*x[6*idx[jrow]+5];
852: jrow++;
853: }
854: y[6*i] += sum1;
855: y[6*i+1] += sum2;
856: y[6*i+2] += sum3;
857: y[6*i+3] += sum4;
858: y[6*i+4] += sum5;
859: y[6*i+5] += sum6;
860: }
862: PetscLogFlops(12*a->nz);
863: VecRestoreArray(xx,&x);
864: VecRestoreArray(zz,&y);
865: return(0);
866: }
868: int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
869: {
870: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
871: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
872: Scalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
873: int ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;
876: if (yy != zz) {VecCopy(yy,zz);}
877: VecGetArray(xx,&x);
878: VecGetArray(zz,&y);
879: y = y + shift; /* shift for Fortran start by 1 indexing */
880: for (i=0; i<m; i++) {
881: idx = a->j + a->i[i] + shift;
882: v = a->a + a->i[i] + shift;
883: n = a->i[i+1] - a->i[i];
884: alpha1 = x[6*i];
885: alpha2 = x[6*i+1];
886: alpha3 = x[6*i+2];
887: alpha4 = x[6*i+3];
888: alpha5 = x[6*i+4];
889: alpha6 = x[6*i+5];
890: while (n-->0) {
891: y[6*(*idx)] += alpha1*(*v);
892: y[6*(*idx)+1] += alpha2*(*v);
893: y[6*(*idx)+2] += alpha3*(*v);
894: y[6*(*idx)+3] += alpha4*(*v);
895: y[6*(*idx)+4] += alpha5*(*v);
896: y[6*(*idx)+5] += alpha6*(*v);
897: idx++; v++;
898: }
899: }
900: PetscLogFlops(12*a->nz);
901: VecRestoreArray(xx,&x);
902: VecRestoreArray(zz,&y);
903: return(0);
904: }
906: /*===================================================================================*/
907: int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
908: {
909: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
910: int ierr;
913: /* start the scatter */
914: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
915: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
916: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
917: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
918: return(0);
919: }
921: int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
922: {
923: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
924: int ierr;
926: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
927: VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
928: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
929: VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
930: return(0);
931: }
933: int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
934: {
935: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
936: int ierr;
939: /* start the scatter */
940: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
941: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
942: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
943: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);
944: return(0);
945: }
947: int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
948: {
949: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
950: int ierr;
952: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
953: VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
954: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
955: VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
956: return(0);
957: }
959: /* ---------------------------------------------------------------------------------- */
960: int MatCreateMAIJ(Mat A,int dof,Mat *maij)
961: {
962: int ierr,size,n;
963: Mat_MPIMAIJ *b;
964: Mat B;
967: ierr = PetscObjectReference((PetscObject)A);
969: if (dof == 1) {
970: *maij = A;
971: } else {
972: MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);
973: B->assembled = PETSC_TRUE;
975: MPI_Comm_size(A->comm,&size);
976: if (size == 1) {
977: MatSetType(B,MATSEQMAIJ);
978: B->ops->destroy = MatDestroy_SeqMAIJ;
979: b = (Mat_MPIMAIJ*)B->data;
980: b->dof = dof;
981: b->AIJ = A;
982: if (dof == 2) {
983: B->ops->mult = MatMult_SeqMAIJ_2;
984: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
985: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
986: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
987: } else if (dof == 3) {
988: B->ops->mult = MatMult_SeqMAIJ_3;
989: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
990: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
991: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
992: } else if (dof == 4) {
993: B->ops->mult = MatMult_SeqMAIJ_4;
994: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
995: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
996: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
997: } else if (dof == 5) {
998: B->ops->mult = MatMult_SeqMAIJ_5;
999: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
1000: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
1001: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1002: } else if (dof == 6) {
1003: B->ops->mult = MatMult_SeqMAIJ_6;
1004: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
1005: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
1006: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1007: } else {
1008: SETERRQ1(1,"Cannot handle a dof of %dn",dof);
1009: }
1010: } else {
1011: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1012: IS from,to;
1013: Vec gvec;
1014: int *garray,i;
1016: MatSetType(B,MATMPIMAIJ);
1017: B->ops->destroy = MatDestroy_MPIMAIJ;
1018: b = (Mat_MPIMAIJ*)B->data;
1019: b->dof = dof;
1020: b->A = A;
1021: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
1022: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
1024: VecGetSize(mpiaij->lvec,&n);
1025: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
1027: /* create two temporary Index sets for build scatter gather */
1028: PetscMalloc((n+1)*sizeof(int),&garray);
1029: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1030: ISCreateBlock(A->comm,dof,n,garray,&from);
1031: PetscFree(garray);
1032: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
1034: /* create temporary global vector to generate scatter context */
1035: VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);
1037: /* generate the scatter context */
1038: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
1040: ISDestroy(from);
1041: ISDestroy(to);
1042: VecDestroy(gvec);
1044: B->ops->mult = MatMult_MPIMAIJ_dof;
1045: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
1046: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
1047: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1048: }
1049: *maij = B;
1050: }
1051: return(0);
1052: }