Actual source code: maij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the MAIJ matrix storage format.
5: This format is used for restriction and interpolation operations for
6: multicomponent problems. It interpolates each component the same way
7: independently.
9: We provide:
10: MatMult()
11: MatMultTranspose()
12: MatMultTransposeAdd()
13: MatMultAdd()
14: and
15: MatCreateMAIJ(Mat,dof,Mat*)
17: This single directory handles both the sequential and parallel codes
18: */
20: #include src/mat/impls/maij/maij.h
21: #include src/mat/utils/freespace.h
22: #include private/vecimpl.h
26: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
27: {
29: PetscTruth ismpimaij,isseqmaij;
32: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
33: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
34: if (ismpimaij) {
35: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
37: *B = b->A;
38: } else if (isseqmaij) {
39: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
41: *B = b->AIJ;
42: } else {
43: *B = A;
44: }
45: return(0);
46: }
50: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51: {
53: Mat Aij;
56: MatMAIJGetAIJ(A,&Aij);
57: MatCreateMAIJ(Aij,dof,B);
58: return(0);
59: }
63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64: {
66: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
69: if (b->AIJ) {
70: MatDestroy(b->AIJ);
71: }
72: PetscFree(b);
73: return(0);
74: }
78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79: {
81: Mat B;
84: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85: MatView(B,viewer);
86: MatDestroy(B);
87: return(0);
88: }
92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93: {
95: Mat B;
98: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99: MatView(B,viewer);
100: MatDestroy(B);
101: return(0);
102: }
106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
112: if (b->AIJ) {
113: MatDestroy(b->AIJ);
114: }
115: if (b->OAIJ) {
116: MatDestroy(b->OAIJ);
117: }
118: if (b->A) {
119: MatDestroy(b->A);
120: }
121: if (b->ctx) {
122: VecScatterDestroy(b->ctx);
123: }
124: if (b->w) {
125: VecDestroy(b->w);
126: }
127: PetscFree(b);
128: PetscObjectChangeTypeName((PetscObject)A,0);
129: return(0);
130: }
132: /*MC
133: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
134: multicomponent problems, interpolating or restricting each component the same way independently.
135: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
137: Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd
143: Level: advanced
145: .seealso: MatCreateMAIJ(), MatMAIJGetAIJ(), MatMAIJRedimension()
146: M*/
151: PetscErrorCode MatCreate_MAIJ(Mat A)
152: {
154: Mat_MPIMAIJ *b;
155: PetscMPIInt size;
158: PetscNewLog(A,Mat_MPIMAIJ,&b);
159: A->data = (void*)b;
160: PetscMemzero(A->ops,sizeof(struct _MatOps));
161: A->factor = 0;
162: A->mapping = 0;
164: b->AIJ = 0;
165: b->dof = 0;
166: b->OAIJ = 0;
167: b->ctx = 0;
168: b->w = 0;
169: MPI_Comm_size(((PetscObject)A)->comm,&size);
170: if (size == 1){
171: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
172: } else {
173: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
174: }
175: return(0);
176: }
179: /* --------------------------------------------------------------------------------------*/
182: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
185: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
186: PetscScalar *x,*y,*v,sum1, sum2;
188: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
189: PetscInt n,i,jrow,j;
192: VecGetArray(xx,&x);
193: VecGetArray(yy,&y);
194: idx = a->j;
195: v = a->a;
196: ii = a->i;
198: for (i=0; i<m; i++) {
199: jrow = ii[i];
200: n = ii[i+1] - jrow;
201: sum1 = 0.0;
202: sum2 = 0.0;
203: nonzerorow += (n>0);
204: for (j=0; j<n; j++) {
205: sum1 += v[jrow]*x[2*idx[jrow]];
206: sum2 += v[jrow]*x[2*idx[jrow]+1];
207: jrow++;
208: }
209: y[2*i] = sum1;
210: y[2*i+1] = sum2;
211: }
213: PetscLogFlops(4*a->nz - 2*nonzerorow);
214: VecRestoreArray(xx,&x);
215: VecRestoreArray(yy,&y);
216: return(0);
217: }
221: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
222: {
223: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
224: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
225: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
227: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
230: VecSet(yy,zero);
231: VecGetArray(xx,&x);
232: VecGetArray(yy,&y);
233:
234: for (i=0; i<m; i++) {
235: idx = a->j + a->i[i] ;
236: v = a->a + a->i[i] ;
237: n = a->i[i+1] - a->i[i];
238: alpha1 = x[2*i];
239: alpha2 = x[2*i+1];
240: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
241: }
242: PetscLogFlops(4*a->nz);
243: VecRestoreArray(xx,&x);
244: VecRestoreArray(yy,&y);
245: return(0);
246: }
250: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
251: {
252: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
253: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
254: PetscScalar *x,*y,*v,sum1, sum2;
256: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
257: PetscInt n,i,jrow,j;
260: if (yy != zz) {VecCopy(yy,zz);}
261: VecGetArray(xx,&x);
262: VecGetArray(zz,&y);
263: idx = a->j;
264: v = a->a;
265: ii = a->i;
267: for (i=0; i<m; i++) {
268: jrow = ii[i];
269: n = ii[i+1] - jrow;
270: sum1 = 0.0;
271: sum2 = 0.0;
272: for (j=0; j<n; j++) {
273: sum1 += v[jrow]*x[2*idx[jrow]];
274: sum2 += v[jrow]*x[2*idx[jrow]+1];
275: jrow++;
276: }
277: y[2*i] += sum1;
278: y[2*i+1] += sum2;
279: }
281: PetscLogFlops(4*a->nz);
282: VecRestoreArray(xx,&x);
283: VecRestoreArray(zz,&y);
284: return(0);
285: }
288: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
289: {
290: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
291: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
292: PetscScalar *x,*y,*v,alpha1,alpha2;
294: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
297: if (yy != zz) {VecCopy(yy,zz);}
298: VecGetArray(xx,&x);
299: VecGetArray(zz,&y);
300:
301: for (i=0; i<m; i++) {
302: idx = a->j + a->i[i] ;
303: v = a->a + a->i[i] ;
304: n = a->i[i+1] - a->i[i];
305: alpha1 = x[2*i];
306: alpha2 = x[2*i+1];
307: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
308: }
309: PetscLogFlops(4*a->nz);
310: VecRestoreArray(xx,&x);
311: VecRestoreArray(zz,&y);
312: return(0);
313: }
314: /* --------------------------------------------------------------------------------------*/
317: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
318: {
319: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
320: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
321: PetscScalar *x,*y,*v,sum1, sum2, sum3;
323: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
324: PetscInt n,i,jrow,j;
327: VecGetArray(xx,&x);
328: VecGetArray(yy,&y);
329: idx = a->j;
330: v = a->a;
331: ii = a->i;
333: for (i=0; i<m; i++) {
334: jrow = ii[i];
335: n = ii[i+1] - jrow;
336: sum1 = 0.0;
337: sum2 = 0.0;
338: sum3 = 0.0;
339: nonzerorow += (n>0);
340: for (j=0; j<n; j++) {
341: sum1 += v[jrow]*x[3*idx[jrow]];
342: sum2 += v[jrow]*x[3*idx[jrow]+1];
343: sum3 += v[jrow]*x[3*idx[jrow]+2];
344: jrow++;
345: }
346: y[3*i] = sum1;
347: y[3*i+1] = sum2;
348: y[3*i+2] = sum3;
349: }
351: PetscLogFlops(6*a->nz - 3*nonzerorow);
352: VecRestoreArray(xx,&x);
353: VecRestoreArray(yy,&y);
354: return(0);
355: }
359: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
360: {
361: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
362: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
363: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
365: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
368: VecSet(yy,zero);
369: VecGetArray(xx,&x);
370: VecGetArray(yy,&y);
371:
372: for (i=0; i<m; i++) {
373: idx = a->j + a->i[i];
374: v = a->a + a->i[i];
375: n = a->i[i+1] - a->i[i];
376: alpha1 = x[3*i];
377: alpha2 = x[3*i+1];
378: alpha3 = x[3*i+2];
379: while (n-->0) {
380: y[3*(*idx)] += alpha1*(*v);
381: y[3*(*idx)+1] += alpha2*(*v);
382: y[3*(*idx)+2] += alpha3*(*v);
383: idx++; v++;
384: }
385: }
386: PetscLogFlops(6*a->nz);
387: VecRestoreArray(xx,&x);
388: VecRestoreArray(yy,&y);
389: return(0);
390: }
394: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
395: {
396: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
397: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
398: PetscScalar *x,*y,*v,sum1, sum2, sum3;
400: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
401: PetscInt n,i,jrow,j;
404: if (yy != zz) {VecCopy(yy,zz);}
405: VecGetArray(xx,&x);
406: VecGetArray(zz,&y);
407: idx = a->j;
408: v = a->a;
409: ii = a->i;
411: for (i=0; i<m; i++) {
412: jrow = ii[i];
413: n = ii[i+1] - jrow;
414: sum1 = 0.0;
415: sum2 = 0.0;
416: sum3 = 0.0;
417: for (j=0; j<n; j++) {
418: sum1 += v[jrow]*x[3*idx[jrow]];
419: sum2 += v[jrow]*x[3*idx[jrow]+1];
420: sum3 += v[jrow]*x[3*idx[jrow]+2];
421: jrow++;
422: }
423: y[3*i] += sum1;
424: y[3*i+1] += sum2;
425: y[3*i+2] += sum3;
426: }
428: PetscLogFlops(6*a->nz);
429: VecRestoreArray(xx,&x);
430: VecRestoreArray(zz,&y);
431: return(0);
432: }
435: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
436: {
437: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
438: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
439: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
441: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
444: if (yy != zz) {VecCopy(yy,zz);}
445: VecGetArray(xx,&x);
446: VecGetArray(zz,&y);
447: for (i=0; i<m; i++) {
448: idx = a->j + a->i[i] ;
449: v = a->a + a->i[i] ;
450: n = a->i[i+1] - a->i[i];
451: alpha1 = x[3*i];
452: alpha2 = x[3*i+1];
453: alpha3 = x[3*i+2];
454: while (n-->0) {
455: y[3*(*idx)] += alpha1*(*v);
456: y[3*(*idx)+1] += alpha2*(*v);
457: y[3*(*idx)+2] += alpha3*(*v);
458: idx++; v++;
459: }
460: }
461: PetscLogFlops(6*a->nz);
462: VecRestoreArray(xx,&x);
463: VecRestoreArray(zz,&y);
464: return(0);
465: }
467: /* ------------------------------------------------------------------------------*/
470: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
471: {
472: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
473: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
474: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
476: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
477: PetscInt n,i,jrow,j;
480: VecGetArray(xx,&x);
481: VecGetArray(yy,&y);
482: idx = a->j;
483: v = a->a;
484: ii = a->i;
486: for (i=0; i<m; i++) {
487: jrow = ii[i];
488: n = ii[i+1] - jrow;
489: sum1 = 0.0;
490: sum2 = 0.0;
491: sum3 = 0.0;
492: sum4 = 0.0;
493: nonzerorow += (n>0);
494: for (j=0; j<n; j++) {
495: sum1 += v[jrow]*x[4*idx[jrow]];
496: sum2 += v[jrow]*x[4*idx[jrow]+1];
497: sum3 += v[jrow]*x[4*idx[jrow]+2];
498: sum4 += v[jrow]*x[4*idx[jrow]+3];
499: jrow++;
500: }
501: y[4*i] = sum1;
502: y[4*i+1] = sum2;
503: y[4*i+2] = sum3;
504: y[4*i+3] = sum4;
505: }
507: PetscLogFlops(8*a->nz - 4*nonzerorow);
508: VecRestoreArray(xx,&x);
509: VecRestoreArray(yy,&y);
510: return(0);
511: }
515: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
516: {
517: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
518: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
519: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
521: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
524: VecSet(yy,zero);
525: VecGetArray(xx,&x);
526: VecGetArray(yy,&y);
527: for (i=0; i<m; i++) {
528: idx = a->j + a->i[i] ;
529: v = a->a + a->i[i] ;
530: n = a->i[i+1] - a->i[i];
531: alpha1 = x[4*i];
532: alpha2 = x[4*i+1];
533: alpha3 = x[4*i+2];
534: alpha4 = x[4*i+3];
535: while (n-->0) {
536: y[4*(*idx)] += alpha1*(*v);
537: y[4*(*idx)+1] += alpha2*(*v);
538: y[4*(*idx)+2] += alpha3*(*v);
539: y[4*(*idx)+3] += alpha4*(*v);
540: idx++; v++;
541: }
542: }
543: PetscLogFlops(8*a->nz);
544: VecRestoreArray(xx,&x);
545: VecRestoreArray(yy,&y);
546: return(0);
547: }
551: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
552: {
553: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
554: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
555: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
557: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
558: PetscInt n,i,jrow,j;
561: if (yy != zz) {VecCopy(yy,zz);}
562: VecGetArray(xx,&x);
563: VecGetArray(zz,&y);
564: idx = a->j;
565: v = a->a;
566: ii = a->i;
568: for (i=0; i<m; i++) {
569: jrow = ii[i];
570: n = ii[i+1] - jrow;
571: sum1 = 0.0;
572: sum2 = 0.0;
573: sum3 = 0.0;
574: sum4 = 0.0;
575: for (j=0; j<n; j++) {
576: sum1 += v[jrow]*x[4*idx[jrow]];
577: sum2 += v[jrow]*x[4*idx[jrow]+1];
578: sum3 += v[jrow]*x[4*idx[jrow]+2];
579: sum4 += v[jrow]*x[4*idx[jrow]+3];
580: jrow++;
581: }
582: y[4*i] += sum1;
583: y[4*i+1] += sum2;
584: y[4*i+2] += sum3;
585: y[4*i+3] += sum4;
586: }
588: PetscLogFlops(8*a->nz);
589: VecRestoreArray(xx,&x);
590: VecRestoreArray(zz,&y);
591: return(0);
592: }
595: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596: {
597: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
598: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
599: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
601: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
604: if (yy != zz) {VecCopy(yy,zz);}
605: VecGetArray(xx,&x);
606: VecGetArray(zz,&y);
607:
608: for (i=0; i<m; i++) {
609: idx = a->j + a->i[i] ;
610: v = a->a + a->i[i] ;
611: n = a->i[i+1] - a->i[i];
612: alpha1 = x[4*i];
613: alpha2 = x[4*i+1];
614: alpha3 = x[4*i+2];
615: alpha4 = x[4*i+3];
616: while (n-->0) {
617: y[4*(*idx)] += alpha1*(*v);
618: y[4*(*idx)+1] += alpha2*(*v);
619: y[4*(*idx)+2] += alpha3*(*v);
620: y[4*(*idx)+3] += alpha4*(*v);
621: idx++; v++;
622: }
623: }
624: PetscLogFlops(8*a->nz);
625: VecRestoreArray(xx,&x);
626: VecRestoreArray(zz,&y);
627: return(0);
628: }
629: /* ------------------------------------------------------------------------------*/
633: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
634: {
635: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
636: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
637: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
639: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
640: PetscInt n,i,jrow,j;
643: VecGetArray(xx,&x);
644: VecGetArray(yy,&y);
645: idx = a->j;
646: v = a->a;
647: ii = a->i;
649: for (i=0; i<m; i++) {
650: jrow = ii[i];
651: n = ii[i+1] - jrow;
652: sum1 = 0.0;
653: sum2 = 0.0;
654: sum3 = 0.0;
655: sum4 = 0.0;
656: sum5 = 0.0;
657: nonzerorow += (n>0);
658: for (j=0; j<n; j++) {
659: sum1 += v[jrow]*x[5*idx[jrow]];
660: sum2 += v[jrow]*x[5*idx[jrow]+1];
661: sum3 += v[jrow]*x[5*idx[jrow]+2];
662: sum4 += v[jrow]*x[5*idx[jrow]+3];
663: sum5 += v[jrow]*x[5*idx[jrow]+4];
664: jrow++;
665: }
666: y[5*i] = sum1;
667: y[5*i+1] = sum2;
668: y[5*i+2] = sum3;
669: y[5*i+3] = sum4;
670: y[5*i+4] = sum5;
671: }
673: PetscLogFlops(10*a->nz - 5*nonzerorow);
674: VecRestoreArray(xx,&x);
675: VecRestoreArray(yy,&y);
676: return(0);
677: }
681: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
682: {
683: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
684: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
685: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
687: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
690: VecSet(yy,zero);
691: VecGetArray(xx,&x);
692: VecGetArray(yy,&y);
693:
694: for (i=0; i<m; i++) {
695: idx = a->j + a->i[i] ;
696: v = a->a + a->i[i] ;
697: n = a->i[i+1] - a->i[i];
698: alpha1 = x[5*i];
699: alpha2 = x[5*i+1];
700: alpha3 = x[5*i+2];
701: alpha4 = x[5*i+3];
702: alpha5 = x[5*i+4];
703: while (n-->0) {
704: y[5*(*idx)] += alpha1*(*v);
705: y[5*(*idx)+1] += alpha2*(*v);
706: y[5*(*idx)+2] += alpha3*(*v);
707: y[5*(*idx)+3] += alpha4*(*v);
708: y[5*(*idx)+4] += alpha5*(*v);
709: idx++; v++;
710: }
711: }
712: PetscLogFlops(10*a->nz);
713: VecRestoreArray(xx,&x);
714: VecRestoreArray(yy,&y);
715: return(0);
716: }
720: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
721: {
722: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
723: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
724: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
726: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
727: PetscInt n,i,jrow,j;
730: if (yy != zz) {VecCopy(yy,zz);}
731: VecGetArray(xx,&x);
732: VecGetArray(zz,&y);
733: idx = a->j;
734: v = a->a;
735: ii = a->i;
737: for (i=0; i<m; i++) {
738: jrow = ii[i];
739: n = ii[i+1] - jrow;
740: sum1 = 0.0;
741: sum2 = 0.0;
742: sum3 = 0.0;
743: sum4 = 0.0;
744: sum5 = 0.0;
745: for (j=0; j<n; j++) {
746: sum1 += v[jrow]*x[5*idx[jrow]];
747: sum2 += v[jrow]*x[5*idx[jrow]+1];
748: sum3 += v[jrow]*x[5*idx[jrow]+2];
749: sum4 += v[jrow]*x[5*idx[jrow]+3];
750: sum5 += v[jrow]*x[5*idx[jrow]+4];
751: jrow++;
752: }
753: y[5*i] += sum1;
754: y[5*i+1] += sum2;
755: y[5*i+2] += sum3;
756: y[5*i+3] += sum4;
757: y[5*i+4] += sum5;
758: }
760: PetscLogFlops(10*a->nz);
761: VecRestoreArray(xx,&x);
762: VecRestoreArray(zz,&y);
763: return(0);
764: }
768: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
769: {
770: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
771: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
772: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
774: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
777: if (yy != zz) {VecCopy(yy,zz);}
778: VecGetArray(xx,&x);
779: VecGetArray(zz,&y);
780:
781: for (i=0; i<m; i++) {
782: idx = a->j + a->i[i] ;
783: v = a->a + a->i[i] ;
784: n = a->i[i+1] - a->i[i];
785: alpha1 = x[5*i];
786: alpha2 = x[5*i+1];
787: alpha3 = x[5*i+2];
788: alpha4 = x[5*i+3];
789: alpha5 = x[5*i+4];
790: while (n-->0) {
791: y[5*(*idx)] += alpha1*(*v);
792: y[5*(*idx)+1] += alpha2*(*v);
793: y[5*(*idx)+2] += alpha3*(*v);
794: y[5*(*idx)+3] += alpha4*(*v);
795: y[5*(*idx)+4] += alpha5*(*v);
796: idx++; v++;
797: }
798: }
799: PetscLogFlops(10*a->nz);
800: VecRestoreArray(xx,&x);
801: VecRestoreArray(zz,&y);
802: return(0);
803: }
805: /* ------------------------------------------------------------------------------*/
808: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
809: {
810: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
811: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
812: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
814: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
815: PetscInt n,i,jrow,j;
818: VecGetArray(xx,&x);
819: VecGetArray(yy,&y);
820: idx = a->j;
821: v = a->a;
822: ii = a->i;
824: for (i=0; i<m; i++) {
825: jrow = ii[i];
826: n = ii[i+1] - jrow;
827: sum1 = 0.0;
828: sum2 = 0.0;
829: sum3 = 0.0;
830: sum4 = 0.0;
831: sum5 = 0.0;
832: sum6 = 0.0;
833: nonzerorow += (n>0);
834: for (j=0; j<n; j++) {
835: sum1 += v[jrow]*x[6*idx[jrow]];
836: sum2 += v[jrow]*x[6*idx[jrow]+1];
837: sum3 += v[jrow]*x[6*idx[jrow]+2];
838: sum4 += v[jrow]*x[6*idx[jrow]+3];
839: sum5 += v[jrow]*x[6*idx[jrow]+4];
840: sum6 += v[jrow]*x[6*idx[jrow]+5];
841: jrow++;
842: }
843: y[6*i] = sum1;
844: y[6*i+1] = sum2;
845: y[6*i+2] = sum3;
846: y[6*i+3] = sum4;
847: y[6*i+4] = sum5;
848: y[6*i+5] = sum6;
849: }
851: PetscLogFlops(12*a->nz - 6*nonzerorow);
852: VecRestoreArray(xx,&x);
853: VecRestoreArray(yy,&y);
854: return(0);
855: }
859: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
860: {
861: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
862: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
863: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
865: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
868: VecSet(yy,zero);
869: VecGetArray(xx,&x);
870: VecGetArray(yy,&y);
872: for (i=0; i<m; i++) {
873: idx = a->j + a->i[i] ;
874: v = a->a + a->i[i] ;
875: n = a->i[i+1] - a->i[i];
876: alpha1 = x[6*i];
877: alpha2 = x[6*i+1];
878: alpha3 = x[6*i+2];
879: alpha4 = x[6*i+3];
880: alpha5 = x[6*i+4];
881: alpha6 = x[6*i+5];
882: while (n-->0) {
883: y[6*(*idx)] += alpha1*(*v);
884: y[6*(*idx)+1] += alpha2*(*v);
885: y[6*(*idx)+2] += alpha3*(*v);
886: y[6*(*idx)+3] += alpha4*(*v);
887: y[6*(*idx)+4] += alpha5*(*v);
888: y[6*(*idx)+5] += alpha6*(*v);
889: idx++; v++;
890: }
891: }
892: PetscLogFlops(12*a->nz);
893: VecRestoreArray(xx,&x);
894: VecRestoreArray(yy,&y);
895: return(0);
896: }
900: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
901: {
902: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
903: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
904: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
906: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
907: PetscInt n,i,jrow,j;
910: if (yy != zz) {VecCopy(yy,zz);}
911: VecGetArray(xx,&x);
912: VecGetArray(zz,&y);
913: idx = a->j;
914: v = a->a;
915: ii = a->i;
917: for (i=0; i<m; i++) {
918: jrow = ii[i];
919: n = ii[i+1] - jrow;
920: sum1 = 0.0;
921: sum2 = 0.0;
922: sum3 = 0.0;
923: sum4 = 0.0;
924: sum5 = 0.0;
925: sum6 = 0.0;
926: for (j=0; j<n; j++) {
927: sum1 += v[jrow]*x[6*idx[jrow]];
928: sum2 += v[jrow]*x[6*idx[jrow]+1];
929: sum3 += v[jrow]*x[6*idx[jrow]+2];
930: sum4 += v[jrow]*x[6*idx[jrow]+3];
931: sum5 += v[jrow]*x[6*idx[jrow]+4];
932: sum6 += v[jrow]*x[6*idx[jrow]+5];
933: jrow++;
934: }
935: y[6*i] += sum1;
936: y[6*i+1] += sum2;
937: y[6*i+2] += sum3;
938: y[6*i+3] += sum4;
939: y[6*i+4] += sum5;
940: y[6*i+5] += sum6;
941: }
943: PetscLogFlops(12*a->nz);
944: VecRestoreArray(xx,&x);
945: VecRestoreArray(zz,&y);
946: return(0);
947: }
951: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
952: {
953: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
954: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
955: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
957: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
960: if (yy != zz) {VecCopy(yy,zz);}
961: VecGetArray(xx,&x);
962: VecGetArray(zz,&y);
963:
964: for (i=0; i<m; i++) {
965: idx = a->j + a->i[i] ;
966: v = a->a + a->i[i] ;
967: n = a->i[i+1] - a->i[i];
968: alpha1 = x[6*i];
969: alpha2 = x[6*i+1];
970: alpha3 = x[6*i+2];
971: alpha4 = x[6*i+3];
972: alpha5 = x[6*i+4];
973: alpha6 = x[6*i+5];
974: while (n-->0) {
975: y[6*(*idx)] += alpha1*(*v);
976: y[6*(*idx)+1] += alpha2*(*v);
977: y[6*(*idx)+2] += alpha3*(*v);
978: y[6*(*idx)+3] += alpha4*(*v);
979: y[6*(*idx)+4] += alpha5*(*v);
980: y[6*(*idx)+5] += alpha6*(*v);
981: idx++; v++;
982: }
983: }
984: PetscLogFlops(12*a->nz);
985: VecRestoreArray(xx,&x);
986: VecRestoreArray(zz,&y);
987: return(0);
988: }
990: /* ------------------------------------------------------------------------------*/
993: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
994: {
995: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
996: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
997: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
999: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1000: PetscInt n,i,jrow,j;
1003: VecGetArray(xx,&x);
1004: VecGetArray(yy,&y);
1005: idx = a->j;
1006: v = a->a;
1007: ii = a->i;
1009: for (i=0; i<m; i++) {
1010: jrow = ii[i];
1011: n = ii[i+1] - jrow;
1012: sum1 = 0.0;
1013: sum2 = 0.0;
1014: sum3 = 0.0;
1015: sum4 = 0.0;
1016: sum5 = 0.0;
1017: sum6 = 0.0;
1018: sum7 = 0.0;
1019: nonzerorow += (n>0);
1020: for (j=0; j<n; j++) {
1021: sum1 += v[jrow]*x[7*idx[jrow]];
1022: sum2 += v[jrow]*x[7*idx[jrow]+1];
1023: sum3 += v[jrow]*x[7*idx[jrow]+2];
1024: sum4 += v[jrow]*x[7*idx[jrow]+3];
1025: sum5 += v[jrow]*x[7*idx[jrow]+4];
1026: sum6 += v[jrow]*x[7*idx[jrow]+5];
1027: sum7 += v[jrow]*x[7*idx[jrow]+6];
1028: jrow++;
1029: }
1030: y[7*i] = sum1;
1031: y[7*i+1] = sum2;
1032: y[7*i+2] = sum3;
1033: y[7*i+3] = sum4;
1034: y[7*i+4] = sum5;
1035: y[7*i+5] = sum6;
1036: y[7*i+6] = sum7;
1037: }
1039: PetscLogFlops(14*a->nz - 7*nonzerorow);
1040: VecRestoreArray(xx,&x);
1041: VecRestoreArray(yy,&y);
1042: return(0);
1043: }
1047: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1048: {
1049: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1050: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1051: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1053: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1056: VecSet(yy,zero);
1057: VecGetArray(xx,&x);
1058: VecGetArray(yy,&y);
1060: for (i=0; i<m; i++) {
1061: idx = a->j + a->i[i] ;
1062: v = a->a + a->i[i] ;
1063: n = a->i[i+1] - a->i[i];
1064: alpha1 = x[7*i];
1065: alpha2 = x[7*i+1];
1066: alpha3 = x[7*i+2];
1067: alpha4 = x[7*i+3];
1068: alpha5 = x[7*i+4];
1069: alpha6 = x[7*i+5];
1070: alpha7 = x[7*i+6];
1071: while (n-->0) {
1072: y[7*(*idx)] += alpha1*(*v);
1073: y[7*(*idx)+1] += alpha2*(*v);
1074: y[7*(*idx)+2] += alpha3*(*v);
1075: y[7*(*idx)+3] += alpha4*(*v);
1076: y[7*(*idx)+4] += alpha5*(*v);
1077: y[7*(*idx)+5] += alpha6*(*v);
1078: y[7*(*idx)+6] += alpha7*(*v);
1079: idx++; v++;
1080: }
1081: }
1082: PetscLogFlops(14*a->nz);
1083: VecRestoreArray(xx,&x);
1084: VecRestoreArray(yy,&y);
1085: return(0);
1086: }
1090: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1091: {
1092: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1093: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1094: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1096: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1097: PetscInt n,i,jrow,j;
1100: if (yy != zz) {VecCopy(yy,zz);}
1101: VecGetArray(xx,&x);
1102: VecGetArray(zz,&y);
1103: idx = a->j;
1104: v = a->a;
1105: ii = a->i;
1107: for (i=0; i<m; i++) {
1108: jrow = ii[i];
1109: n = ii[i+1] - jrow;
1110: sum1 = 0.0;
1111: sum2 = 0.0;
1112: sum3 = 0.0;
1113: sum4 = 0.0;
1114: sum5 = 0.0;
1115: sum6 = 0.0;
1116: sum7 = 0.0;
1117: for (j=0; j<n; j++) {
1118: sum1 += v[jrow]*x[7*idx[jrow]];
1119: sum2 += v[jrow]*x[7*idx[jrow]+1];
1120: sum3 += v[jrow]*x[7*idx[jrow]+2];
1121: sum4 += v[jrow]*x[7*idx[jrow]+3];
1122: sum5 += v[jrow]*x[7*idx[jrow]+4];
1123: sum6 += v[jrow]*x[7*idx[jrow]+5];
1124: sum7 += v[jrow]*x[7*idx[jrow]+6];
1125: jrow++;
1126: }
1127: y[7*i] += sum1;
1128: y[7*i+1] += sum2;
1129: y[7*i+2] += sum3;
1130: y[7*i+3] += sum4;
1131: y[7*i+4] += sum5;
1132: y[7*i+5] += sum6;
1133: y[7*i+6] += sum7;
1134: }
1136: PetscLogFlops(14*a->nz);
1137: VecRestoreArray(xx,&x);
1138: VecRestoreArray(zz,&y);
1139: return(0);
1140: }
1144: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1145: {
1146: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1147: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1148: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1150: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1153: if (yy != zz) {VecCopy(yy,zz);}
1154: VecGetArray(xx,&x);
1155: VecGetArray(zz,&y);
1156: for (i=0; i<m; i++) {
1157: idx = a->j + a->i[i] ;
1158: v = a->a + a->i[i] ;
1159: n = a->i[i+1] - a->i[i];
1160: alpha1 = x[7*i];
1161: alpha2 = x[7*i+1];
1162: alpha3 = x[7*i+2];
1163: alpha4 = x[7*i+3];
1164: alpha5 = x[7*i+4];
1165: alpha6 = x[7*i+5];
1166: alpha7 = x[7*i+6];
1167: while (n-->0) {
1168: y[7*(*idx)] += alpha1*(*v);
1169: y[7*(*idx)+1] += alpha2*(*v);
1170: y[7*(*idx)+2] += alpha3*(*v);
1171: y[7*(*idx)+3] += alpha4*(*v);
1172: y[7*(*idx)+4] += alpha5*(*v);
1173: y[7*(*idx)+5] += alpha6*(*v);
1174: y[7*(*idx)+6] += alpha7*(*v);
1175: idx++; v++;
1176: }
1177: }
1178: PetscLogFlops(14*a->nz);
1179: VecRestoreArray(xx,&x);
1180: VecRestoreArray(zz,&y);
1181: return(0);
1182: }
1186: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1187: {
1188: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1189: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1190: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1192: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1193: PetscInt n,i,jrow,j;
1196: VecGetArray(xx,&x);
1197: VecGetArray(yy,&y);
1198: idx = a->j;
1199: v = a->a;
1200: ii = a->i;
1202: for (i=0; i<m; i++) {
1203: jrow = ii[i];
1204: n = ii[i+1] - jrow;
1205: sum1 = 0.0;
1206: sum2 = 0.0;
1207: sum3 = 0.0;
1208: sum4 = 0.0;
1209: sum5 = 0.0;
1210: sum6 = 0.0;
1211: sum7 = 0.0;
1212: sum8 = 0.0;
1213: nonzerorow += (n>0);
1214: for (j=0; j<n; j++) {
1215: sum1 += v[jrow]*x[8*idx[jrow]];
1216: sum2 += v[jrow]*x[8*idx[jrow]+1];
1217: sum3 += v[jrow]*x[8*idx[jrow]+2];
1218: sum4 += v[jrow]*x[8*idx[jrow]+3];
1219: sum5 += v[jrow]*x[8*idx[jrow]+4];
1220: sum6 += v[jrow]*x[8*idx[jrow]+5];
1221: sum7 += v[jrow]*x[8*idx[jrow]+6];
1222: sum8 += v[jrow]*x[8*idx[jrow]+7];
1223: jrow++;
1224: }
1225: y[8*i] = sum1;
1226: y[8*i+1] = sum2;
1227: y[8*i+2] = sum3;
1228: y[8*i+3] = sum4;
1229: y[8*i+4] = sum5;
1230: y[8*i+5] = sum6;
1231: y[8*i+6] = sum7;
1232: y[8*i+7] = sum8;
1233: }
1235: PetscLogFlops(16*a->nz - 8*nonzerorow);
1236: VecRestoreArray(xx,&x);
1237: VecRestoreArray(yy,&y);
1238: return(0);
1239: }
1243: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1244: {
1245: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1246: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1247: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1249: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1252: VecSet(yy,zero);
1253: VecGetArray(xx,&x);
1254: VecGetArray(yy,&y);
1256: for (i=0; i<m; i++) {
1257: idx = a->j + a->i[i] ;
1258: v = a->a + a->i[i] ;
1259: n = a->i[i+1] - a->i[i];
1260: alpha1 = x[8*i];
1261: alpha2 = x[8*i+1];
1262: alpha3 = x[8*i+2];
1263: alpha4 = x[8*i+3];
1264: alpha5 = x[8*i+4];
1265: alpha6 = x[8*i+5];
1266: alpha7 = x[8*i+6];
1267: alpha8 = x[8*i+7];
1268: while (n-->0) {
1269: y[8*(*idx)] += alpha1*(*v);
1270: y[8*(*idx)+1] += alpha2*(*v);
1271: y[8*(*idx)+2] += alpha3*(*v);
1272: y[8*(*idx)+3] += alpha4*(*v);
1273: y[8*(*idx)+4] += alpha5*(*v);
1274: y[8*(*idx)+5] += alpha6*(*v);
1275: y[8*(*idx)+6] += alpha7*(*v);
1276: y[8*(*idx)+7] += alpha8*(*v);
1277: idx++; v++;
1278: }
1279: }
1280: PetscLogFlops(16*a->nz);
1281: VecRestoreArray(xx,&x);
1282: VecRestoreArray(yy,&y);
1283: return(0);
1284: }
1288: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1289: {
1290: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1291: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1292: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1294: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1295: PetscInt n,i,jrow,j;
1298: if (yy != zz) {VecCopy(yy,zz);}
1299: VecGetArray(xx,&x);
1300: VecGetArray(zz,&y);
1301: idx = a->j;
1302: v = a->a;
1303: ii = a->i;
1305: for (i=0; i<m; i++) {
1306: jrow = ii[i];
1307: n = ii[i+1] - jrow;
1308: sum1 = 0.0;
1309: sum2 = 0.0;
1310: sum3 = 0.0;
1311: sum4 = 0.0;
1312: sum5 = 0.0;
1313: sum6 = 0.0;
1314: sum7 = 0.0;
1315: sum8 = 0.0;
1316: for (j=0; j<n; j++) {
1317: sum1 += v[jrow]*x[8*idx[jrow]];
1318: sum2 += v[jrow]*x[8*idx[jrow]+1];
1319: sum3 += v[jrow]*x[8*idx[jrow]+2];
1320: sum4 += v[jrow]*x[8*idx[jrow]+3];
1321: sum5 += v[jrow]*x[8*idx[jrow]+4];
1322: sum6 += v[jrow]*x[8*idx[jrow]+5];
1323: sum7 += v[jrow]*x[8*idx[jrow]+6];
1324: sum8 += v[jrow]*x[8*idx[jrow]+7];
1325: jrow++;
1326: }
1327: y[8*i] += sum1;
1328: y[8*i+1] += sum2;
1329: y[8*i+2] += sum3;
1330: y[8*i+3] += sum4;
1331: y[8*i+4] += sum5;
1332: y[8*i+5] += sum6;
1333: y[8*i+6] += sum7;
1334: y[8*i+7] += sum8;
1335: }
1337: PetscLogFlops(16*a->nz);
1338: VecRestoreArray(xx,&x);
1339: VecRestoreArray(zz,&y);
1340: return(0);
1341: }
1345: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1346: {
1347: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1348: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1349: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1351: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1354: if (yy != zz) {VecCopy(yy,zz);}
1355: VecGetArray(xx,&x);
1356: VecGetArray(zz,&y);
1357: for (i=0; i<m; i++) {
1358: idx = a->j + a->i[i] ;
1359: v = a->a + a->i[i] ;
1360: n = a->i[i+1] - a->i[i];
1361: alpha1 = x[8*i];
1362: alpha2 = x[8*i+1];
1363: alpha3 = x[8*i+2];
1364: alpha4 = x[8*i+3];
1365: alpha5 = x[8*i+4];
1366: alpha6 = x[8*i+5];
1367: alpha7 = x[8*i+6];
1368: alpha8 = x[8*i+7];
1369: while (n-->0) {
1370: y[8*(*idx)] += alpha1*(*v);
1371: y[8*(*idx)+1] += alpha2*(*v);
1372: y[8*(*idx)+2] += alpha3*(*v);
1373: y[8*(*idx)+3] += alpha4*(*v);
1374: y[8*(*idx)+4] += alpha5*(*v);
1375: y[8*(*idx)+5] += alpha6*(*v);
1376: y[8*(*idx)+6] += alpha7*(*v);
1377: y[8*(*idx)+7] += alpha8*(*v);
1378: idx++; v++;
1379: }
1380: }
1381: PetscLogFlops(16*a->nz);
1382: VecRestoreArray(xx,&x);
1383: VecRestoreArray(zz,&y);
1384: return(0);
1385: }
1387: /* ------------------------------------------------------------------------------*/
1390: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1391: {
1392: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1393: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1394: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1396: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1397: PetscInt n,i,jrow,j;
1400: VecGetArray(xx,&x);
1401: VecGetArray(yy,&y);
1402: idx = a->j;
1403: v = a->a;
1404: ii = a->i;
1406: for (i=0; i<m; i++) {
1407: jrow = ii[i];
1408: n = ii[i+1] - jrow;
1409: sum1 = 0.0;
1410: sum2 = 0.0;
1411: sum3 = 0.0;
1412: sum4 = 0.0;
1413: sum5 = 0.0;
1414: sum6 = 0.0;
1415: sum7 = 0.0;
1416: sum8 = 0.0;
1417: sum9 = 0.0;
1418: nonzerorow += (n>0);
1419: for (j=0; j<n; j++) {
1420: sum1 += v[jrow]*x[9*idx[jrow]];
1421: sum2 += v[jrow]*x[9*idx[jrow]+1];
1422: sum3 += v[jrow]*x[9*idx[jrow]+2];
1423: sum4 += v[jrow]*x[9*idx[jrow]+3];
1424: sum5 += v[jrow]*x[9*idx[jrow]+4];
1425: sum6 += v[jrow]*x[9*idx[jrow]+5];
1426: sum7 += v[jrow]*x[9*idx[jrow]+6];
1427: sum8 += v[jrow]*x[9*idx[jrow]+7];
1428: sum9 += v[jrow]*x[9*idx[jrow]+8];
1429: jrow++;
1430: }
1431: y[9*i] = sum1;
1432: y[9*i+1] = sum2;
1433: y[9*i+2] = sum3;
1434: y[9*i+3] = sum4;
1435: y[9*i+4] = sum5;
1436: y[9*i+5] = sum6;
1437: y[9*i+6] = sum7;
1438: y[9*i+7] = sum8;
1439: y[9*i+8] = sum9;
1440: }
1442: PetscLogFlops(18*a->nz - 9*nonzerorow);
1443: VecRestoreArray(xx,&x);
1444: VecRestoreArray(yy,&y);
1445: return(0);
1446: }
1448: /* ------------------------------------------------------------------------------*/
1452: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1453: {
1454: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1455: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1456: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1458: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1461: VecSet(yy,zero);
1462: VecGetArray(xx,&x);
1463: VecGetArray(yy,&y);
1465: for (i=0; i<m; i++) {
1466: idx = a->j + a->i[i] ;
1467: v = a->a + a->i[i] ;
1468: n = a->i[i+1] - a->i[i];
1469: alpha1 = x[9*i];
1470: alpha2 = x[9*i+1];
1471: alpha3 = x[9*i+2];
1472: alpha4 = x[9*i+3];
1473: alpha5 = x[9*i+4];
1474: alpha6 = x[9*i+5];
1475: alpha7 = x[9*i+6];
1476: alpha8 = x[9*i+7];
1477: alpha9 = x[9*i+8];
1478: while (n-->0) {
1479: y[9*(*idx)] += alpha1*(*v);
1480: y[9*(*idx)+1] += alpha2*(*v);
1481: y[9*(*idx)+2] += alpha3*(*v);
1482: y[9*(*idx)+3] += alpha4*(*v);
1483: y[9*(*idx)+4] += alpha5*(*v);
1484: y[9*(*idx)+5] += alpha6*(*v);
1485: y[9*(*idx)+6] += alpha7*(*v);
1486: y[9*(*idx)+7] += alpha8*(*v);
1487: y[9*(*idx)+8] += alpha9*(*v);
1488: idx++; v++;
1489: }
1490: }
1491: PetscLogFlops(18*a->nz);
1492: VecRestoreArray(xx,&x);
1493: VecRestoreArray(yy,&y);
1494: return(0);
1495: }
1499: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1500: {
1501: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1502: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1503: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1505: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1506: PetscInt n,i,jrow,j;
1509: if (yy != zz) {VecCopy(yy,zz);}
1510: VecGetArray(xx,&x);
1511: VecGetArray(zz,&y);
1512: idx = a->j;
1513: v = a->a;
1514: ii = a->i;
1516: for (i=0; i<m; i++) {
1517: jrow = ii[i];
1518: n = ii[i+1] - jrow;
1519: sum1 = 0.0;
1520: sum2 = 0.0;
1521: sum3 = 0.0;
1522: sum4 = 0.0;
1523: sum5 = 0.0;
1524: sum6 = 0.0;
1525: sum7 = 0.0;
1526: sum8 = 0.0;
1527: sum9 = 0.0;
1528: for (j=0; j<n; j++) {
1529: sum1 += v[jrow]*x[9*idx[jrow]];
1530: sum2 += v[jrow]*x[9*idx[jrow]+1];
1531: sum3 += v[jrow]*x[9*idx[jrow]+2];
1532: sum4 += v[jrow]*x[9*idx[jrow]+3];
1533: sum5 += v[jrow]*x[9*idx[jrow]+4];
1534: sum6 += v[jrow]*x[9*idx[jrow]+5];
1535: sum7 += v[jrow]*x[9*idx[jrow]+6];
1536: sum8 += v[jrow]*x[9*idx[jrow]+7];
1537: sum9 += v[jrow]*x[9*idx[jrow]+8];
1538: jrow++;
1539: }
1540: y[9*i] += sum1;
1541: y[9*i+1] += sum2;
1542: y[9*i+2] += sum3;
1543: y[9*i+3] += sum4;
1544: y[9*i+4] += sum5;
1545: y[9*i+5] += sum6;
1546: y[9*i+6] += sum7;
1547: y[9*i+7] += sum8;
1548: y[9*i+8] += sum9;
1549: }
1551: PetscLogFlops(18*a->nz);
1552: VecRestoreArray(xx,&x);
1553: VecRestoreArray(zz,&y);
1554: return(0);
1555: }
1559: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1560: {
1561: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1562: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1563: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1565: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1568: if (yy != zz) {VecCopy(yy,zz);}
1569: VecGetArray(xx,&x);
1570: VecGetArray(zz,&y);
1571: for (i=0; i<m; i++) {
1572: idx = a->j + a->i[i] ;
1573: v = a->a + a->i[i] ;
1574: n = a->i[i+1] - a->i[i];
1575: alpha1 = x[9*i];
1576: alpha2 = x[9*i+1];
1577: alpha3 = x[9*i+2];
1578: alpha4 = x[9*i+3];
1579: alpha5 = x[9*i+4];
1580: alpha6 = x[9*i+5];
1581: alpha7 = x[9*i+6];
1582: alpha8 = x[9*i+7];
1583: alpha9 = x[9*i+8];
1584: while (n-->0) {
1585: y[9*(*idx)] += alpha1*(*v);
1586: y[9*(*idx)+1] += alpha2*(*v);
1587: y[9*(*idx)+2] += alpha3*(*v);
1588: y[9*(*idx)+3] += alpha4*(*v);
1589: y[9*(*idx)+4] += alpha5*(*v);
1590: y[9*(*idx)+5] += alpha6*(*v);
1591: y[9*(*idx)+6] += alpha7*(*v);
1592: y[9*(*idx)+7] += alpha8*(*v);
1593: y[9*(*idx)+8] += alpha9*(*v);
1594: idx++; v++;
1595: }
1596: }
1597: PetscLogFlops(18*a->nz);
1598: VecRestoreArray(xx,&x);
1599: VecRestoreArray(zz,&y);
1600: return(0);
1601: }
1602: /*--------------------------------------------------------------------------------------------*/
1605: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1606: {
1607: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1608: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1609: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1611: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1612: PetscInt n,i,jrow,j;
1615: VecGetArray(xx,&x);
1616: VecGetArray(yy,&y);
1617: idx = a->j;
1618: v = a->a;
1619: ii = a->i;
1621: for (i=0; i<m; i++) {
1622: jrow = ii[i];
1623: n = ii[i+1] - jrow;
1624: sum1 = 0.0;
1625: sum2 = 0.0;
1626: sum3 = 0.0;
1627: sum4 = 0.0;
1628: sum5 = 0.0;
1629: sum6 = 0.0;
1630: sum7 = 0.0;
1631: sum8 = 0.0;
1632: sum9 = 0.0;
1633: sum10 = 0.0;
1634: nonzerorow += (n>0);
1635: for (j=0; j<n; j++) {
1636: sum1 += v[jrow]*x[10*idx[jrow]];
1637: sum2 += v[jrow]*x[10*idx[jrow]+1];
1638: sum3 += v[jrow]*x[10*idx[jrow]+2];
1639: sum4 += v[jrow]*x[10*idx[jrow]+3];
1640: sum5 += v[jrow]*x[10*idx[jrow]+4];
1641: sum6 += v[jrow]*x[10*idx[jrow]+5];
1642: sum7 += v[jrow]*x[10*idx[jrow]+6];
1643: sum8 += v[jrow]*x[10*idx[jrow]+7];
1644: sum9 += v[jrow]*x[10*idx[jrow]+8];
1645: sum10 += v[jrow]*x[10*idx[jrow]+9];
1646: jrow++;
1647: }
1648: y[10*i] = sum1;
1649: y[10*i+1] = sum2;
1650: y[10*i+2] = sum3;
1651: y[10*i+3] = sum4;
1652: y[10*i+4] = sum5;
1653: y[10*i+5] = sum6;
1654: y[10*i+6] = sum7;
1655: y[10*i+7] = sum8;
1656: y[10*i+8] = sum9;
1657: y[10*i+9] = sum10;
1658: }
1660: PetscLogFlops(20*a->nz - 10*nonzerorow);
1661: VecRestoreArray(xx,&x);
1662: VecRestoreArray(yy,&y);
1663: return(0);
1664: }
1668: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1669: {
1670: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1671: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1672: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1674: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1675: PetscInt n,i,jrow,j;
1678: if (yy != zz) {VecCopy(yy,zz);}
1679: VecGetArray(xx,&x);
1680: VecGetArray(zz,&y);
1681: idx = a->j;
1682: v = a->a;
1683: ii = a->i;
1685: for (i=0; i<m; i++) {
1686: jrow = ii[i];
1687: n = ii[i+1] - jrow;
1688: sum1 = 0.0;
1689: sum2 = 0.0;
1690: sum3 = 0.0;
1691: sum4 = 0.0;
1692: sum5 = 0.0;
1693: sum6 = 0.0;
1694: sum7 = 0.0;
1695: sum8 = 0.0;
1696: sum9 = 0.0;
1697: sum10 = 0.0;
1698: for (j=0; j<n; j++) {
1699: sum1 += v[jrow]*x[10*idx[jrow]];
1700: sum2 += v[jrow]*x[10*idx[jrow]+1];
1701: sum3 += v[jrow]*x[10*idx[jrow]+2];
1702: sum4 += v[jrow]*x[10*idx[jrow]+3];
1703: sum5 += v[jrow]*x[10*idx[jrow]+4];
1704: sum6 += v[jrow]*x[10*idx[jrow]+5];
1705: sum7 += v[jrow]*x[10*idx[jrow]+6];
1706: sum8 += v[jrow]*x[10*idx[jrow]+7];
1707: sum9 += v[jrow]*x[10*idx[jrow]+8];
1708: sum10 += v[jrow]*x[10*idx[jrow]+9];
1709: jrow++;
1710: }
1711: y[10*i] += sum1;
1712: y[10*i+1] += sum2;
1713: y[10*i+2] += sum3;
1714: y[10*i+3] += sum4;
1715: y[10*i+4] += sum5;
1716: y[10*i+5] += sum6;
1717: y[10*i+6] += sum7;
1718: y[10*i+7] += sum8;
1719: y[10*i+8] += sum9;
1720: y[10*i+9] += sum10;
1721: }
1723: PetscLogFlops(20*a->nz);
1724: VecRestoreArray(xx,&x);
1725: VecRestoreArray(yy,&y);
1726: return(0);
1727: }
1731: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1732: {
1733: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1734: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1735: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1737: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1740: VecSet(yy,zero);
1741: VecGetArray(xx,&x);
1742: VecGetArray(yy,&y);
1744: for (i=0; i<m; i++) {
1745: idx = a->j + a->i[i] ;
1746: v = a->a + a->i[i] ;
1747: n = a->i[i+1] - a->i[i];
1748: alpha1 = x[10*i];
1749: alpha2 = x[10*i+1];
1750: alpha3 = x[10*i+2];
1751: alpha4 = x[10*i+3];
1752: alpha5 = x[10*i+4];
1753: alpha6 = x[10*i+5];
1754: alpha7 = x[10*i+6];
1755: alpha8 = x[10*i+7];
1756: alpha9 = x[10*i+8];
1757: alpha10 = x[10*i+9];
1758: while (n-->0) {
1759: y[10*(*idx)] += alpha1*(*v);
1760: y[10*(*idx)+1] += alpha2*(*v);
1761: y[10*(*idx)+2] += alpha3*(*v);
1762: y[10*(*idx)+3] += alpha4*(*v);
1763: y[10*(*idx)+4] += alpha5*(*v);
1764: y[10*(*idx)+5] += alpha6*(*v);
1765: y[10*(*idx)+6] += alpha7*(*v);
1766: y[10*(*idx)+7] += alpha8*(*v);
1767: y[10*(*idx)+8] += alpha9*(*v);
1768: y[10*(*idx)+9] += alpha10*(*v);
1769: idx++; v++;
1770: }
1771: }
1772: PetscLogFlops(20*a->nz);
1773: VecRestoreArray(xx,&x);
1774: VecRestoreArray(yy,&y);
1775: return(0);
1776: }
1780: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1781: {
1782: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1783: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1784: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1786: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1789: if (yy != zz) {VecCopy(yy,zz);}
1790: VecGetArray(xx,&x);
1791: VecGetArray(zz,&y);
1792: for (i=0; i<m; i++) {
1793: idx = a->j + a->i[i] ;
1794: v = a->a + a->i[i] ;
1795: n = a->i[i+1] - a->i[i];
1796: alpha1 = x[10*i];
1797: alpha2 = x[10*i+1];
1798: alpha3 = x[10*i+2];
1799: alpha4 = x[10*i+3];
1800: alpha5 = x[10*i+4];
1801: alpha6 = x[10*i+5];
1802: alpha7 = x[10*i+6];
1803: alpha8 = x[10*i+7];
1804: alpha9 = x[10*i+8];
1805: alpha10 = x[10*i+9];
1806: while (n-->0) {
1807: y[10*(*idx)] += alpha1*(*v);
1808: y[10*(*idx)+1] += alpha2*(*v);
1809: y[10*(*idx)+2] += alpha3*(*v);
1810: y[10*(*idx)+3] += alpha4*(*v);
1811: y[10*(*idx)+4] += alpha5*(*v);
1812: y[10*(*idx)+5] += alpha6*(*v);
1813: y[10*(*idx)+6] += alpha7*(*v);
1814: y[10*(*idx)+7] += alpha8*(*v);
1815: y[10*(*idx)+8] += alpha9*(*v);
1816: y[10*(*idx)+9] += alpha10*(*v);
1817: idx++; v++;
1818: }
1819: }
1820: PetscLogFlops(20*a->nz);
1821: VecRestoreArray(xx,&x);
1822: VecRestoreArray(zz,&y);
1823: return(0);
1824: }
1827: /*--------------------------------------------------------------------------------------------*/
1830: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1831: {
1832: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1833: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1834: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1835: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1837: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1838: PetscInt n,i,jrow,j;
1841: VecGetArray(xx,&x);
1842: VecGetArray(yy,&y);
1843: idx = a->j;
1844: v = a->a;
1845: ii = a->i;
1847: for (i=0; i<m; i++) {
1848: jrow = ii[i];
1849: n = ii[i+1] - jrow;
1850: sum1 = 0.0;
1851: sum2 = 0.0;
1852: sum3 = 0.0;
1853: sum4 = 0.0;
1854: sum5 = 0.0;
1855: sum6 = 0.0;
1856: sum7 = 0.0;
1857: sum8 = 0.0;
1858: sum9 = 0.0;
1859: sum10 = 0.0;
1860: sum11 = 0.0;
1861: sum12 = 0.0;
1862: sum13 = 0.0;
1863: sum14 = 0.0;
1864: sum15 = 0.0;
1865: sum16 = 0.0;
1866: nonzerorow += (n>0);
1867: for (j=0; j<n; j++) {
1868: sum1 += v[jrow]*x[16*idx[jrow]];
1869: sum2 += v[jrow]*x[16*idx[jrow]+1];
1870: sum3 += v[jrow]*x[16*idx[jrow]+2];
1871: sum4 += v[jrow]*x[16*idx[jrow]+3];
1872: sum5 += v[jrow]*x[16*idx[jrow]+4];
1873: sum6 += v[jrow]*x[16*idx[jrow]+5];
1874: sum7 += v[jrow]*x[16*idx[jrow]+6];
1875: sum8 += v[jrow]*x[16*idx[jrow]+7];
1876: sum9 += v[jrow]*x[16*idx[jrow]+8];
1877: sum10 += v[jrow]*x[16*idx[jrow]+9];
1878: sum11 += v[jrow]*x[16*idx[jrow]+10];
1879: sum12 += v[jrow]*x[16*idx[jrow]+11];
1880: sum13 += v[jrow]*x[16*idx[jrow]+12];
1881: sum14 += v[jrow]*x[16*idx[jrow]+13];
1882: sum15 += v[jrow]*x[16*idx[jrow]+14];
1883: sum16 += v[jrow]*x[16*idx[jrow]+15];
1884: jrow++;
1885: }
1886: y[16*i] = sum1;
1887: y[16*i+1] = sum2;
1888: y[16*i+2] = sum3;
1889: y[16*i+3] = sum4;
1890: y[16*i+4] = sum5;
1891: y[16*i+5] = sum6;
1892: y[16*i+6] = sum7;
1893: y[16*i+7] = sum8;
1894: y[16*i+8] = sum9;
1895: y[16*i+9] = sum10;
1896: y[16*i+10] = sum11;
1897: y[16*i+11] = sum12;
1898: y[16*i+12] = sum13;
1899: y[16*i+13] = sum14;
1900: y[16*i+14] = sum15;
1901: y[16*i+15] = sum16;
1902: }
1904: PetscLogFlops(32*a->nz - 16*nonzerorow);
1905: VecRestoreArray(xx,&x);
1906: VecRestoreArray(yy,&y);
1907: return(0);
1908: }
1912: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1913: {
1914: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1915: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1916: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1917: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1919: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1922: VecSet(yy,zero);
1923: VecGetArray(xx,&x);
1924: VecGetArray(yy,&y);
1926: for (i=0; i<m; i++) {
1927: idx = a->j + a->i[i] ;
1928: v = a->a + a->i[i] ;
1929: n = a->i[i+1] - a->i[i];
1930: alpha1 = x[16*i];
1931: alpha2 = x[16*i+1];
1932: alpha3 = x[16*i+2];
1933: alpha4 = x[16*i+3];
1934: alpha5 = x[16*i+4];
1935: alpha6 = x[16*i+5];
1936: alpha7 = x[16*i+6];
1937: alpha8 = x[16*i+7];
1938: alpha9 = x[16*i+8];
1939: alpha10 = x[16*i+9];
1940: alpha11 = x[16*i+10];
1941: alpha12 = x[16*i+11];
1942: alpha13 = x[16*i+12];
1943: alpha14 = x[16*i+13];
1944: alpha15 = x[16*i+14];
1945: alpha16 = x[16*i+15];
1946: while (n-->0) {
1947: y[16*(*idx)] += alpha1*(*v);
1948: y[16*(*idx)+1] += alpha2*(*v);
1949: y[16*(*idx)+2] += alpha3*(*v);
1950: y[16*(*idx)+3] += alpha4*(*v);
1951: y[16*(*idx)+4] += alpha5*(*v);
1952: y[16*(*idx)+5] += alpha6*(*v);
1953: y[16*(*idx)+6] += alpha7*(*v);
1954: y[16*(*idx)+7] += alpha8*(*v);
1955: y[16*(*idx)+8] += alpha9*(*v);
1956: y[16*(*idx)+9] += alpha10*(*v);
1957: y[16*(*idx)+10] += alpha11*(*v);
1958: y[16*(*idx)+11] += alpha12*(*v);
1959: y[16*(*idx)+12] += alpha13*(*v);
1960: y[16*(*idx)+13] += alpha14*(*v);
1961: y[16*(*idx)+14] += alpha15*(*v);
1962: y[16*(*idx)+15] += alpha16*(*v);
1963: idx++; v++;
1964: }
1965: }
1966: PetscLogFlops(32*a->nz);
1967: VecRestoreArray(xx,&x);
1968: VecRestoreArray(yy,&y);
1969: return(0);
1970: }
1974: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1975: {
1976: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1977: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1978: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1979: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1981: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1982: PetscInt n,i,jrow,j;
1985: if (yy != zz) {VecCopy(yy,zz);}
1986: VecGetArray(xx,&x);
1987: VecGetArray(zz,&y);
1988: idx = a->j;
1989: v = a->a;
1990: ii = a->i;
1992: for (i=0; i<m; i++) {
1993: jrow = ii[i];
1994: n = ii[i+1] - jrow;
1995: sum1 = 0.0;
1996: sum2 = 0.0;
1997: sum3 = 0.0;
1998: sum4 = 0.0;
1999: sum5 = 0.0;
2000: sum6 = 0.0;
2001: sum7 = 0.0;
2002: sum8 = 0.0;
2003: sum9 = 0.0;
2004: sum10 = 0.0;
2005: sum11 = 0.0;
2006: sum12 = 0.0;
2007: sum13 = 0.0;
2008: sum14 = 0.0;
2009: sum15 = 0.0;
2010: sum16 = 0.0;
2011: for (j=0; j<n; j++) {
2012: sum1 += v[jrow]*x[16*idx[jrow]];
2013: sum2 += v[jrow]*x[16*idx[jrow]+1];
2014: sum3 += v[jrow]*x[16*idx[jrow]+2];
2015: sum4 += v[jrow]*x[16*idx[jrow]+3];
2016: sum5 += v[jrow]*x[16*idx[jrow]+4];
2017: sum6 += v[jrow]*x[16*idx[jrow]+5];
2018: sum7 += v[jrow]*x[16*idx[jrow]+6];
2019: sum8 += v[jrow]*x[16*idx[jrow]+7];
2020: sum9 += v[jrow]*x[16*idx[jrow]+8];
2021: sum10 += v[jrow]*x[16*idx[jrow]+9];
2022: sum11 += v[jrow]*x[16*idx[jrow]+10];
2023: sum12 += v[jrow]*x[16*idx[jrow]+11];
2024: sum13 += v[jrow]*x[16*idx[jrow]+12];
2025: sum14 += v[jrow]*x[16*idx[jrow]+13];
2026: sum15 += v[jrow]*x[16*idx[jrow]+14];
2027: sum16 += v[jrow]*x[16*idx[jrow]+15];
2028: jrow++;
2029: }
2030: y[16*i] += sum1;
2031: y[16*i+1] += sum2;
2032: y[16*i+2] += sum3;
2033: y[16*i+3] += sum4;
2034: y[16*i+4] += sum5;
2035: y[16*i+5] += sum6;
2036: y[16*i+6] += sum7;
2037: y[16*i+7] += sum8;
2038: y[16*i+8] += sum9;
2039: y[16*i+9] += sum10;
2040: y[16*i+10] += sum11;
2041: y[16*i+11] += sum12;
2042: y[16*i+12] += sum13;
2043: y[16*i+13] += sum14;
2044: y[16*i+14] += sum15;
2045: y[16*i+15] += sum16;
2046: }
2048: PetscLogFlops(32*a->nz);
2049: VecRestoreArray(xx,&x);
2050: VecRestoreArray(zz,&y);
2051: return(0);
2052: }
2056: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2057: {
2058: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2059: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2060: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2061: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2063: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
2066: if (yy != zz) {VecCopy(yy,zz);}
2067: VecGetArray(xx,&x);
2068: VecGetArray(zz,&y);
2069: for (i=0; i<m; i++) {
2070: idx = a->j + a->i[i] ;
2071: v = a->a + a->i[i] ;
2072: n = a->i[i+1] - a->i[i];
2073: alpha1 = x[16*i];
2074: alpha2 = x[16*i+1];
2075: alpha3 = x[16*i+2];
2076: alpha4 = x[16*i+3];
2077: alpha5 = x[16*i+4];
2078: alpha6 = x[16*i+5];
2079: alpha7 = x[16*i+6];
2080: alpha8 = x[16*i+7];
2081: alpha9 = x[16*i+8];
2082: alpha10 = x[16*i+9];
2083: alpha11 = x[16*i+10];
2084: alpha12 = x[16*i+11];
2085: alpha13 = x[16*i+12];
2086: alpha14 = x[16*i+13];
2087: alpha15 = x[16*i+14];
2088: alpha16 = x[16*i+15];
2089: while (n-->0) {
2090: y[16*(*idx)] += alpha1*(*v);
2091: y[16*(*idx)+1] += alpha2*(*v);
2092: y[16*(*idx)+2] += alpha3*(*v);
2093: y[16*(*idx)+3] += alpha4*(*v);
2094: y[16*(*idx)+4] += alpha5*(*v);
2095: y[16*(*idx)+5] += alpha6*(*v);
2096: y[16*(*idx)+6] += alpha7*(*v);
2097: y[16*(*idx)+7] += alpha8*(*v);
2098: y[16*(*idx)+8] += alpha9*(*v);
2099: y[16*(*idx)+9] += alpha10*(*v);
2100: y[16*(*idx)+10] += alpha11*(*v);
2101: y[16*(*idx)+11] += alpha12*(*v);
2102: y[16*(*idx)+12] += alpha13*(*v);
2103: y[16*(*idx)+13] += alpha14*(*v);
2104: y[16*(*idx)+14] += alpha15*(*v);
2105: y[16*(*idx)+15] += alpha16*(*v);
2106: idx++; v++;
2107: }
2108: }
2109: PetscLogFlops(32*a->nz);
2110: VecRestoreArray(xx,&x);
2111: VecRestoreArray(zz,&y);
2112: return(0);
2113: }
2115: /*--------------------------------------------------------------------------------------------*/
2118: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2119: {
2120: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2121: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2122: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2123: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2125: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
2126: PetscInt n,i,jrow,j;
2129: VecGetArray(xx,&x);
2130: VecGetArray(yy,&y);
2131: idx = a->j;
2132: v = a->a;
2133: ii = a->i;
2135: for (i=0; i<m; i++) {
2136: jrow = ii[i];
2137: n = ii[i+1] - jrow;
2138: sum1 = 0.0;
2139: sum2 = 0.0;
2140: sum3 = 0.0;
2141: sum4 = 0.0;
2142: sum5 = 0.0;
2143: sum6 = 0.0;
2144: sum7 = 0.0;
2145: sum8 = 0.0;
2146: sum9 = 0.0;
2147: sum10 = 0.0;
2148: sum11 = 0.0;
2149: sum12 = 0.0;
2150: sum13 = 0.0;
2151: sum14 = 0.0;
2152: sum15 = 0.0;
2153: sum16 = 0.0;
2154: sum17 = 0.0;
2155: sum18 = 0.0;
2156: nonzerorow += (n>0);
2157: for (j=0; j<n; j++) {
2158: sum1 += v[jrow]*x[18*idx[jrow]];
2159: sum2 += v[jrow]*x[18*idx[jrow]+1];
2160: sum3 += v[jrow]*x[18*idx[jrow]+2];
2161: sum4 += v[jrow]*x[18*idx[jrow]+3];
2162: sum5 += v[jrow]*x[18*idx[jrow]+4];
2163: sum6 += v[jrow]*x[18*idx[jrow]+5];
2164: sum7 += v[jrow]*x[18*idx[jrow]+6];
2165: sum8 += v[jrow]*x[18*idx[jrow]+7];
2166: sum9 += v[jrow]*x[18*idx[jrow]+8];
2167: sum10 += v[jrow]*x[18*idx[jrow]+9];
2168: sum11 += v[jrow]*x[18*idx[jrow]+10];
2169: sum12 += v[jrow]*x[18*idx[jrow]+11];
2170: sum13 += v[jrow]*x[18*idx[jrow]+12];
2171: sum14 += v[jrow]*x[18*idx[jrow]+13];
2172: sum15 += v[jrow]*x[18*idx[jrow]+14];
2173: sum16 += v[jrow]*x[18*idx[jrow]+15];
2174: sum17 += v[jrow]*x[18*idx[jrow]+16];
2175: sum18 += v[jrow]*x[18*idx[jrow]+17];
2176: jrow++;
2177: }
2178: y[18*i] = sum1;
2179: y[18*i+1] = sum2;
2180: y[18*i+2] = sum3;
2181: y[18*i+3] = sum4;
2182: y[18*i+4] = sum5;
2183: y[18*i+5] = sum6;
2184: y[18*i+6] = sum7;
2185: y[18*i+7] = sum8;
2186: y[18*i+8] = sum9;
2187: y[18*i+9] = sum10;
2188: y[18*i+10] = sum11;
2189: y[18*i+11] = sum12;
2190: y[18*i+12] = sum13;
2191: y[18*i+13] = sum14;
2192: y[18*i+14] = sum15;
2193: y[18*i+15] = sum16;
2194: y[18*i+16] = sum17;
2195: y[18*i+17] = sum18;
2196: }
2198: PetscLogFlops(36*a->nz - 18*nonzerorow);
2199: VecRestoreArray(xx,&x);
2200: VecRestoreArray(yy,&y);
2201: return(0);
2202: }
2206: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2207: {
2208: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2209: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2210: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
2211: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2213: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
2216: VecSet(yy,zero);
2217: VecGetArray(xx,&x);
2218: VecGetArray(yy,&y);
2220: for (i=0; i<m; i++) {
2221: idx = a->j + a->i[i] ;
2222: v = a->a + a->i[i] ;
2223: n = a->i[i+1] - a->i[i];
2224: alpha1 = x[18*i];
2225: alpha2 = x[18*i+1];
2226: alpha3 = x[18*i+2];
2227: alpha4 = x[18*i+3];
2228: alpha5 = x[18*i+4];
2229: alpha6 = x[18*i+5];
2230: alpha7 = x[18*i+6];
2231: alpha8 = x[18*i+7];
2232: alpha9 = x[18*i+8];
2233: alpha10 = x[18*i+9];
2234: alpha11 = x[18*i+10];
2235: alpha12 = x[18*i+11];
2236: alpha13 = x[18*i+12];
2237: alpha14 = x[18*i+13];
2238: alpha15 = x[18*i+14];
2239: alpha16 = x[18*i+15];
2240: alpha17 = x[18*i+16];
2241: alpha18 = x[18*i+17];
2242: while (n-->0) {
2243: y[18*(*idx)] += alpha1*(*v);
2244: y[18*(*idx)+1] += alpha2*(*v);
2245: y[18*(*idx)+2] += alpha3*(*v);
2246: y[18*(*idx)+3] += alpha4*(*v);
2247: y[18*(*idx)+4] += alpha5*(*v);
2248: y[18*(*idx)+5] += alpha6*(*v);
2249: y[18*(*idx)+6] += alpha7*(*v);
2250: y[18*(*idx)+7] += alpha8*(*v);
2251: y[18*(*idx)+8] += alpha9*(*v);
2252: y[18*(*idx)+9] += alpha10*(*v);
2253: y[18*(*idx)+10] += alpha11*(*v);
2254: y[18*(*idx)+11] += alpha12*(*v);
2255: y[18*(*idx)+12] += alpha13*(*v);
2256: y[18*(*idx)+13] += alpha14*(*v);
2257: y[18*(*idx)+14] += alpha15*(*v);
2258: y[18*(*idx)+15] += alpha16*(*v);
2259: y[18*(*idx)+16] += alpha17*(*v);
2260: y[18*(*idx)+17] += alpha18*(*v);
2261: idx++; v++;
2262: }
2263: }
2264: PetscLogFlops(36*a->nz);
2265: VecRestoreArray(xx,&x);
2266: VecRestoreArray(yy,&y);
2267: return(0);
2268: }
2272: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2273: {
2274: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2275: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2276: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2277: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2279: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
2280: PetscInt n,i,jrow,j;
2283: if (yy != zz) {VecCopy(yy,zz);}
2284: VecGetArray(xx,&x);
2285: VecGetArray(zz,&y);
2286: idx = a->j;
2287: v = a->a;
2288: ii = a->i;
2290: for (i=0; i<m; i++) {
2291: jrow = ii[i];
2292: n = ii[i+1] - jrow;
2293: sum1 = 0.0;
2294: sum2 = 0.0;
2295: sum3 = 0.0;
2296: sum4 = 0.0;
2297: sum5 = 0.0;
2298: sum6 = 0.0;
2299: sum7 = 0.0;
2300: sum8 = 0.0;
2301: sum9 = 0.0;
2302: sum10 = 0.0;
2303: sum11 = 0.0;
2304: sum12 = 0.0;
2305: sum13 = 0.0;
2306: sum14 = 0.0;
2307: sum15 = 0.0;
2308: sum16 = 0.0;
2309: sum17 = 0.0;
2310: sum18 = 0.0;
2311: for (j=0; j<n; j++) {
2312: sum1 += v[jrow]*x[18*idx[jrow]];
2313: sum2 += v[jrow]*x[18*idx[jrow]+1];
2314: sum3 += v[jrow]*x[18*idx[jrow]+2];
2315: sum4 += v[jrow]*x[18*idx[jrow]+3];
2316: sum5 += v[jrow]*x[18*idx[jrow]+4];
2317: sum6 += v[jrow]*x[18*idx[jrow]+5];
2318: sum7 += v[jrow]*x[18*idx[jrow]+6];
2319: sum8 += v[jrow]*x[18*idx[jrow]+7];
2320: sum9 += v[jrow]*x[18*idx[jrow]+8];
2321: sum10 += v[jrow]*x[18*idx[jrow]+9];
2322: sum11 += v[jrow]*x[18*idx[jrow]+10];
2323: sum12 += v[jrow]*x[18*idx[jrow]+11];
2324: sum13 += v[jrow]*x[18*idx[jrow]+12];
2325: sum14 += v[jrow]*x[18*idx[jrow]+13];
2326: sum15 += v[jrow]*x[18*idx[jrow]+14];
2327: sum16 += v[jrow]*x[18*idx[jrow]+15];
2328: sum17 += v[jrow]*x[18*idx[jrow]+16];
2329: sum18 += v[jrow]*x[18*idx[jrow]+17];
2330: jrow++;
2331: }
2332: y[18*i] += sum1;
2333: y[18*i+1] += sum2;
2334: y[18*i+2] += sum3;
2335: y[18*i+3] += sum4;
2336: y[18*i+4] += sum5;
2337: y[18*i+5] += sum6;
2338: y[18*i+6] += sum7;
2339: y[18*i+7] += sum8;
2340: y[18*i+8] += sum9;
2341: y[18*i+9] += sum10;
2342: y[18*i+10] += sum11;
2343: y[18*i+11] += sum12;
2344: y[18*i+12] += sum13;
2345: y[18*i+13] += sum14;
2346: y[18*i+14] += sum15;
2347: y[18*i+15] += sum16;
2348: y[18*i+16] += sum17;
2349: y[18*i+17] += sum18;
2350: }
2352: PetscLogFlops(36*a->nz);
2353: VecRestoreArray(xx,&x);
2354: VecRestoreArray(zz,&y);
2355: return(0);
2356: }
2360: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2361: {
2362: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2363: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2364: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2365: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2367: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
2370: if (yy != zz) {VecCopy(yy,zz);}
2371: VecGetArray(xx,&x);
2372: VecGetArray(zz,&y);
2373: for (i=0; i<m; i++) {
2374: idx = a->j + a->i[i] ;
2375: v = a->a + a->i[i] ;
2376: n = a->i[i+1] - a->i[i];
2377: alpha1 = x[18*i];
2378: alpha2 = x[18*i+1];
2379: alpha3 = x[18*i+2];
2380: alpha4 = x[18*i+3];
2381: alpha5 = x[18*i+4];
2382: alpha6 = x[18*i+5];
2383: alpha7 = x[18*i+6];
2384: alpha8 = x[18*i+7];
2385: alpha9 = x[18*i+8];
2386: alpha10 = x[18*i+9];
2387: alpha11 = x[18*i+10];
2388: alpha12 = x[18*i+11];
2389: alpha13 = x[18*i+12];
2390: alpha14 = x[18*i+13];
2391: alpha15 = x[18*i+14];
2392: alpha16 = x[18*i+15];
2393: alpha17 = x[18*i+16];
2394: alpha18 = x[18*i+17];
2395: while (n-->0) {
2396: y[18*(*idx)] += alpha1*(*v);
2397: y[18*(*idx)+1] += alpha2*(*v);
2398: y[18*(*idx)+2] += alpha3*(*v);
2399: y[18*(*idx)+3] += alpha4*(*v);
2400: y[18*(*idx)+4] += alpha5*(*v);
2401: y[18*(*idx)+5] += alpha6*(*v);
2402: y[18*(*idx)+6] += alpha7*(*v);
2403: y[18*(*idx)+7] += alpha8*(*v);
2404: y[18*(*idx)+8] += alpha9*(*v);
2405: y[18*(*idx)+9] += alpha10*(*v);
2406: y[18*(*idx)+10] += alpha11*(*v);
2407: y[18*(*idx)+11] += alpha12*(*v);
2408: y[18*(*idx)+12] += alpha13*(*v);
2409: y[18*(*idx)+13] += alpha14*(*v);
2410: y[18*(*idx)+14] += alpha15*(*v);
2411: y[18*(*idx)+15] += alpha16*(*v);
2412: y[18*(*idx)+16] += alpha17*(*v);
2413: y[18*(*idx)+17] += alpha18*(*v);
2414: idx++; v++;
2415: }
2416: }
2417: PetscLogFlops(36*a->nz);
2418: VecRestoreArray(xx,&x);
2419: VecRestoreArray(zz,&y);
2420: return(0);
2421: }
2423: /*===================================================================================*/
2426: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2427: {
2428: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2432: /* start the scatter */
2433: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2434: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2435: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2436: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2437: return(0);
2438: }
2442: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2443: {
2444: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2448: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2449: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2450: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2451: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2452: return(0);
2453: }
2457: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2458: {
2459: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2463: /* start the scatter */
2464: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2465: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2466: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2467: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2468: return(0);
2469: }
2473: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2474: {
2475: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2479: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2480: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2481: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2482: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2483: return(0);
2484: }
2486: /* ----------------------------------------------------------------*/
2489: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2490: {
2491: /* This routine requires testing -- but it's getting better. */
2492: PetscErrorCode ierr;
2493: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2494: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2495: Mat P=pp->AIJ;
2496: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2497: PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2498: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2499: PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2500: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2501: MatScalar *ca;
2504: /* Start timer */
2507: /* Get ij structure of P^T */
2508: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2510: cn = pn*ppdof;
2511: /* Allocate ci array, arrays for fill computation and */
2512: /* free space for accumulating nonzero column info */
2513: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2514: ci[0] = 0;
2516: /* Work arrays for rows of P^T*A */
2517: PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2518: PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2519: ptasparserow = ptadenserow + an;
2520: denserow = ptasparserow + an;
2521: sparserow = denserow + cn;
2523: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2524: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2525: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2526: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2527: current_space = free_space;
2529: /* Determine symbolic info for each row of C: */
2530: for (i=0;i<pn;i++) {
2531: ptnzi = pti[i+1] - pti[i];
2532: ptJ = ptj + pti[i];
2533: for (dof=0;dof<ppdof;dof++) {
2534: ptanzi = 0;
2535: /* Determine symbolic row of PtA: */
2536: for (j=0;j<ptnzi;j++) {
2537: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2538: arow = ptJ[j]*ppdof + dof;
2539: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2540: anzj = ai[arow+1] - ai[arow];
2541: ajj = aj + ai[arow];
2542: for (k=0;k<anzj;k++) {
2543: if (!ptadenserow[ajj[k]]) {
2544: ptadenserow[ajj[k]] = -1;
2545: ptasparserow[ptanzi++] = ajj[k];
2546: }
2547: }
2548: }
2549: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2550: ptaj = ptasparserow;
2551: cnzi = 0;
2552: for (j=0;j<ptanzi;j++) {
2553: /* Get offset within block of P */
2554: pshift = *ptaj%ppdof;
2555: /* Get block row of P */
2556: prow = (*ptaj++)/ppdof; /* integer division */
2557: /* P has same number of nonzeros per row as the compressed form */
2558: pnzj = pi[prow+1] - pi[prow];
2559: pjj = pj + pi[prow];
2560: for (k=0;k<pnzj;k++) {
2561: /* Locations in C are shifted by the offset within the block */
2562: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2563: if (!denserow[pjj[k]*ppdof+pshift]) {
2564: denserow[pjj[k]*ppdof+pshift] = -1;
2565: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2566: }
2567: }
2568: }
2570: /* sort sparserow */
2571: PetscSortInt(cnzi,sparserow);
2572:
2573: /* If free space is not available, make more free space */
2574: /* Double the amount of total space in the list */
2575: if (current_space->local_remaining<cnzi) {
2576: PetscFreeSpaceGet(current_space->total_array_size,¤t_space);
2577: }
2579: /* Copy data into free space, and zero out denserows */
2580: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2581: current_space->array += cnzi;
2582: current_space->local_used += cnzi;
2583: current_space->local_remaining -= cnzi;
2585: for (j=0;j<ptanzi;j++) {
2586: ptadenserow[ptasparserow[j]] = 0;
2587: }
2588: for (j=0;j<cnzi;j++) {
2589: denserow[sparserow[j]] = 0;
2590: }
2591: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2592: /* For now, we will recompute what is needed. */
2593: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2594: }
2595: }
2596: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2597: /* Allocate space for cj, initialize cj, and */
2598: /* destroy list of free space and other temporary array(s) */
2599: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2600: PetscFreeSpaceContiguous(&free_space,cj);
2601: PetscFree(ptadenserow);
2602:
2603: /* Allocate space for ca */
2604: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2605: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2606:
2607: /* put together the new matrix */
2608: MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);
2610: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2611: /* Since these are PETSc arrays, change flags to free them as necessary. */
2612: c = (Mat_SeqAIJ *)((*C)->data);
2613: c->free_a = PETSC_TRUE;
2614: c->free_ij = PETSC_TRUE;
2615: c->nonew = 0;
2617: /* Clean up. */
2618: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2621: return(0);
2622: }
2626: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2627: {
2628: /* This routine requires testing -- first draft only */
2630: PetscInt flops=0;
2631: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2632: Mat P=pp->AIJ;
2633: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
2634: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
2635: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
2636: PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2637: PetscInt *ci=c->i,*cj=c->j,*cjj;
2638: PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2639: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2640: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2643: /* Allocate temporary array for storage of one row of A*P */
2644: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2645: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));
2647: apj = (PetscInt *)(apa + cn);
2648: apjdense = apj + cn;
2650: /* Clear old values in C */
2651: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
2653: for (i=0;i<am;i++) {
2654: /* Form sparse row of A*P */
2655: anzi = ai[i+1] - ai[i];
2656: apnzj = 0;
2657: for (j=0;j<anzi;j++) {
2658: /* Get offset within block of P */
2659: pshift = *aj%ppdof;
2660: /* Get block row of P */
2661: prow = *aj++/ppdof; /* integer division */
2662: pnzj = pi[prow+1] - pi[prow];
2663: pjj = pj + pi[prow];
2664: paj = pa + pi[prow];
2665: for (k=0;k<pnzj;k++) {
2666: poffset = pjj[k]*ppdof+pshift;
2667: if (!apjdense[poffset]) {
2668: apjdense[poffset] = -1;
2669: apj[apnzj++] = poffset;
2670: }
2671: apa[poffset] += (*aa)*paj[k];
2672: }
2673: flops += 2*pnzj;
2674: aa++;
2675: }
2677: /* Sort the j index array for quick sparse axpy. */
2678: /* Note: a array does not need sorting as it is in dense storage locations. */
2679: PetscSortInt(apnzj,apj);
2681: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2682: prow = i/ppdof; /* integer division */
2683: pshift = i%ppdof;
2684: poffset = pi[prow];
2685: pnzi = pi[prow+1] - poffset;
2686: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2687: pJ = pj+poffset;
2688: pA = pa+poffset;
2689: for (j=0;j<pnzi;j++) {
2690: crow = (*pJ)*ppdof+pshift;
2691: cjj = cj + ci[crow];
2692: caj = ca + ci[crow];
2693: pJ++;
2694: /* Perform sparse axpy operation. Note cjj includes apj. */
2695: for (k=0,nextap=0;nextap<apnzj;k++) {
2696: if (cjj[k]==apj[nextap]) {
2697: caj[k] += (*pA)*apa[apj[nextap++]];
2698: }
2699: }
2700: flops += 2*apnzj;
2701: pA++;
2702: }
2704: /* Zero the current row info for A*P */
2705: for (j=0;j<apnzj;j++) {
2706: apa[apj[j]] = 0.;
2707: apjdense[apj[j]] = 0;
2708: }
2709: }
2711: /* Assemble the final matrix and clean up */
2712: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2713: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2714: PetscFree(apa);
2715: PetscLogFlops(flops);
2716: return(0);
2717: }
2721: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2722: {
2723: PetscErrorCode ierr;
2726: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2727: MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
2728: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
2729: return(0);
2730: }
2734: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2735: {
2737: SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2738: return(0);
2739: }
2744: PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2745: {
2746: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2747: Mat a = b->AIJ,B;
2748: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
2749: PetscErrorCode ierr;
2750: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2751: PetscInt *cols;
2752: PetscScalar *vals;
2755: MatGetSize(a,&m,&n);
2756: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2757: for (i=0; i<m; i++) {
2758: nmax = PetscMax(nmax,aij->ilen[i]);
2759: for (j=0; j<dof; j++) {
2760: ilen[dof*i+j] = aij->ilen[i];
2761: }
2762: }
2763: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2764: PetscFree(ilen);
2765: PetscMalloc(nmax*sizeof(PetscInt),&icols);
2766: ii = 0;
2767: for (i=0; i<m; i++) {
2768: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2769: for (j=0; j<dof; j++) {
2770: for (k=0; k<ncols; k++) {
2771: icols[k] = dof*cols[k]+j;
2772: }
2773: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2774: ii++;
2775: }
2776: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2777: }
2778: PetscFree(icols);
2779: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2780: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2782: if (reuse == MAT_REUSE_MATRIX) {
2783: MatHeaderReplace(A,B);
2784: } else {
2785: *newmat = B;
2786: }
2787: return(0);
2788: }
2791: #include src/mat/impls/aij/mpi/mpiaij.h
2796: PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2797: {
2798: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
2799: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2800: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2801: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2802: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2803: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2804: PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2805: PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2806: PetscInt rstart,cstart,*garray,ii,k;
2807: PetscErrorCode ierr;
2808: PetscScalar *vals,*ovals;
2811: PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);
2812: for (i=0; i<A->rmap.n/dof; i++) {
2813: nmax = PetscMax(nmax,AIJ->ilen[i]);
2814: onmax = PetscMax(onmax,OAIJ->ilen[i]);
2815: for (j=0; j<dof; j++) {
2816: dnz[dof*i+j] = AIJ->ilen[i];
2817: onz[dof*i+j] = OAIJ->ilen[i];
2818: }
2819: }
2820: MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);
2821: PetscFree2(dnz,onz);
2823: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2824: rstart = dof*maij->A->rmap.rstart;
2825: cstart = dof*maij->A->cmap.rstart;
2826: garray = mpiaij->garray;
2828: ii = rstart;
2829: for (i=0; i<A->rmap.n/dof; i++) {
2830: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2831: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2832: for (j=0; j<dof; j++) {
2833: for (k=0; k<ncols; k++) {
2834: icols[k] = cstart + dof*cols[k]+j;
2835: }
2836: for (k=0; k<oncols; k++) {
2837: oicols[k] = dof*garray[ocols[k]]+j;
2838: }
2839: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2840: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2841: ii++;
2842: }
2843: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2844: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2845: }
2846: PetscFree2(icols,oicols);
2848: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2849: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2851: if (reuse == MAT_REUSE_MATRIX) {
2852: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
2853: ((PetscObject)A)->refct = 1;
2854: MatHeaderReplace(A,B);
2855: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
2856: } else {
2857: *newmat = B;
2858: }
2859: return(0);
2860: }
2864: /* ---------------------------------------------------------------------------------- */
2865: /*MC
2866: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2867: operations for multicomponent problems. It interpolates each component the same
2868: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
2869: and MATMPIAIJ for distributed matrices.
2871: Operations provided:
2872: + MatMult
2873: . MatMultTranspose
2874: . MatMultAdd
2875: . MatMultTransposeAdd
2876: - MatView
2878: Level: advanced
2880: M*/
2883: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2884: {
2886: PetscMPIInt size;
2887: PetscInt n;
2888: Mat_MPIMAIJ *b;
2889: Mat B;
2892: PetscObjectReference((PetscObject)A);
2894: if (dof == 1) {
2895: *maij = A;
2896: } else {
2897: MatCreate(((PetscObject)A)->comm,&B);
2898: MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);
2899: B->assembled = PETSC_TRUE;
2901: MPI_Comm_size(((PetscObject)A)->comm,&size);
2902: if (size == 1) {
2903: MatSetType(B,MATSEQMAIJ);
2904: B->ops->destroy = MatDestroy_SeqMAIJ;
2905: B->ops->view = MatView_SeqMAIJ;
2906: b = (Mat_MPIMAIJ*)B->data;
2907: b->dof = dof;
2908: b->AIJ = A;
2909: if (dof == 2) {
2910: B->ops->mult = MatMult_SeqMAIJ_2;
2911: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
2912: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
2913: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2914: } else if (dof == 3) {
2915: B->ops->mult = MatMult_SeqMAIJ_3;
2916: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
2917: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
2918: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2919: } else if (dof == 4) {
2920: B->ops->mult = MatMult_SeqMAIJ_4;
2921: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
2922: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
2923: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2924: } else if (dof == 5) {
2925: B->ops->mult = MatMult_SeqMAIJ_5;
2926: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
2927: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
2928: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2929: } else if (dof == 6) {
2930: B->ops->mult = MatMult_SeqMAIJ_6;
2931: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
2932: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
2933: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2934: } else if (dof == 7) {
2935: B->ops->mult = MatMult_SeqMAIJ_7;
2936: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
2937: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
2938: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2939: } else if (dof == 8) {
2940: B->ops->mult = MatMult_SeqMAIJ_8;
2941: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
2942: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
2943: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2944: } else if (dof == 9) {
2945: B->ops->mult = MatMult_SeqMAIJ_9;
2946: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
2947: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
2948: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2949: } else if (dof == 10) {
2950: B->ops->mult = MatMult_SeqMAIJ_10;
2951: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
2952: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
2953: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2954: } else if (dof == 16) {
2955: B->ops->mult = MatMult_SeqMAIJ_16;
2956: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
2957: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
2958: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2959: } else if (dof == 18) {
2960: B->ops->mult = MatMult_SeqMAIJ_18;
2961: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
2962: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
2963: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
2964: } else {
2965: SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2966: }
2967: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2968: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2969: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2970: } else {
2971: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2972: IS from,to;
2973: Vec gvec;
2974: PetscInt *garray,i;
2976: MatSetType(B,MATMPIMAIJ);
2977: B->ops->destroy = MatDestroy_MPIMAIJ;
2978: B->ops->view = MatView_MPIMAIJ;
2979: b = (Mat_MPIMAIJ*)B->data;
2980: b->dof = dof;
2981: b->A = A;
2982: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2983: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
2985: VecGetSize(mpiaij->lvec,&n);
2986: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2987: VecSetBlockSize(b->w,dof);
2989: /* create two temporary Index sets for build scatter gather */
2990: PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2991: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2992: ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);
2993: PetscFree(garray);
2994: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
2996: /* create temporary global vector to generate scatter context */
2997: VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap.n,dof*A->cmap.N,PETSC_NULL,&gvec);
2998: VecSetBlockSize(gvec,dof);
3000: /* generate the scatter context */
3001: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3003: ISDestroy(from);
3004: ISDestroy(to);
3005: VecDestroy(gvec);
3007: B->ops->mult = MatMult_MPIMAIJ_dof;
3008: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3009: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3010: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3011: B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3012: B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3013: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3014: }
3015: *maij = B;
3016: MatView_Private(B);
3017: }
3018: return(0);
3019: }