Actual source code: maij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the MAIJ matrix storage format.
5: This format is used for restriction and interpolation operations for
6: multicomponent problems. It interpolates each component the same way
7: independently.
9: We provide:
10: MatMult()
11: MatMultTranspose()
12: MatMultTransposeAdd()
13: MatMultAdd()
14: and
15: MatCreateMAIJ(Mat,dof,Mat*)
17: This single directory handles both the sequential and parallel codes
18: */
20: #include src/mat/impls/maij/maij.h
21: #include src/mat/utils/freespace.h
22: #include private/vecimpl.h
26: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
27: {
29: PetscTruth ismpimaij,isseqmaij;
32: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
33: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
34: if (ismpimaij) {
35: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
37: *B = b->A;
38: } else if (isseqmaij) {
39: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
41: *B = b->AIJ;
42: } else {
43: *B = A;
44: }
45: return(0);
46: }
50: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51: {
53: Mat Aij;
56: MatMAIJGetAIJ(A,&Aij);
57: MatCreateMAIJ(Aij,dof,B);
58: return(0);
59: }
63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64: {
66: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
69: if (b->AIJ) {
70: MatDestroy(b->AIJ);
71: }
72: PetscFree(b);
73: return(0);
74: }
78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79: {
81: Mat B;
84: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85: MatView(B,viewer);
86: MatDestroy(B);
87: return(0);
88: }
92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93: {
95: Mat B;
98: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99: MatView(B,viewer);
100: MatDestroy(B);
101: return(0);
102: }
106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
112: if (b->AIJ) {
113: MatDestroy(b->AIJ);
114: }
115: if (b->OAIJ) {
116: MatDestroy(b->OAIJ);
117: }
118: if (b->A) {
119: MatDestroy(b->A);
120: }
121: if (b->ctx) {
122: VecScatterDestroy(b->ctx);
123: }
124: if (b->w) {
125: VecDestroy(b->w);
126: }
127: PetscFree(b);
128: PetscObjectChangeTypeName((PetscObject)A,0);
129: return(0);
130: }
132: /*MC
133: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
134: multicomponent problems, interpolating or restricting each component the same way independently.
135: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
137: Operations provided:
138: . MatMult
139: . MatMultTranspose
140: . MatMultAdd
141: . MatMultTransposeAdd
143: Level: advanced
145: .seealso: MatCreateSeqDense
146: M*/
151: PetscErrorCode MatCreate_MAIJ(Mat A)
152: {
154: Mat_MPIMAIJ *b;
155: PetscMPIInt size;
158: PetscNewLog(A,Mat_MPIMAIJ,&b);
159: A->data = (void*)b;
160: PetscMemzero(A->ops,sizeof(struct _MatOps));
161: A->factor = 0;
162: A->mapping = 0;
164: b->AIJ = 0;
165: b->dof = 0;
166: b->OAIJ = 0;
167: b->ctx = 0;
168: b->w = 0;
169: MPI_Comm_size(((PetscObject)A)->comm,&size);
170: if (size == 1){
171: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
172: } else {
173: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
174: }
175: return(0);
176: }
179: /* --------------------------------------------------------------------------------------*/
182: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
183: {
184: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
185: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
186: PetscScalar *x,*y,*v,sum1, sum2;
188: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
189: PetscInt n,i,jrow,j;
192: VecGetArray(xx,&x);
193: VecGetArray(yy,&y);
194: idx = a->j;
195: v = a->a;
196: ii = a->i;
198: for (i=0; i<m; i++) {
199: jrow = ii[i];
200: n = ii[i+1] - jrow;
201: sum1 = 0.0;
202: sum2 = 0.0;
203: nonzerorow += (n>0);
204: for (j=0; j<n; j++) {
205: sum1 += v[jrow]*x[2*idx[jrow]];
206: sum2 += v[jrow]*x[2*idx[jrow]+1];
207: jrow++;
208: }
209: y[2*i] = sum1;
210: y[2*i+1] = sum2;
211: }
213: PetscLogFlops(4*a->nz - 2*nonzerorow);
214: VecRestoreArray(xx,&x);
215: VecRestoreArray(yy,&y);
216: return(0);
217: }
221: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
222: {
223: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
224: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
225: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
227: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
230: VecSet(yy,zero);
231: VecGetArray(xx,&x);
232: VecGetArray(yy,&y);
233:
234: for (i=0; i<m; i++) {
235: idx = a->j + a->i[i] ;
236: v = a->a + a->i[i] ;
237: n = a->i[i+1] - a->i[i];
238: alpha1 = x[2*i];
239: alpha2 = x[2*i+1];
240: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
241: }
242: PetscLogFlops(4*a->nz);
243: VecRestoreArray(xx,&x);
244: VecRestoreArray(yy,&y);
245: return(0);
246: }
250: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
251: {
252: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
253: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
254: PetscScalar *x,*y,*v,sum1, sum2;
256: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
257: PetscInt n,i,jrow,j;
260: if (yy != zz) {VecCopy(yy,zz);}
261: VecGetArray(xx,&x);
262: VecGetArray(zz,&y);
263: idx = a->j;
264: v = a->a;
265: ii = a->i;
267: for (i=0; i<m; i++) {
268: jrow = ii[i];
269: n = ii[i+1] - jrow;
270: sum1 = 0.0;
271: sum2 = 0.0;
272: for (j=0; j<n; j++) {
273: sum1 += v[jrow]*x[2*idx[jrow]];
274: sum2 += v[jrow]*x[2*idx[jrow]+1];
275: jrow++;
276: }
277: y[2*i] += sum1;
278: y[2*i+1] += sum2;
279: }
281: PetscLogFlops(4*a->nz);
282: VecRestoreArray(xx,&x);
283: VecRestoreArray(zz,&y);
284: return(0);
285: }
288: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
289: {
290: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
291: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
292: PetscScalar *x,*y,*v,alpha1,alpha2;
294: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
297: if (yy != zz) {VecCopy(yy,zz);}
298: VecGetArray(xx,&x);
299: VecGetArray(zz,&y);
300:
301: for (i=0; i<m; i++) {
302: idx = a->j + a->i[i] ;
303: v = a->a + a->i[i] ;
304: n = a->i[i+1] - a->i[i];
305: alpha1 = x[2*i];
306: alpha2 = x[2*i+1];
307: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
308: }
309: PetscLogFlops(4*a->nz);
310: VecRestoreArray(xx,&x);
311: VecRestoreArray(zz,&y);
312: return(0);
313: }
314: /* --------------------------------------------------------------------------------------*/
317: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
318: {
319: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
320: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
321: PetscScalar *x,*y,*v,sum1, sum2, sum3;
323: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
324: PetscInt n,i,jrow,j;
327: VecGetArray(xx,&x);
328: VecGetArray(yy,&y);
329: idx = a->j;
330: v = a->a;
331: ii = a->i;
333: for (i=0; i<m; i++) {
334: jrow = ii[i];
335: n = ii[i+1] - jrow;
336: sum1 = 0.0;
337: sum2 = 0.0;
338: sum3 = 0.0;
339: nonzerorow += (n>0);
340: for (j=0; j<n; j++) {
341: sum1 += v[jrow]*x[3*idx[jrow]];
342: sum2 += v[jrow]*x[3*idx[jrow]+1];
343: sum3 += v[jrow]*x[3*idx[jrow]+2];
344: jrow++;
345: }
346: y[3*i] = sum1;
347: y[3*i+1] = sum2;
348: y[3*i+2] = sum3;
349: }
351: PetscLogFlops(6*a->nz - 3*nonzerorow);
352: VecRestoreArray(xx,&x);
353: VecRestoreArray(yy,&y);
354: return(0);
355: }
359: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
360: {
361: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
362: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
363: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
365: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
368: VecSet(yy,zero);
369: VecGetArray(xx,&x);
370: VecGetArray(yy,&y);
371:
372: for (i=0; i<m; i++) {
373: idx = a->j + a->i[i];
374: v = a->a + a->i[i];
375: n = a->i[i+1] - a->i[i];
376: alpha1 = x[3*i];
377: alpha2 = x[3*i+1];
378: alpha3 = x[3*i+2];
379: while (n-->0) {
380: y[3*(*idx)] += alpha1*(*v);
381: y[3*(*idx)+1] += alpha2*(*v);
382: y[3*(*idx)+2] += alpha3*(*v);
383: idx++; v++;
384: }
385: }
386: PetscLogFlops(6*a->nz);
387: VecRestoreArray(xx,&x);
388: VecRestoreArray(yy,&y);
389: return(0);
390: }
394: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
395: {
396: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
397: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
398: PetscScalar *x,*y,*v,sum1, sum2, sum3;
400: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
401: PetscInt n,i,jrow,j;
404: if (yy != zz) {VecCopy(yy,zz);}
405: VecGetArray(xx,&x);
406: VecGetArray(zz,&y);
407: idx = a->j;
408: v = a->a;
409: ii = a->i;
411: for (i=0; i<m; i++) {
412: jrow = ii[i];
413: n = ii[i+1] - jrow;
414: sum1 = 0.0;
415: sum2 = 0.0;
416: sum3 = 0.0;
417: for (j=0; j<n; j++) {
418: sum1 += v[jrow]*x[3*idx[jrow]];
419: sum2 += v[jrow]*x[3*idx[jrow]+1];
420: sum3 += v[jrow]*x[3*idx[jrow]+2];
421: jrow++;
422: }
423: y[3*i] += sum1;
424: y[3*i+1] += sum2;
425: y[3*i+2] += sum3;
426: }
428: PetscLogFlops(6*a->nz);
429: VecRestoreArray(xx,&x);
430: VecRestoreArray(zz,&y);
431: return(0);
432: }
435: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
436: {
437: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
438: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
439: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
441: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
444: if (yy != zz) {VecCopy(yy,zz);}
445: VecGetArray(xx,&x);
446: VecGetArray(zz,&y);
447: for (i=0; i<m; i++) {
448: idx = a->j + a->i[i] ;
449: v = a->a + a->i[i] ;
450: n = a->i[i+1] - a->i[i];
451: alpha1 = x[3*i];
452: alpha2 = x[3*i+1];
453: alpha3 = x[3*i+2];
454: while (n-->0) {
455: y[3*(*idx)] += alpha1*(*v);
456: y[3*(*idx)+1] += alpha2*(*v);
457: y[3*(*idx)+2] += alpha3*(*v);
458: idx++; v++;
459: }
460: }
461: PetscLogFlops(6*a->nz);
462: VecRestoreArray(xx,&x);
463: VecRestoreArray(zz,&y);
464: return(0);
465: }
467: /* ------------------------------------------------------------------------------*/
470: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
471: {
472: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
473: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
474: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
476: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
477: PetscInt n,i,jrow,j;
480: VecGetArray(xx,&x);
481: VecGetArray(yy,&y);
482: idx = a->j;
483: v = a->a;
484: ii = a->i;
486: for (i=0; i<m; i++) {
487: jrow = ii[i];
488: n = ii[i+1] - jrow;
489: sum1 = 0.0;
490: sum2 = 0.0;
491: sum3 = 0.0;
492: sum4 = 0.0;
493: nonzerorow += (n>0);
494: for (j=0; j<n; j++) {
495: sum1 += v[jrow]*x[4*idx[jrow]];
496: sum2 += v[jrow]*x[4*idx[jrow]+1];
497: sum3 += v[jrow]*x[4*idx[jrow]+2];
498: sum4 += v[jrow]*x[4*idx[jrow]+3];
499: jrow++;
500: }
501: y[4*i] = sum1;
502: y[4*i+1] = sum2;
503: y[4*i+2] = sum3;
504: y[4*i+3] = sum4;
505: }
507: PetscLogFlops(8*a->nz - 4*nonzerorow);
508: VecRestoreArray(xx,&x);
509: VecRestoreArray(yy,&y);
510: return(0);
511: }
515: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
516: {
517: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
518: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
519: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
521: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
524: VecSet(yy,zero);
525: VecGetArray(xx,&x);
526: VecGetArray(yy,&y);
527: for (i=0; i<m; i++) {
528: idx = a->j + a->i[i] ;
529: v = a->a + a->i[i] ;
530: n = a->i[i+1] - a->i[i];
531: alpha1 = x[4*i];
532: alpha2 = x[4*i+1];
533: alpha3 = x[4*i+2];
534: alpha4 = x[4*i+3];
535: while (n-->0) {
536: y[4*(*idx)] += alpha1*(*v);
537: y[4*(*idx)+1] += alpha2*(*v);
538: y[4*(*idx)+2] += alpha3*(*v);
539: y[4*(*idx)+3] += alpha4*(*v);
540: idx++; v++;
541: }
542: }
543: PetscLogFlops(8*a->nz);
544: VecRestoreArray(xx,&x);
545: VecRestoreArray(yy,&y);
546: return(0);
547: }
551: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
552: {
553: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
554: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
555: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
557: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
558: PetscInt n,i,jrow,j;
561: if (yy != zz) {VecCopy(yy,zz);}
562: VecGetArray(xx,&x);
563: VecGetArray(zz,&y);
564: idx = a->j;
565: v = a->a;
566: ii = a->i;
568: for (i=0; i<m; i++) {
569: jrow = ii[i];
570: n = ii[i+1] - jrow;
571: sum1 = 0.0;
572: sum2 = 0.0;
573: sum3 = 0.0;
574: sum4 = 0.0;
575: for (j=0; j<n; j++) {
576: sum1 += v[jrow]*x[4*idx[jrow]];
577: sum2 += v[jrow]*x[4*idx[jrow]+1];
578: sum3 += v[jrow]*x[4*idx[jrow]+2];
579: sum4 += v[jrow]*x[4*idx[jrow]+3];
580: jrow++;
581: }
582: y[4*i] += sum1;
583: y[4*i+1] += sum2;
584: y[4*i+2] += sum3;
585: y[4*i+3] += sum4;
586: }
588: PetscLogFlops(8*a->nz);
589: VecRestoreArray(xx,&x);
590: VecRestoreArray(zz,&y);
591: return(0);
592: }
595: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596: {
597: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
598: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
599: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
601: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
604: if (yy != zz) {VecCopy(yy,zz);}
605: VecGetArray(xx,&x);
606: VecGetArray(zz,&y);
607:
608: for (i=0; i<m; i++) {
609: idx = a->j + a->i[i] ;
610: v = a->a + a->i[i] ;
611: n = a->i[i+1] - a->i[i];
612: alpha1 = x[4*i];
613: alpha2 = x[4*i+1];
614: alpha3 = x[4*i+2];
615: alpha4 = x[4*i+3];
616: while (n-->0) {
617: y[4*(*idx)] += alpha1*(*v);
618: y[4*(*idx)+1] += alpha2*(*v);
619: y[4*(*idx)+2] += alpha3*(*v);
620: y[4*(*idx)+3] += alpha4*(*v);
621: idx++; v++;
622: }
623: }
624: PetscLogFlops(8*a->nz);
625: VecRestoreArray(xx,&x);
626: VecRestoreArray(zz,&y);
627: return(0);
628: }
629: /* ------------------------------------------------------------------------------*/
633: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
634: {
635: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
636: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
637: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
639: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
640: PetscInt n,i,jrow,j;
643: VecGetArray(xx,&x);
644: VecGetArray(yy,&y);
645: idx = a->j;
646: v = a->a;
647: ii = a->i;
649: for (i=0; i<m; i++) {
650: jrow = ii[i];
651: n = ii[i+1] - jrow;
652: sum1 = 0.0;
653: sum2 = 0.0;
654: sum3 = 0.0;
655: sum4 = 0.0;
656: sum5 = 0.0;
657: nonzerorow += (n>0);
658: for (j=0; j<n; j++) {
659: sum1 += v[jrow]*x[5*idx[jrow]];
660: sum2 += v[jrow]*x[5*idx[jrow]+1];
661: sum3 += v[jrow]*x[5*idx[jrow]+2];
662: sum4 += v[jrow]*x[5*idx[jrow]+3];
663: sum5 += v[jrow]*x[5*idx[jrow]+4];
664: jrow++;
665: }
666: y[5*i] = sum1;
667: y[5*i+1] = sum2;
668: y[5*i+2] = sum3;
669: y[5*i+3] = sum4;
670: y[5*i+4] = sum5;
671: }
673: PetscLogFlops(10*a->nz - 5*nonzerorow);
674: VecRestoreArray(xx,&x);
675: VecRestoreArray(yy,&y);
676: return(0);
677: }
681: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
682: {
683: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
684: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
685: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
687: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
690: VecSet(yy,zero);
691: VecGetArray(xx,&x);
692: VecGetArray(yy,&y);
693:
694: for (i=0; i<m; i++) {
695: idx = a->j + a->i[i] ;
696: v = a->a + a->i[i] ;
697: n = a->i[i+1] - a->i[i];
698: alpha1 = x[5*i];
699: alpha2 = x[5*i+1];
700: alpha3 = x[5*i+2];
701: alpha4 = x[5*i+3];
702: alpha5 = x[5*i+4];
703: while (n-->0) {
704: y[5*(*idx)] += alpha1*(*v);
705: y[5*(*idx)+1] += alpha2*(*v);
706: y[5*(*idx)+2] += alpha3*(*v);
707: y[5*(*idx)+3] += alpha4*(*v);
708: y[5*(*idx)+4] += alpha5*(*v);
709: idx++; v++;
710: }
711: }
712: PetscLogFlops(10*a->nz);
713: VecRestoreArray(xx,&x);
714: VecRestoreArray(yy,&y);
715: return(0);
716: }
720: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
721: {
722: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
723: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
724: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
726: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
727: PetscInt n,i,jrow,j;
730: if (yy != zz) {VecCopy(yy,zz);}
731: VecGetArray(xx,&x);
732: VecGetArray(zz,&y);
733: idx = a->j;
734: v = a->a;
735: ii = a->i;
737: for (i=0; i<m; i++) {
738: jrow = ii[i];
739: n = ii[i+1] - jrow;
740: sum1 = 0.0;
741: sum2 = 0.0;
742: sum3 = 0.0;
743: sum4 = 0.0;
744: sum5 = 0.0;
745: for (j=0; j<n; j++) {
746: sum1 += v[jrow]*x[5*idx[jrow]];
747: sum2 += v[jrow]*x[5*idx[jrow]+1];
748: sum3 += v[jrow]*x[5*idx[jrow]+2];
749: sum4 += v[jrow]*x[5*idx[jrow]+3];
750: sum5 += v[jrow]*x[5*idx[jrow]+4];
751: jrow++;
752: }
753: y[5*i] += sum1;
754: y[5*i+1] += sum2;
755: y[5*i+2] += sum3;
756: y[5*i+3] += sum4;
757: y[5*i+4] += sum5;
758: }
760: PetscLogFlops(10*a->nz);
761: VecRestoreArray(xx,&x);
762: VecRestoreArray(zz,&y);
763: return(0);
764: }
768: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
769: {
770: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
771: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
772: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
774: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
777: if (yy != zz) {VecCopy(yy,zz);}
778: VecGetArray(xx,&x);
779: VecGetArray(zz,&y);
780:
781: for (i=0; i<m; i++) {
782: idx = a->j + a->i[i] ;
783: v = a->a + a->i[i] ;
784: n = a->i[i+1] - a->i[i];
785: alpha1 = x[5*i];
786: alpha2 = x[5*i+1];
787: alpha3 = x[5*i+2];
788: alpha4 = x[5*i+3];
789: alpha5 = x[5*i+4];
790: while (n-->0) {
791: y[5*(*idx)] += alpha1*(*v);
792: y[5*(*idx)+1] += alpha2*(*v);
793: y[5*(*idx)+2] += alpha3*(*v);
794: y[5*(*idx)+3] += alpha4*(*v);
795: y[5*(*idx)+4] += alpha5*(*v);
796: idx++; v++;
797: }
798: }
799: PetscLogFlops(10*a->nz);
800: VecRestoreArray(xx,&x);
801: VecRestoreArray(zz,&y);
802: return(0);
803: }
805: /* ------------------------------------------------------------------------------*/
808: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
809: {
810: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
811: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
812: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
814: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
815: PetscInt n,i,jrow,j;
818: VecGetArray(xx,&x);
819: VecGetArray(yy,&y);
820: idx = a->j;
821: v = a->a;
822: ii = a->i;
824: for (i=0; i<m; i++) {
825: jrow = ii[i];
826: n = ii[i+1] - jrow;
827: sum1 = 0.0;
828: sum2 = 0.0;
829: sum3 = 0.0;
830: sum4 = 0.0;
831: sum5 = 0.0;
832: sum6 = 0.0;
833: nonzerorow += (n>0);
834: for (j=0; j<n; j++) {
835: sum1 += v[jrow]*x[6*idx[jrow]];
836: sum2 += v[jrow]*x[6*idx[jrow]+1];
837: sum3 += v[jrow]*x[6*idx[jrow]+2];
838: sum4 += v[jrow]*x[6*idx[jrow]+3];
839: sum5 += v[jrow]*x[6*idx[jrow]+4];
840: sum6 += v[jrow]*x[6*idx[jrow]+5];
841: jrow++;
842: }
843: y[6*i] = sum1;
844: y[6*i+1] = sum2;
845: y[6*i+2] = sum3;
846: y[6*i+3] = sum4;
847: y[6*i+4] = sum5;
848: y[6*i+5] = sum6;
849: }
851: PetscLogFlops(12*a->nz - 6*nonzerorow);
852: VecRestoreArray(xx,&x);
853: VecRestoreArray(yy,&y);
854: return(0);
855: }
859: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
860: {
861: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
862: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
863: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
865: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
868: VecSet(yy,zero);
869: VecGetArray(xx,&x);
870: VecGetArray(yy,&y);
872: for (i=0; i<m; i++) {
873: idx = a->j + a->i[i] ;
874: v = a->a + a->i[i] ;
875: n = a->i[i+1] - a->i[i];
876: alpha1 = x[6*i];
877: alpha2 = x[6*i+1];
878: alpha3 = x[6*i+2];
879: alpha4 = x[6*i+3];
880: alpha5 = x[6*i+4];
881: alpha6 = x[6*i+5];
882: while (n-->0) {
883: y[6*(*idx)] += alpha1*(*v);
884: y[6*(*idx)+1] += alpha2*(*v);
885: y[6*(*idx)+2] += alpha3*(*v);
886: y[6*(*idx)+3] += alpha4*(*v);
887: y[6*(*idx)+4] += alpha5*(*v);
888: y[6*(*idx)+5] += alpha6*(*v);
889: idx++; v++;
890: }
891: }
892: PetscLogFlops(12*a->nz);
893: VecRestoreArray(xx,&x);
894: VecRestoreArray(yy,&y);
895: return(0);
896: }
900: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
901: {
902: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
903: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
904: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
906: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
907: PetscInt n,i,jrow,j;
910: if (yy != zz) {VecCopy(yy,zz);}
911: VecGetArray(xx,&x);
912: VecGetArray(zz,&y);
913: idx = a->j;
914: v = a->a;
915: ii = a->i;
917: for (i=0; i<m; i++) {
918: jrow = ii[i];
919: n = ii[i+1] - jrow;
920: sum1 = 0.0;
921: sum2 = 0.0;
922: sum3 = 0.0;
923: sum4 = 0.0;
924: sum5 = 0.0;
925: sum6 = 0.0;
926: for (j=0; j<n; j++) {
927: sum1 += v[jrow]*x[6*idx[jrow]];
928: sum2 += v[jrow]*x[6*idx[jrow]+1];
929: sum3 += v[jrow]*x[6*idx[jrow]+2];
930: sum4 += v[jrow]*x[6*idx[jrow]+3];
931: sum5 += v[jrow]*x[6*idx[jrow]+4];
932: sum6 += v[jrow]*x[6*idx[jrow]+5];
933: jrow++;
934: }
935: y[6*i] += sum1;
936: y[6*i+1] += sum2;
937: y[6*i+2] += sum3;
938: y[6*i+3] += sum4;
939: y[6*i+4] += sum5;
940: y[6*i+5] += sum6;
941: }
943: PetscLogFlops(12*a->nz);
944: VecRestoreArray(xx,&x);
945: VecRestoreArray(zz,&y);
946: return(0);
947: }
951: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
952: {
953: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
954: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
955: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
957: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
960: if (yy != zz) {VecCopy(yy,zz);}
961: VecGetArray(xx,&x);
962: VecGetArray(zz,&y);
963:
964: for (i=0; i<m; i++) {
965: idx = a->j + a->i[i] ;
966: v = a->a + a->i[i] ;
967: n = a->i[i+1] - a->i[i];
968: alpha1 = x[6*i];
969: alpha2 = x[6*i+1];
970: alpha3 = x[6*i+2];
971: alpha4 = x[6*i+3];
972: alpha5 = x[6*i+4];
973: alpha6 = x[6*i+5];
974: while (n-->0) {
975: y[6*(*idx)] += alpha1*(*v);
976: y[6*(*idx)+1] += alpha2*(*v);
977: y[6*(*idx)+2] += alpha3*(*v);
978: y[6*(*idx)+3] += alpha4*(*v);
979: y[6*(*idx)+4] += alpha5*(*v);
980: y[6*(*idx)+5] += alpha6*(*v);
981: idx++; v++;
982: }
983: }
984: PetscLogFlops(12*a->nz);
985: VecRestoreArray(xx,&x);
986: VecRestoreArray(zz,&y);
987: return(0);
988: }
990: /* ------------------------------------------------------------------------------*/
993: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
994: {
995: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
996: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
997: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
999: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1000: PetscInt n,i,jrow,j;
1003: VecGetArray(xx,&x);
1004: VecGetArray(yy,&y);
1005: idx = a->j;
1006: v = a->a;
1007: ii = a->i;
1009: for (i=0; i<m; i++) {
1010: jrow = ii[i];
1011: n = ii[i+1] - jrow;
1012: sum1 = 0.0;
1013: sum2 = 0.0;
1014: sum3 = 0.0;
1015: sum4 = 0.0;
1016: sum5 = 0.0;
1017: sum6 = 0.0;
1018: sum7 = 0.0;
1019: nonzerorow += (n>0);
1020: for (j=0; j<n; j++) {
1021: sum1 += v[jrow]*x[7*idx[jrow]];
1022: sum2 += v[jrow]*x[7*idx[jrow]+1];
1023: sum3 += v[jrow]*x[7*idx[jrow]+2];
1024: sum4 += v[jrow]*x[7*idx[jrow]+3];
1025: sum5 += v[jrow]*x[7*idx[jrow]+4];
1026: sum6 += v[jrow]*x[7*idx[jrow]+5];
1027: sum7 += v[jrow]*x[7*idx[jrow]+6];
1028: jrow++;
1029: }
1030: y[7*i] = sum1;
1031: y[7*i+1] = sum2;
1032: y[7*i+2] = sum3;
1033: y[7*i+3] = sum4;
1034: y[7*i+4] = sum5;
1035: y[7*i+5] = sum6;
1036: y[7*i+6] = sum7;
1037: }
1039: PetscLogFlops(14*a->nz - 7*nonzerorow);
1040: VecRestoreArray(xx,&x);
1041: VecRestoreArray(yy,&y);
1042: return(0);
1043: }
1047: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1048: {
1049: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1050: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1051: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1053: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1056: VecSet(yy,zero);
1057: VecGetArray(xx,&x);
1058: VecGetArray(yy,&y);
1060: for (i=0; i<m; i++) {
1061: idx = a->j + a->i[i] ;
1062: v = a->a + a->i[i] ;
1063: n = a->i[i+1] - a->i[i];
1064: alpha1 = x[7*i];
1065: alpha2 = x[7*i+1];
1066: alpha3 = x[7*i+2];
1067: alpha4 = x[7*i+3];
1068: alpha5 = x[7*i+4];
1069: alpha6 = x[7*i+5];
1070: alpha7 = x[7*i+6];
1071: while (n-->0) {
1072: y[7*(*idx)] += alpha1*(*v);
1073: y[7*(*idx)+1] += alpha2*(*v);
1074: y[7*(*idx)+2] += alpha3*(*v);
1075: y[7*(*idx)+3] += alpha4*(*v);
1076: y[7*(*idx)+4] += alpha5*(*v);
1077: y[7*(*idx)+5] += alpha6*(*v);
1078: y[7*(*idx)+6] += alpha7*(*v);
1079: idx++; v++;
1080: }
1081: }
1082: PetscLogFlops(14*a->nz);
1083: VecRestoreArray(xx,&x);
1084: VecRestoreArray(yy,&y);
1085: return(0);
1086: }
1090: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1091: {
1092: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1093: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1094: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1096: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1097: PetscInt n,i,jrow,j;
1100: if (yy != zz) {VecCopy(yy,zz);}
1101: VecGetArray(xx,&x);
1102: VecGetArray(zz,&y);
1103: idx = a->j;
1104: v = a->a;
1105: ii = a->i;
1107: for (i=0; i<m; i++) {
1108: jrow = ii[i];
1109: n = ii[i+1] - jrow;
1110: sum1 = 0.0;
1111: sum2 = 0.0;
1112: sum3 = 0.0;
1113: sum4 = 0.0;
1114: sum5 = 0.0;
1115: sum6 = 0.0;
1116: sum7 = 0.0;
1117: for (j=0; j<n; j++) {
1118: sum1 += v[jrow]*x[7*idx[jrow]];
1119: sum2 += v[jrow]*x[7*idx[jrow]+1];
1120: sum3 += v[jrow]*x[7*idx[jrow]+2];
1121: sum4 += v[jrow]*x[7*idx[jrow]+3];
1122: sum5 += v[jrow]*x[7*idx[jrow]+4];
1123: sum6 += v[jrow]*x[7*idx[jrow]+5];
1124: sum7 += v[jrow]*x[7*idx[jrow]+6];
1125: jrow++;
1126: }
1127: y[7*i] += sum1;
1128: y[7*i+1] += sum2;
1129: y[7*i+2] += sum3;
1130: y[7*i+3] += sum4;
1131: y[7*i+4] += sum5;
1132: y[7*i+5] += sum6;
1133: y[7*i+6] += sum7;
1134: }
1136: PetscLogFlops(14*a->nz);
1137: VecRestoreArray(xx,&x);
1138: VecRestoreArray(zz,&y);
1139: return(0);
1140: }
1144: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1145: {
1146: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1147: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1148: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1150: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1153: if (yy != zz) {VecCopy(yy,zz);}
1154: VecGetArray(xx,&x);
1155: VecGetArray(zz,&y);
1156: for (i=0; i<m; i++) {
1157: idx = a->j + a->i[i] ;
1158: v = a->a + a->i[i] ;
1159: n = a->i[i+1] - a->i[i];
1160: alpha1 = x[7*i];
1161: alpha2 = x[7*i+1];
1162: alpha3 = x[7*i+2];
1163: alpha4 = x[7*i+3];
1164: alpha5 = x[7*i+4];
1165: alpha6 = x[7*i+5];
1166: alpha7 = x[7*i+6];
1167: while (n-->0) {
1168: y[7*(*idx)] += alpha1*(*v);
1169: y[7*(*idx)+1] += alpha2*(*v);
1170: y[7*(*idx)+2] += alpha3*(*v);
1171: y[7*(*idx)+3] += alpha4*(*v);
1172: y[7*(*idx)+4] += alpha5*(*v);
1173: y[7*(*idx)+5] += alpha6*(*v);
1174: y[7*(*idx)+6] += alpha7*(*v);
1175: idx++; v++;
1176: }
1177: }
1178: PetscLogFlops(14*a->nz);
1179: VecRestoreArray(xx,&x);
1180: VecRestoreArray(zz,&y);
1181: return(0);
1182: }
1186: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1187: {
1188: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1189: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1190: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1192: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1193: PetscInt n,i,jrow,j;
1196: VecGetArray(xx,&x);
1197: VecGetArray(yy,&y);
1198: idx = a->j;
1199: v = a->a;
1200: ii = a->i;
1202: for (i=0; i<m; i++) {
1203: jrow = ii[i];
1204: n = ii[i+1] - jrow;
1205: sum1 = 0.0;
1206: sum2 = 0.0;
1207: sum3 = 0.0;
1208: sum4 = 0.0;
1209: sum5 = 0.0;
1210: sum6 = 0.0;
1211: sum7 = 0.0;
1212: sum8 = 0.0;
1213: nonzerorow += (n>0);
1214: for (j=0; j<n; j++) {
1215: sum1 += v[jrow]*x[8*idx[jrow]];
1216: sum2 += v[jrow]*x[8*idx[jrow]+1];
1217: sum3 += v[jrow]*x[8*idx[jrow]+2];
1218: sum4 += v[jrow]*x[8*idx[jrow]+3];
1219: sum5 += v[jrow]*x[8*idx[jrow]+4];
1220: sum6 += v[jrow]*x[8*idx[jrow]+5];
1221: sum7 += v[jrow]*x[8*idx[jrow]+6];
1222: sum8 += v[jrow]*x[8*idx[jrow]+7];
1223: jrow++;
1224: }
1225: y[8*i] = sum1;
1226: y[8*i+1] = sum2;
1227: y[8*i+2] = sum3;
1228: y[8*i+3] = sum4;
1229: y[8*i+4] = sum5;
1230: y[8*i+5] = sum6;
1231: y[8*i+6] = sum7;
1232: y[8*i+7] = sum8;
1233: }
1235: PetscLogFlops(16*a->nz - 8*nonzerorow);
1236: VecRestoreArray(xx,&x);
1237: VecRestoreArray(yy,&y);
1238: return(0);
1239: }
1243: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1244: {
1245: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1246: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1247: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1249: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1252: VecSet(yy,zero);
1253: VecGetArray(xx,&x);
1254: VecGetArray(yy,&y);
1256: for (i=0; i<m; i++) {
1257: idx = a->j + a->i[i] ;
1258: v = a->a + a->i[i] ;
1259: n = a->i[i+1] - a->i[i];
1260: alpha1 = x[8*i];
1261: alpha2 = x[8*i+1];
1262: alpha3 = x[8*i+2];
1263: alpha4 = x[8*i+3];
1264: alpha5 = x[8*i+4];
1265: alpha6 = x[8*i+5];
1266: alpha7 = x[8*i+6];
1267: alpha8 = x[8*i+7];
1268: while (n-->0) {
1269: y[8*(*idx)] += alpha1*(*v);
1270: y[8*(*idx)+1] += alpha2*(*v);
1271: y[8*(*idx)+2] += alpha3*(*v);
1272: y[8*(*idx)+3] += alpha4*(*v);
1273: y[8*(*idx)+4] += alpha5*(*v);
1274: y[8*(*idx)+5] += alpha6*(*v);
1275: y[8*(*idx)+6] += alpha7*(*v);
1276: y[8*(*idx)+7] += alpha8*(*v);
1277: idx++; v++;
1278: }
1279: }
1280: PetscLogFlops(16*a->nz);
1281: VecRestoreArray(xx,&x);
1282: VecRestoreArray(yy,&y);
1283: return(0);
1284: }
1288: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1289: {
1290: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1291: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1292: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1294: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1295: PetscInt n,i,jrow,j;
1298: if (yy != zz) {VecCopy(yy,zz);}
1299: VecGetArray(xx,&x);
1300: VecGetArray(zz,&y);
1301: idx = a->j;
1302: v = a->a;
1303: ii = a->i;
1305: for (i=0; i<m; i++) {
1306: jrow = ii[i];
1307: n = ii[i+1] - jrow;
1308: sum1 = 0.0;
1309: sum2 = 0.0;
1310: sum3 = 0.0;
1311: sum4 = 0.0;
1312: sum5 = 0.0;
1313: sum6 = 0.0;
1314: sum7 = 0.0;
1315: sum8 = 0.0;
1316: for (j=0; j<n; j++) {
1317: sum1 += v[jrow]*x[8*idx[jrow]];
1318: sum2 += v[jrow]*x[8*idx[jrow]+1];
1319: sum3 += v[jrow]*x[8*idx[jrow]+2];
1320: sum4 += v[jrow]*x[8*idx[jrow]+3];
1321: sum5 += v[jrow]*x[8*idx[jrow]+4];
1322: sum6 += v[jrow]*x[8*idx[jrow]+5];
1323: sum7 += v[jrow]*x[8*idx[jrow]+6];
1324: sum8 += v[jrow]*x[8*idx[jrow]+7];
1325: jrow++;
1326: }
1327: y[8*i] += sum1;
1328: y[8*i+1] += sum2;
1329: y[8*i+2] += sum3;
1330: y[8*i+3] += sum4;
1331: y[8*i+4] += sum5;
1332: y[8*i+5] += sum6;
1333: y[8*i+6] += sum7;
1334: y[8*i+7] += sum8;
1335: }
1337: PetscLogFlops(16*a->nz);
1338: VecRestoreArray(xx,&x);
1339: VecRestoreArray(zz,&y);
1340: return(0);
1341: }
1345: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1346: {
1347: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1348: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1349: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1351: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1354: if (yy != zz) {VecCopy(yy,zz);}
1355: VecGetArray(xx,&x);
1356: VecGetArray(zz,&y);
1357: for (i=0; i<m; i++) {
1358: idx = a->j + a->i[i] ;
1359: v = a->a + a->i[i] ;
1360: n = a->i[i+1] - a->i[i];
1361: alpha1 = x[8*i];
1362: alpha2 = x[8*i+1];
1363: alpha3 = x[8*i+2];
1364: alpha4 = x[8*i+3];
1365: alpha5 = x[8*i+4];
1366: alpha6 = x[8*i+5];
1367: alpha7 = x[8*i+6];
1368: alpha8 = x[8*i+7];
1369: while (n-->0) {
1370: y[8*(*idx)] += alpha1*(*v);
1371: y[8*(*idx)+1] += alpha2*(*v);
1372: y[8*(*idx)+2] += alpha3*(*v);
1373: y[8*(*idx)+3] += alpha4*(*v);
1374: y[8*(*idx)+4] += alpha5*(*v);
1375: y[8*(*idx)+5] += alpha6*(*v);
1376: y[8*(*idx)+6] += alpha7*(*v);
1377: y[8*(*idx)+7] += alpha8*(*v);
1378: idx++; v++;
1379: }
1380: }
1381: PetscLogFlops(16*a->nz);
1382: VecRestoreArray(xx,&x);
1383: VecRestoreArray(zz,&y);
1384: return(0);
1385: }
1387: /* ------------------------------------------------------------------------------*/
1390: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1391: {
1392: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1393: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1394: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1396: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1397: PetscInt n,i,jrow,j;
1400: VecGetArray(xx,&x);
1401: VecGetArray(yy,&y);
1402: idx = a->j;
1403: v = a->a;
1404: ii = a->i;
1406: for (i=0; i<m; i++) {
1407: jrow = ii[i];
1408: n = ii[i+1] - jrow;
1409: sum1 = 0.0;
1410: sum2 = 0.0;
1411: sum3 = 0.0;
1412: sum4 = 0.0;
1413: sum5 = 0.0;
1414: sum6 = 0.0;
1415: sum7 = 0.0;
1416: sum8 = 0.0;
1417: sum9 = 0.0;
1418: nonzerorow += (n>0);
1419: for (j=0; j<n; j++) {
1420: sum1 += v[jrow]*x[9*idx[jrow]];
1421: sum2 += v[jrow]*x[9*idx[jrow]+1];
1422: sum3 += v[jrow]*x[9*idx[jrow]+2];
1423: sum4 += v[jrow]*x[9*idx[jrow]+3];
1424: sum5 += v[jrow]*x[9*idx[jrow]+4];
1425: sum6 += v[jrow]*x[9*idx[jrow]+5];
1426: sum7 += v[jrow]*x[9*idx[jrow]+6];
1427: sum8 += v[jrow]*x[9*idx[jrow]+7];
1428: sum9 += v[jrow]*x[9*idx[jrow]+8];
1429: jrow++;
1430: }
1431: y[9*i] = sum1;
1432: y[9*i+1] = sum2;
1433: y[9*i+2] = sum3;
1434: y[9*i+3] = sum4;
1435: y[9*i+4] = sum5;
1436: y[9*i+5] = sum6;
1437: y[9*i+6] = sum7;
1438: y[9*i+7] = sum8;
1439: y[9*i+8] = sum9;
1440: }
1442: PetscLogFlops(18*a->nz - 9*nonzerorow);
1443: VecRestoreArray(xx,&x);
1444: VecRestoreArray(yy,&y);
1445: return(0);
1446: }
1448: /* ------------------------------------------------------------------------------*/
1452: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1453: {
1454: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1455: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1456: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1458: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1461: VecSet(yy,zero);
1462: VecGetArray(xx,&x);
1463: VecGetArray(yy,&y);
1465: for (i=0; i<m; i++) {
1466: idx = a->j + a->i[i] ;
1467: v = a->a + a->i[i] ;
1468: n = a->i[i+1] - a->i[i];
1469: alpha1 = x[9*i];
1470: alpha2 = x[9*i+1];
1471: alpha3 = x[9*i+2];
1472: alpha4 = x[9*i+3];
1473: alpha5 = x[9*i+4];
1474: alpha6 = x[9*i+5];
1475: alpha7 = x[9*i+6];
1476: alpha8 = x[9*i+7];
1477: alpha9 = x[9*i+8];
1478: while (n-->0) {
1479: y[9*(*idx)] += alpha1*(*v);
1480: y[9*(*idx)+1] += alpha2*(*v);
1481: y[9*(*idx)+2] += alpha3*(*v);
1482: y[9*(*idx)+3] += alpha4*(*v);
1483: y[9*(*idx)+4] += alpha5*(*v);
1484: y[9*(*idx)+5] += alpha6*(*v);
1485: y[9*(*idx)+6] += alpha7*(*v);
1486: y[9*(*idx)+7] += alpha8*(*v);
1487: y[9*(*idx)+8] += alpha9*(*v);
1488: idx++; v++;
1489: }
1490: }
1491: PetscLogFlops(18*a->nz);
1492: VecRestoreArray(xx,&x);
1493: VecRestoreArray(yy,&y);
1494: return(0);
1495: }
1499: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1500: {
1501: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1502: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1503: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1505: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1506: PetscInt n,i,jrow,j;
1509: if (yy != zz) {VecCopy(yy,zz);}
1510: VecGetArray(xx,&x);
1511: VecGetArray(zz,&y);
1512: idx = a->j;
1513: v = a->a;
1514: ii = a->i;
1516: for (i=0; i<m; i++) {
1517: jrow = ii[i];
1518: n = ii[i+1] - jrow;
1519: sum1 = 0.0;
1520: sum2 = 0.0;
1521: sum3 = 0.0;
1522: sum4 = 0.0;
1523: sum5 = 0.0;
1524: sum6 = 0.0;
1525: sum7 = 0.0;
1526: sum8 = 0.0;
1527: sum9 = 0.0;
1528: for (j=0; j<n; j++) {
1529: sum1 += v[jrow]*x[9*idx[jrow]];
1530: sum2 += v[jrow]*x[9*idx[jrow]+1];
1531: sum3 += v[jrow]*x[9*idx[jrow]+2];
1532: sum4 += v[jrow]*x[9*idx[jrow]+3];
1533: sum5 += v[jrow]*x[9*idx[jrow]+4];
1534: sum6 += v[jrow]*x[9*idx[jrow]+5];
1535: sum7 += v[jrow]*x[9*idx[jrow]+6];
1536: sum8 += v[jrow]*x[9*idx[jrow]+7];
1537: sum9 += v[jrow]*x[9*idx[jrow]+8];
1538: jrow++;
1539: }
1540: y[9*i] += sum1;
1541: y[9*i+1] += sum2;
1542: y[9*i+2] += sum3;
1543: y[9*i+3] += sum4;
1544: y[9*i+4] += sum5;
1545: y[9*i+5] += sum6;
1546: y[9*i+6] += sum7;
1547: y[9*i+7] += sum8;
1548: y[9*i+8] += sum9;
1549: }
1551: PetscLogFlops(18*a->nz);
1552: VecRestoreArray(xx,&x);
1553: VecRestoreArray(zz,&y);
1554: return(0);
1555: }
1559: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1560: {
1561: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1562: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1563: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1565: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1568: if (yy != zz) {VecCopy(yy,zz);}
1569: VecGetArray(xx,&x);
1570: VecGetArray(zz,&y);
1571: for (i=0; i<m; i++) {
1572: idx = a->j + a->i[i] ;
1573: v = a->a + a->i[i] ;
1574: n = a->i[i+1] - a->i[i];
1575: alpha1 = x[9*i];
1576: alpha2 = x[9*i+1];
1577: alpha3 = x[9*i+2];
1578: alpha4 = x[9*i+3];
1579: alpha5 = x[9*i+4];
1580: alpha6 = x[9*i+5];
1581: alpha7 = x[9*i+6];
1582: alpha8 = x[9*i+7];
1583: alpha9 = x[9*i+8];
1584: while (n-->0) {
1585: y[9*(*idx)] += alpha1*(*v);
1586: y[9*(*idx)+1] += alpha2*(*v);
1587: y[9*(*idx)+2] += alpha3*(*v);
1588: y[9*(*idx)+3] += alpha4*(*v);
1589: y[9*(*idx)+4] += alpha5*(*v);
1590: y[9*(*idx)+5] += alpha6*(*v);
1591: y[9*(*idx)+6] += alpha7*(*v);
1592: y[9*(*idx)+7] += alpha8*(*v);
1593: y[9*(*idx)+8] += alpha9*(*v);
1594: idx++; v++;
1595: }
1596: }
1597: PetscLogFlops(18*a->nz);
1598: VecRestoreArray(xx,&x);
1599: VecRestoreArray(zz,&y);
1600: return(0);
1601: }
1602: /*--------------------------------------------------------------------------------------------*/
1605: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1606: {
1607: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1608: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1609: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1611: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1612: PetscInt n,i,jrow,j;
1615: VecGetArray(xx,&x);
1616: VecGetArray(yy,&y);
1617: idx = a->j;
1618: v = a->a;
1619: ii = a->i;
1621: for (i=0; i<m; i++) {
1622: jrow = ii[i];
1623: n = ii[i+1] - jrow;
1624: sum1 = 0.0;
1625: sum2 = 0.0;
1626: sum3 = 0.0;
1627: sum4 = 0.0;
1628: sum5 = 0.0;
1629: sum6 = 0.0;
1630: sum7 = 0.0;
1631: sum8 = 0.0;
1632: sum9 = 0.0;
1633: sum10 = 0.0;
1634: nonzerorow += (n>0);
1635: for (j=0; j<n; j++) {
1636: sum1 += v[jrow]*x[10*idx[jrow]];
1637: sum2 += v[jrow]*x[10*idx[jrow]+1];
1638: sum3 += v[jrow]*x[10*idx[jrow]+2];
1639: sum4 += v[jrow]*x[10*idx[jrow]+3];
1640: sum5 += v[jrow]*x[10*idx[jrow]+4];
1641: sum6 += v[jrow]*x[10*idx[jrow]+5];
1642: sum7 += v[jrow]*x[10*idx[jrow]+6];
1643: sum8 += v[jrow]*x[10*idx[jrow]+7];
1644: sum9 += v[jrow]*x[10*idx[jrow]+8];
1645: sum10 += v[jrow]*x[10*idx[jrow]+9];
1646: jrow++;
1647: }
1648: y[10*i] = sum1;
1649: y[10*i+1] = sum2;
1650: y[10*i+2] = sum3;
1651: y[10*i+3] = sum4;
1652: y[10*i+4] = sum5;
1653: y[10*i+5] = sum6;
1654: y[10*i+6] = sum7;
1655: y[10*i+7] = sum8;
1656: y[10*i+8] = sum9;
1657: y[10*i+9] = sum10;
1658: }
1660: PetscLogFlops(20*a->nz - 10*nonzerorow);
1661: VecRestoreArray(xx,&x);
1662: VecRestoreArray(yy,&y);
1663: return(0);
1664: }
1668: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1669: {
1670: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1671: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1672: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1674: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1675: PetscInt n,i,jrow,j;
1678: if (yy != zz) {VecCopy(yy,zz);}
1679: VecGetArray(xx,&x);
1680: VecGetArray(zz,&y);
1681: idx = a->j;
1682: v = a->a;
1683: ii = a->i;
1685: for (i=0; i<m; i++) {
1686: jrow = ii[i];
1687: n = ii[i+1] - jrow;
1688: sum1 = 0.0;
1689: sum2 = 0.0;
1690: sum3 = 0.0;
1691: sum4 = 0.0;
1692: sum5 = 0.0;
1693: sum6 = 0.0;
1694: sum7 = 0.0;
1695: sum8 = 0.0;
1696: sum9 = 0.0;
1697: sum10 = 0.0;
1698: for (j=0; j<n; j++) {
1699: sum1 += v[jrow]*x[10*idx[jrow]];
1700: sum2 += v[jrow]*x[10*idx[jrow]+1];
1701: sum3 += v[jrow]*x[10*idx[jrow]+2];
1702: sum4 += v[jrow]*x[10*idx[jrow]+3];
1703: sum5 += v[jrow]*x[10*idx[jrow]+4];
1704: sum6 += v[jrow]*x[10*idx[jrow]+5];
1705: sum7 += v[jrow]*x[10*idx[jrow]+6];
1706: sum8 += v[jrow]*x[10*idx[jrow]+7];
1707: sum9 += v[jrow]*x[10*idx[jrow]+8];
1708: sum10 += v[jrow]*x[10*idx[jrow]+9];
1709: jrow++;
1710: }
1711: y[10*i] += sum1;
1712: y[10*i+1] += sum2;
1713: y[10*i+2] += sum3;
1714: y[10*i+3] += sum4;
1715: y[10*i+4] += sum5;
1716: y[10*i+5] += sum6;
1717: y[10*i+6] += sum7;
1718: y[10*i+7] += sum8;
1719: y[10*i+8] += sum9;
1720: y[10*i+9] += sum10;
1721: }
1723: PetscLogFlops(20*a->nz);
1724: VecRestoreArray(xx,&x);
1725: VecRestoreArray(yy,&y);
1726: return(0);
1727: }
1731: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1732: {
1733: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1734: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1735: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1737: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1740: VecSet(yy,zero);
1741: VecGetArray(xx,&x);
1742: VecGetArray(yy,&y);
1744: for (i=0; i<m; i++) {
1745: idx = a->j + a->i[i] ;
1746: v = a->a + a->i[i] ;
1747: n = a->i[i+1] - a->i[i];
1748: alpha1 = x[10*i];
1749: alpha2 = x[10*i+1];
1750: alpha3 = x[10*i+2];
1751: alpha4 = x[10*i+3];
1752: alpha5 = x[10*i+4];
1753: alpha6 = x[10*i+5];
1754: alpha7 = x[10*i+6];
1755: alpha8 = x[10*i+7];
1756: alpha9 = x[10*i+8];
1757: alpha10 = x[10*i+9];
1758: while (n-->0) {
1759: y[10*(*idx)] += alpha1*(*v);
1760: y[10*(*idx)+1] += alpha2*(*v);
1761: y[10*(*idx)+2] += alpha3*(*v);
1762: y[10*(*idx)+3] += alpha4*(*v);
1763: y[10*(*idx)+4] += alpha5*(*v);
1764: y[10*(*idx)+5] += alpha6*(*v);
1765: y[10*(*idx)+6] += alpha7*(*v);
1766: y[10*(*idx)+7] += alpha8*(*v);
1767: y[10*(*idx)+8] += alpha9*(*v);
1768: y[10*(*idx)+9] += alpha10*(*v);
1769: idx++; v++;
1770: }
1771: }
1772: PetscLogFlops(20*a->nz);
1773: VecRestoreArray(xx,&x);
1774: VecRestoreArray(yy,&y);
1775: return(0);
1776: }
1780: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1781: {
1782: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1783: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1784: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1786: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1789: if (yy != zz) {VecCopy(yy,zz);}
1790: VecGetArray(xx,&x);
1791: VecGetArray(zz,&y);
1792: for (i=0; i<m; i++) {
1793: idx = a->j + a->i[i] ;
1794: v = a->a + a->i[i] ;
1795: n = a->i[i+1] - a->i[i];
1796: alpha1 = x[10*i];
1797: alpha2 = x[10*i+1];
1798: alpha3 = x[10*i+2];
1799: alpha4 = x[10*i+3];
1800: alpha5 = x[10*i+4];
1801: alpha6 = x[10*i+5];
1802: alpha7 = x[10*i+6];
1803: alpha8 = x[10*i+7];
1804: alpha9 = x[10*i+8];
1805: alpha10 = x[10*i+9];
1806: while (n-->0) {
1807: y[10*(*idx)] += alpha1*(*v);
1808: y[10*(*idx)+1] += alpha2*(*v);
1809: y[10*(*idx)+2] += alpha3*(*v);
1810: y[10*(*idx)+3] += alpha4*(*v);
1811: y[10*(*idx)+4] += alpha5*(*v);
1812: y[10*(*idx)+5] += alpha6*(*v);
1813: y[10*(*idx)+6] += alpha7*(*v);
1814: y[10*(*idx)+7] += alpha8*(*v);
1815: y[10*(*idx)+8] += alpha9*(*v);
1816: y[10*(*idx)+9] += alpha10*(*v);
1817: idx++; v++;
1818: }
1819: }
1820: PetscLogFlops(20*a->nz);
1821: VecRestoreArray(xx,&x);
1822: VecRestoreArray(zz,&y);
1823: return(0);
1824: }
1827: /*--------------------------------------------------------------------------------------------*/
1830: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1831: {
1832: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1833: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1834: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1835: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1837: PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii;
1838: PetscInt n,i,jrow,j;
1841: VecGetArray(xx,&x);
1842: VecGetArray(yy,&y);
1843: idx = a->j;
1844: v = a->a;
1845: ii = a->i;
1847: for (i=0; i<m; i++) {
1848: jrow = ii[i];
1849: n = ii[i+1] - jrow;
1850: sum1 = 0.0;
1851: sum2 = 0.0;
1852: sum3 = 0.0;
1853: sum4 = 0.0;
1854: sum5 = 0.0;
1855: sum6 = 0.0;
1856: sum7 = 0.0;
1857: sum8 = 0.0;
1858: sum9 = 0.0;
1859: sum10 = 0.0;
1860: sum11 = 0.0;
1861: sum12 = 0.0;
1862: sum13 = 0.0;
1863: sum14 = 0.0;
1864: sum15 = 0.0;
1865: sum16 = 0.0;
1866: nonzerorow += (n>0);
1867: for (j=0; j<n; j++) {
1868: sum1 += v[jrow]*x[16*idx[jrow]];
1869: sum2 += v[jrow]*x[16*idx[jrow]+1];
1870: sum3 += v[jrow]*x[16*idx[jrow]+2];
1871: sum4 += v[jrow]*x[16*idx[jrow]+3];
1872: sum5 += v[jrow]*x[16*idx[jrow]+4];
1873: sum6 += v[jrow]*x[16*idx[jrow]+5];
1874: sum7 += v[jrow]*x[16*idx[jrow]+6];
1875: sum8 += v[jrow]*x[16*idx[jrow]+7];
1876: sum9 += v[jrow]*x[16*idx[jrow]+8];
1877: sum10 += v[jrow]*x[16*idx[jrow]+9];
1878: sum11 += v[jrow]*x[16*idx[jrow]+10];
1879: sum12 += v[jrow]*x[16*idx[jrow]+11];
1880: sum13 += v[jrow]*x[16*idx[jrow]+12];
1881: sum14 += v[jrow]*x[16*idx[jrow]+13];
1882: sum15 += v[jrow]*x[16*idx[jrow]+14];
1883: sum16 += v[jrow]*x[16*idx[jrow]+15];
1884: jrow++;
1885: }
1886: y[16*i] = sum1;
1887: y[16*i+1] = sum2;
1888: y[16*i+2] = sum3;
1889: y[16*i+3] = sum4;
1890: y[16*i+4] = sum5;
1891: y[16*i+5] = sum6;
1892: y[16*i+6] = sum7;
1893: y[16*i+7] = sum8;
1894: y[16*i+8] = sum9;
1895: y[16*i+9] = sum10;
1896: y[16*i+10] = sum11;
1897: y[16*i+11] = sum12;
1898: y[16*i+12] = sum13;
1899: y[16*i+13] = sum14;
1900: y[16*i+14] = sum15;
1901: y[16*i+15] = sum16;
1902: }
1904: PetscLogFlops(32*a->nz - 16*nonzerorow);
1905: VecRestoreArray(xx,&x);
1906: VecRestoreArray(yy,&y);
1907: return(0);
1908: }
1912: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1913: {
1914: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1915: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1916: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1917: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1919: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1922: VecSet(yy,zero);
1923: VecGetArray(xx,&x);
1924: VecGetArray(yy,&y);
1926: for (i=0; i<m; i++) {
1927: idx = a->j + a->i[i] ;
1928: v = a->a + a->i[i] ;
1929: n = a->i[i+1] - a->i[i];
1930: alpha1 = x[16*i];
1931: alpha2 = x[16*i+1];
1932: alpha3 = x[16*i+2];
1933: alpha4 = x[16*i+3];
1934: alpha5 = x[16*i+4];
1935: alpha6 = x[16*i+5];
1936: alpha7 = x[16*i+6];
1937: alpha8 = x[16*i+7];
1938: alpha9 = x[16*i+8];
1939: alpha10 = x[16*i+9];
1940: alpha11 = x[16*i+10];
1941: alpha12 = x[16*i+11];
1942: alpha13 = x[16*i+12];
1943: alpha14 = x[16*i+13];
1944: alpha15 = x[16*i+14];
1945: alpha16 = x[16*i+15];
1946: while (n-->0) {
1947: y[16*(*idx)] += alpha1*(*v);
1948: y[16*(*idx)+1] += alpha2*(*v);
1949: y[16*(*idx)+2] += alpha3*(*v);
1950: y[16*(*idx)+3] += alpha4*(*v);
1951: y[16*(*idx)+4] += alpha5*(*v);
1952: y[16*(*idx)+5] += alpha6*(*v);
1953: y[16*(*idx)+6] += alpha7*(*v);
1954: y[16*(*idx)+7] += alpha8*(*v);
1955: y[16*(*idx)+8] += alpha9*(*v);
1956: y[16*(*idx)+9] += alpha10*(*v);
1957: y[16*(*idx)+10] += alpha11*(*v);
1958: y[16*(*idx)+11] += alpha12*(*v);
1959: y[16*(*idx)+12] += alpha13*(*v);
1960: y[16*(*idx)+13] += alpha14*(*v);
1961: y[16*(*idx)+14] += alpha15*(*v);
1962: y[16*(*idx)+15] += alpha16*(*v);
1963: idx++; v++;
1964: }
1965: }
1966: PetscLogFlops(32*a->nz);
1967: VecRestoreArray(xx,&x);
1968: VecRestoreArray(yy,&y);
1969: return(0);
1970: }
1974: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1975: {
1976: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1977: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1978: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1979: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1981: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1982: PetscInt n,i,jrow,j;
1985: if (yy != zz) {VecCopy(yy,zz);}
1986: VecGetArray(xx,&x);
1987: VecGetArray(zz,&y);
1988: idx = a->j;
1989: v = a->a;
1990: ii = a->i;
1992: for (i=0; i<m; i++) {
1993: jrow = ii[i];
1994: n = ii[i+1] - jrow;
1995: sum1 = 0.0;
1996: sum2 = 0.0;
1997: sum3 = 0.0;
1998: sum4 = 0.0;
1999: sum5 = 0.0;
2000: sum6 = 0.0;
2001: sum7 = 0.0;
2002: sum8 = 0.0;
2003: sum9 = 0.0;
2004: sum10 = 0.0;
2005: sum11 = 0.0;
2006: sum12 = 0.0;
2007: sum13 = 0.0;
2008: sum14 = 0.0;
2009: sum15 = 0.0;
2010: sum16 = 0.0;
2011: for (j=0; j<n; j++) {
2012: sum1 += v[jrow]*x[16*idx[jrow]];
2013: sum2 += v[jrow]*x[16*idx[jrow]+1];
2014: sum3 += v[jrow]*x[16*idx[jrow]+2];
2015: sum4 += v[jrow]*x[16*idx[jrow]+3];
2016: sum5 += v[jrow]*x[16*idx[jrow]+4];
2017: sum6 += v[jrow]*x[16*idx[jrow]+5];
2018: sum7 += v[jrow]*x[16*idx[jrow]+6];
2019: sum8 += v[jrow]*x[16*idx[jrow]+7];
2020: sum9 += v[jrow]*x[16*idx[jrow]+8];
2021: sum10 += v[jrow]*x[16*idx[jrow]+9];
2022: sum11 += v[jrow]*x[16*idx[jrow]+10];
2023: sum12 += v[jrow]*x[16*idx[jrow]+11];
2024: sum13 += v[jrow]*x[16*idx[jrow]+12];
2025: sum14 += v[jrow]*x[16*idx[jrow]+13];
2026: sum15 += v[jrow]*x[16*idx[jrow]+14];
2027: sum16 += v[jrow]*x[16*idx[jrow]+15];
2028: jrow++;
2029: }
2030: y[16*i] += sum1;
2031: y[16*i+1] += sum2;
2032: y[16*i+2] += sum3;
2033: y[16*i+3] += sum4;
2034: y[16*i+4] += sum5;
2035: y[16*i+5] += sum6;
2036: y[16*i+6] += sum7;
2037: y[16*i+7] += sum8;
2038: y[16*i+8] += sum9;
2039: y[16*i+9] += sum10;
2040: y[16*i+10] += sum11;
2041: y[16*i+11] += sum12;
2042: y[16*i+12] += sum13;
2043: y[16*i+13] += sum14;
2044: y[16*i+14] += sum15;
2045: y[16*i+15] += sum16;
2046: }
2048: PetscLogFlops(32*a->nz);
2049: VecRestoreArray(xx,&x);
2050: VecRestoreArray(zz,&y);
2051: return(0);
2052: }
2056: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2057: {
2058: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2059: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2060: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2061: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2063: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
2066: if (yy != zz) {VecCopy(yy,zz);}
2067: VecGetArray(xx,&x);
2068: VecGetArray(zz,&y);
2069: for (i=0; i<m; i++) {
2070: idx = a->j + a->i[i] ;
2071: v = a->a + a->i[i] ;
2072: n = a->i[i+1] - a->i[i];
2073: alpha1 = x[16*i];
2074: alpha2 = x[16*i+1];
2075: alpha3 = x[16*i+2];
2076: alpha4 = x[16*i+3];
2077: alpha5 = x[16*i+4];
2078: alpha6 = x[16*i+5];
2079: alpha7 = x[16*i+6];
2080: alpha8 = x[16*i+7];
2081: alpha9 = x[16*i+8];
2082: alpha10 = x[16*i+9];
2083: alpha11 = x[16*i+10];
2084: alpha12 = x[16*i+11];
2085: alpha13 = x[16*i+12];
2086: alpha14 = x[16*i+13];
2087: alpha15 = x[16*i+14];
2088: alpha16 = x[16*i+15];
2089: while (n-->0) {
2090: y[16*(*idx)] += alpha1*(*v);
2091: y[16*(*idx)+1] += alpha2*(*v);
2092: y[16*(*idx)+2] += alpha3*(*v);
2093: y[16*(*idx)+3] += alpha4*(*v);
2094: y[16*(*idx)+4] += alpha5*(*v);
2095: y[16*(*idx)+5] += alpha6*(*v);
2096: y[16*(*idx)+6] += alpha7*(*v);
2097: y[16*(*idx)+7] += alpha8*(*v);
2098: y[16*(*idx)+8] += alpha9*(*v);
2099: y[16*(*idx)+9] += alpha10*(*v);
2100: y[16*(*idx)+10] += alpha11*(*v);
2101: y[16*(*idx)+11] += alpha12*(*v);
2102: y[16*(*idx)+12] += alpha13*(*v);
2103: y[16*(*idx)+13] += alpha14*(*v);
2104: y[16*(*idx)+14] += alpha15*(*v);
2105: y[16*(*idx)+15] += alpha16*(*v);
2106: idx++; v++;
2107: }
2108: }
2109: PetscLogFlops(32*a->nz);
2110: VecRestoreArray(xx,&x);
2111: VecRestoreArray(zz,&y);
2112: return(0);
2113: }
2115: /*===================================================================================*/
2118: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2119: {
2120: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2124: /* start the scatter */
2125: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2126: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2127: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2128: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2129: return(0);
2130: }
2134: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2135: {
2136: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2140: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2141: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2142: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2143: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2144: return(0);
2145: }
2149: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2150: {
2151: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2155: /* start the scatter */
2156: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2157: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2158: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2159: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2160: return(0);
2161: }
2165: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2166: {
2167: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2171: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2172: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2173: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2174: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2175: return(0);
2176: }
2178: /* ----------------------------------------------------------------*/
2181: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2182: {
2183: /* This routine requires testing -- but it's getting better. */
2184: PetscErrorCode ierr;
2185: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2186: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2187: Mat P=pp->AIJ;
2188: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2189: PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2190: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2191: PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2192: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2193: MatScalar *ca;
2196: /* Start timer */
2199: /* Get ij structure of P^T */
2200: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2202: cn = pn*ppdof;
2203: /* Allocate ci array, arrays for fill computation and */
2204: /* free space for accumulating nonzero column info */
2205: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2206: ci[0] = 0;
2208: /* Work arrays for rows of P^T*A */
2209: PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2210: PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2211: ptasparserow = ptadenserow + an;
2212: denserow = ptasparserow + an;
2213: sparserow = denserow + cn;
2215: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2216: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2217: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2218: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2219: current_space = free_space;
2221: /* Determine symbolic info for each row of C: */
2222: for (i=0;i<pn;i++) {
2223: ptnzi = pti[i+1] - pti[i];
2224: ptJ = ptj + pti[i];
2225: for (dof=0;dof<ppdof;dof++) {
2226: ptanzi = 0;
2227: /* Determine symbolic row of PtA: */
2228: for (j=0;j<ptnzi;j++) {
2229: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2230: arow = ptJ[j]*ppdof + dof;
2231: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2232: anzj = ai[arow+1] - ai[arow];
2233: ajj = aj + ai[arow];
2234: for (k=0;k<anzj;k++) {
2235: if (!ptadenserow[ajj[k]]) {
2236: ptadenserow[ajj[k]] = -1;
2237: ptasparserow[ptanzi++] = ajj[k];
2238: }
2239: }
2240: }
2241: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2242: ptaj = ptasparserow;
2243: cnzi = 0;
2244: for (j=0;j<ptanzi;j++) {
2245: /* Get offset within block of P */
2246: pshift = *ptaj%ppdof;
2247: /* Get block row of P */
2248: prow = (*ptaj++)/ppdof; /* integer division */
2249: /* P has same number of nonzeros per row as the compressed form */
2250: pnzj = pi[prow+1] - pi[prow];
2251: pjj = pj + pi[prow];
2252: for (k=0;k<pnzj;k++) {
2253: /* Locations in C are shifted by the offset within the block */
2254: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2255: if (!denserow[pjj[k]*ppdof+pshift]) {
2256: denserow[pjj[k]*ppdof+pshift] = -1;
2257: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2258: }
2259: }
2260: }
2262: /* sort sparserow */
2263: PetscSortInt(cnzi,sparserow);
2264:
2265: /* If free space is not available, make more free space */
2266: /* Double the amount of total space in the list */
2267: if (current_space->local_remaining<cnzi) {
2268: PetscFreeSpaceGet(current_space->total_array_size,¤t_space);
2269: }
2271: /* Copy data into free space, and zero out denserows */
2272: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2273: current_space->array += cnzi;
2274: current_space->local_used += cnzi;
2275: current_space->local_remaining -= cnzi;
2277: for (j=0;j<ptanzi;j++) {
2278: ptadenserow[ptasparserow[j]] = 0;
2279: }
2280: for (j=0;j<cnzi;j++) {
2281: denserow[sparserow[j]] = 0;
2282: }
2283: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2284: /* For now, we will recompute what is needed. */
2285: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2286: }
2287: }
2288: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2289: /* Allocate space for cj, initialize cj, and */
2290: /* destroy list of free space and other temporary array(s) */
2291: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2292: PetscFreeSpaceContiguous(&free_space,cj);
2293: PetscFree(ptadenserow);
2294:
2295: /* Allocate space for ca */
2296: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2297: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2298:
2299: /* put together the new matrix */
2300: MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);
2302: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2303: /* Since these are PETSc arrays, change flags to free them as necessary. */
2304: c = (Mat_SeqAIJ *)((*C)->data);
2305: c->free_a = PETSC_TRUE;
2306: c->free_ij = PETSC_TRUE;
2307: c->nonew = 0;
2309: /* Clean up. */
2310: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2313: return(0);
2314: }
2318: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2319: {
2320: /* This routine requires testing -- first draft only */
2322: PetscInt flops=0;
2323: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2324: Mat P=pp->AIJ;
2325: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
2326: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
2327: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
2328: PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2329: PetscInt *ci=c->i,*cj=c->j,*cjj;
2330: PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2331: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2332: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2335: /* Allocate temporary array for storage of one row of A*P */
2336: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2337: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));
2339: apj = (PetscInt *)(apa + cn);
2340: apjdense = apj + cn;
2342: /* Clear old values in C */
2343: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
2345: for (i=0;i<am;i++) {
2346: /* Form sparse row of A*P */
2347: anzi = ai[i+1] - ai[i];
2348: apnzj = 0;
2349: for (j=0;j<anzi;j++) {
2350: /* Get offset within block of P */
2351: pshift = *aj%ppdof;
2352: /* Get block row of P */
2353: prow = *aj++/ppdof; /* integer division */
2354: pnzj = pi[prow+1] - pi[prow];
2355: pjj = pj + pi[prow];
2356: paj = pa + pi[prow];
2357: for (k=0;k<pnzj;k++) {
2358: poffset = pjj[k]*ppdof+pshift;
2359: if (!apjdense[poffset]) {
2360: apjdense[poffset] = -1;
2361: apj[apnzj++] = poffset;
2362: }
2363: apa[poffset] += (*aa)*paj[k];
2364: }
2365: flops += 2*pnzj;
2366: aa++;
2367: }
2369: /* Sort the j index array for quick sparse axpy. */
2370: /* Note: a array does not need sorting as it is in dense storage locations. */
2371: PetscSortInt(apnzj,apj);
2373: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2374: prow = i/ppdof; /* integer division */
2375: pshift = i%ppdof;
2376: poffset = pi[prow];
2377: pnzi = pi[prow+1] - poffset;
2378: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2379: pJ = pj+poffset;
2380: pA = pa+poffset;
2381: for (j=0;j<pnzi;j++) {
2382: crow = (*pJ)*ppdof+pshift;
2383: cjj = cj + ci[crow];
2384: caj = ca + ci[crow];
2385: pJ++;
2386: /* Perform sparse axpy operation. Note cjj includes apj. */
2387: for (k=0,nextap=0;nextap<apnzj;k++) {
2388: if (cjj[k]==apj[nextap]) {
2389: caj[k] += (*pA)*apa[apj[nextap++]];
2390: }
2391: }
2392: flops += 2*apnzj;
2393: pA++;
2394: }
2396: /* Zero the current row info for A*P */
2397: for (j=0;j<apnzj;j++) {
2398: apa[apj[j]] = 0.;
2399: apjdense[apj[j]] = 0;
2400: }
2401: }
2403: /* Assemble the final matrix and clean up */
2404: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2405: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2406: PetscFree(apa);
2407: PetscLogFlops(flops);
2408: return(0);
2409: }
2413: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2414: {
2415: PetscErrorCode ierr;
2418: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
2419: MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
2420: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
2421: return(0);
2422: }
2426: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
2427: {
2429: SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
2430: return(0);
2431: }
2436: PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2437: {
2438: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2439: Mat a = b->AIJ,B;
2440: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
2441: PetscErrorCode ierr;
2442: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2443: PetscInt *cols;
2444: PetscScalar *vals;
2447: MatGetSize(a,&m,&n);
2448: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2449: for (i=0; i<m; i++) {
2450: nmax = PetscMax(nmax,aij->ilen[i]);
2451: for (j=0; j<dof; j++) {
2452: ilen[dof*i+j] = aij->ilen[i];
2453: }
2454: }
2455: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2456: PetscFree(ilen);
2457: PetscMalloc(nmax*sizeof(PetscInt),&icols);
2458: ii = 0;
2459: for (i=0; i<m; i++) {
2460: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2461: for (j=0; j<dof; j++) {
2462: for (k=0; k<ncols; k++) {
2463: icols[k] = dof*cols[k]+j;
2464: }
2465: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2466: ii++;
2467: }
2468: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2469: }
2470: PetscFree(icols);
2471: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2472: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2474: if (reuse == MAT_REUSE_MATRIX) {
2475: MatHeaderReplace(A,B);
2476: } else {
2477: *newmat = B;
2478: }
2479: return(0);
2480: }
2483: #include src/mat/impls/aij/mpi/mpiaij.h
2488: PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2489: {
2490: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
2491: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2492: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2493: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2494: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2495: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2496: PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
2497: PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
2498: PetscInt rstart,cstart,*garray,ii,k;
2499: PetscErrorCode ierr;
2500: PetscScalar *vals,*ovals;
2503: PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);
2504: for (i=0; i<A->rmap.n/dof; i++) {
2505: nmax = PetscMax(nmax,AIJ->ilen[i]);
2506: onmax = PetscMax(onmax,OAIJ->ilen[i]);
2507: for (j=0; j<dof; j++) {
2508: dnz[dof*i+j] = AIJ->ilen[i];
2509: onz[dof*i+j] = OAIJ->ilen[i];
2510: }
2511: }
2512: MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);
2513: PetscFree2(dnz,onz);
2515: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2516: rstart = dof*maij->A->rmap.rstart;
2517: cstart = dof*maij->A->cmap.rstart;
2518: garray = mpiaij->garray;
2520: ii = rstart;
2521: for (i=0; i<A->rmap.n/dof; i++) {
2522: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2523: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2524: for (j=0; j<dof; j++) {
2525: for (k=0; k<ncols; k++) {
2526: icols[k] = cstart + dof*cols[k]+j;
2527: }
2528: for (k=0; k<oncols; k++) {
2529: oicols[k] = dof*garray[ocols[k]]+j;
2530: }
2531: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2532: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2533: ii++;
2534: }
2535: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2536: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2537: }
2538: PetscFree2(icols,oicols);
2540: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2541: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2543: if (reuse == MAT_REUSE_MATRIX) {
2544: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
2545: ((PetscObject)A)->refct = 1;
2546: MatHeaderReplace(A,B);
2547: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
2548: } else {
2549: *newmat = B;
2550: }
2551: return(0);
2552: }
2556: /* ---------------------------------------------------------------------------------- */
2557: /*MC
2558: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2559: operations for multicomponent problems. It interpolates each component the same
2560: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
2561: and MATMPIAIJ for distributed matrices.
2563: Operations provided:
2564: + MatMult
2565: . MatMultTranspose
2566: . MatMultAdd
2567: . MatMultTransposeAdd
2568: - MatView
2570: Level: advanced
2572: M*/
2575: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2576: {
2578: PetscMPIInt size;
2579: PetscInt n;
2580: Mat_MPIMAIJ *b;
2581: Mat B;
2584: PetscObjectReference((PetscObject)A);
2586: if (dof == 1) {
2587: *maij = A;
2588: } else {
2589: MatCreate(((PetscObject)A)->comm,&B);
2590: MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);
2591: B->assembled = PETSC_TRUE;
2593: MPI_Comm_size(((PetscObject)A)->comm,&size);
2594: if (size == 1) {
2595: MatSetType(B,MATSEQMAIJ);
2596: B->ops->destroy = MatDestroy_SeqMAIJ;
2597: B->ops->view = MatView_SeqMAIJ;
2598: b = (Mat_MPIMAIJ*)B->data;
2599: b->dof = dof;
2600: b->AIJ = A;
2601: if (dof == 2) {
2602: B->ops->mult = MatMult_SeqMAIJ_2;
2603: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
2604: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
2605: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2606: } else if (dof == 3) {
2607: B->ops->mult = MatMult_SeqMAIJ_3;
2608: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
2609: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
2610: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2611: } else if (dof == 4) {
2612: B->ops->mult = MatMult_SeqMAIJ_4;
2613: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
2614: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
2615: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2616: } else if (dof == 5) {
2617: B->ops->mult = MatMult_SeqMAIJ_5;
2618: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
2619: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
2620: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2621: } else if (dof == 6) {
2622: B->ops->mult = MatMult_SeqMAIJ_6;
2623: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
2624: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
2625: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2626: } else if (dof == 7) {
2627: B->ops->mult = MatMult_SeqMAIJ_7;
2628: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
2629: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
2630: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2631: } else if (dof == 8) {
2632: B->ops->mult = MatMult_SeqMAIJ_8;
2633: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
2634: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
2635: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2636: } else if (dof == 9) {
2637: B->ops->mult = MatMult_SeqMAIJ_9;
2638: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
2639: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
2640: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2641: } else if (dof == 10) {
2642: B->ops->mult = MatMult_SeqMAIJ_10;
2643: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
2644: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
2645: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2646: } else if (dof == 16) {
2647: B->ops->mult = MatMult_SeqMAIJ_16;
2648: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
2649: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
2650: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2651: } else {
2652: SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2653: }
2654: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2655: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2656: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2657: } else {
2658: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2659: IS from,to;
2660: Vec gvec;
2661: PetscInt *garray,i;
2663: MatSetType(B,MATMPIMAIJ);
2664: B->ops->destroy = MatDestroy_MPIMAIJ;
2665: B->ops->view = MatView_MPIMAIJ;
2666: b = (Mat_MPIMAIJ*)B->data;
2667: b->dof = dof;
2668: b->A = A;
2669: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2670: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
2672: VecGetSize(mpiaij->lvec,&n);
2673: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2674: VecSetBlockSize(b->w,dof);
2676: /* create two temporary Index sets for build scatter gather */
2677: PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2678: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2679: ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);
2680: PetscFree(garray);
2681: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
2683: /* create temporary global vector to generate scatter context */
2684: VecCreateMPI(((PetscObject)A)->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);
2685: VecSetBlockSize(gvec,dof);
2687: /* generate the scatter context */
2688: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
2690: ISDestroy(from);
2691: ISDestroy(to);
2692: VecDestroy(gvec);
2694: B->ops->mult = MatMult_MPIMAIJ_dof;
2695: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
2696: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
2697: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2698: B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
2699: B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
2700: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
2701: }
2702: *maij = B;
2703: MatView_Private(B);
2704: }
2705: return(0);
2706: }