Actual source code: bdfact.c
2: /* Block diagonal matrix format - factorization and triangular solves */
4: #include src/mat/impls/bdiag/seq/bdiag.h
5: #include src/inline/ilu.h
9: PetscErrorCode MatILUFactorSymbolic_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
10: {
11: PetscTruth idn;
15: if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
16: if (isrow) {
17: ISIdentity(isrow,&idn);
18: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
19: }
20: if (iscol) {
21: ISIdentity(iscol,&idn);
22: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
23: }
24: if (info->levels != 0) {
25: SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
26: }
27: MatConvert(A,MATSAME,B);
29: /* Must set to zero for repeated calls with different nonzero structure */
30: (*B)->factor = 0;
31: return(0);
32: }
36: PetscErrorCode MatILUFactor_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info)
37: {
38: PetscTruth idn;
42: /* For now, no fill is allocated in symbolic factorization phase, so we
43: directly use the input matrix for numeric factorization. */
44: if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
45: if (isrow) {
46: ISIdentity(isrow,&idn);
47: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
48: }
49: if (iscol) {
50: ISIdentity(iscol,&idn);
51: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
52: }
53: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
54: MatLUFactorNumeric(A,&A);
55: return(0);
56: }
58: /* --------------------------------------------------------------------------*/
61: PetscErrorCode MatLUFactorNumeric_SeqBDiag_N(Mat A,Mat *B)
62: {
63: Mat C = *B;
64: Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
65: PetscInt k,d,d2,dgk,elim_row,elim_col,bs = A->bs,knb,knb2,bs2 = bs*bs;
67: PetscInt dnum,nd = a->nd,mblock = a->mblock,nblock = a->nblock;
68: PetscInt *diag = a->diag, m = A->m,mainbd = a->mainbd,*dgptr,len,i;
69: PetscScalar **dv = a->diagv,*dd = dv[mainbd],*v_work;
70: PetscScalar *multiplier;
73: /* Copy input matrix to factored matrix if we've already factored the
74: matrix before AND the nonzero structure remains the same. This is done
75: in symbolic factorization the first time through, but there's no symbolic
76: factorization for successive calls with same matrix sparsity structure. */
77: if (C->factor == FACTOR_LU) {
78: for (i=0; i<a->nd; i++) {
79: len = a->bdlen[i] * bs2 * sizeof(PetscScalar);
80: d = diag[i];
81: if (d > 0) {
82: PetscMemcpy(dv[i]+bs2*d,a1->diagv[i]+bs2*d,len);
83: } else {
84: PetscMemcpy(dv[i],a1->diagv[i],len);
85: }
86: }
87: }
89: if (!a->pivot) {
90: PetscMalloc((m+1)*sizeof(PetscInt),&a->pivot);
91: PetscLogObjectMemory(C,m*sizeof(PetscInt));
92: }
93: PetscMalloc((bs2+bs+1)*sizeof(PetscScalar),&v_work);
94: multiplier = v_work + bs;
95: PetscMalloc((mblock+nblock+1)*sizeof(PetscInt),&dgptr);
96: PetscMemzero(dgptr,(mblock+nblock)*sizeof(PetscInt));
97: for (k=0; k<nd; k++) dgptr[diag[k]+mblock] = k+1;
98: for (k=0; k<mblock; k++) { /* k = block pivot_row */
99: knb = k*bs; knb2 = knb*bs;
100: /* invert the diagonal block */
101: Kernel_A_gets_inverse_A(bs,dd+knb2,a->pivot+knb,v_work);
102: for (d=mainbd-1; d>=0; d--) {
103: elim_row = k + diag[d];
104: if (elim_row < mblock) { /* sweep down */
105: /* dv[d][knb2]: test if entire block is zero? */
106: Kernel_A_gets_A_times_B(bs,&dv[d][elim_row*bs2],dd+knb2,multiplier);
107: for (d2=d+1; d2<nd; d2++) {
108: elim_col = elim_row - diag[d2];
109: if (elim_col >=0 && elim_col < nblock) {
110: dgk = k - elim_col;
111: if ((dnum = dgptr[dgk+mblock])) {
112: Kernel_A_gets_A_minus_B_times_C(bs,&dv[d2][elim_row*bs2],
113: &dv[d][elim_row*bs2],&dv[dnum-1][knb2]);
114: }
115: }
116: }
117: }
118: }
119: }
120: PetscFree(dgptr);
121: PetscFree(v_work);
122: if (!a->solvework) {
123: PetscMalloc(bs*sizeof(PetscScalar),&a->solvework);
124: }
125: C->factor = FACTOR_LU;
126: return(0);
127: }
131: PetscErrorCode MatLUFactorNumeric_SeqBDiag_1(Mat A,Mat *B)
132: {
133: Mat C = *B;
134: Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
136: PetscInt k,d,d2,dgk,elim_row,elim_col,dnum,nd = a->nd,i,len;
137: PetscInt *diag = a->diag,n = A->n,m = A->m,mainbd = a->mainbd,*dgptr;
138: PetscScalar **dv = a->diagv,*dd = dv[mainbd],mult;
141: /* Copy input matrix to factored matrix if we've already factored the
142: matrix before AND the nonzero structure remains the same. This is done
143: in symbolic factorization the first time through, but there's no symbolic
144: factorization for successive calls with same matrix sparsity structure. */
145: if (C->factor == FACTOR_LU) {
146: for (i=0; i<nd; i++) {
147: len = a->bdlen[i] * sizeof(PetscScalar);
148: d = diag[i];
149: if (d > 0) {
150: PetscMemcpy(dv[i]+d,a1->diagv[i]+d,len);
151: } else {
152: PetscMemcpy(dv[i],a1->diagv[i],len);
153: }
154: }
155: }
157: PetscMalloc((m+n+1)*sizeof(PetscInt),&dgptr);
158: PetscMemzero(dgptr,(m+n)*sizeof(PetscInt));
159: for (k=0; k<nd; k++) dgptr[diag[k]+m] = k+1;
160: for (k=0; k<m; k++) { /* k = pivot_row */
161: dd[k] = 1.0/dd[k];
162: for (d=mainbd-1; d>=0; d--) {
163: elim_row = k + diag[d];
164: if (elim_row < m) { /* sweep down */
165: if (dv[d][elim_row] != 0.0) {
166: dv[d][elim_row] *= dd[k];
167: mult = dv[d][elim_row];
168: for (d2=d+1; d2<nd; d2++) {
169: elim_col = elim_row - diag[d2];
170: dgk = k - elim_col;
171: if (elim_col >=0 && elim_col < n) {
172: if ((dnum = dgptr[dgk+m])) {
173: dv[d2][elim_row] -= mult * dv[dnum-1][k];
174: }
175: }
176: }
177: }
178: }
179: }
180: }
181: PetscFree(dgptr);
182: C->factor = FACTOR_LU;
183: return(0);
184: }
186: /* -----------------------------------------------------------------*/
190: PetscErrorCode MatSolve_SeqBDiag_1(Mat A,Vec xx,Vec yy)
191: {
192: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
194: PetscInt i,d,loc,mainbd = a->mainbd;
195: PetscInt n = A->n,m = A->m,*diag = a->diag,col;
196: PetscScalar *x,*y,*dd = a->diagv[mainbd],sum,**dv = a->diagv;
199: VecGetArray(xx,&x);
200: VecGetArray(yy,&y);
201: /* forward solve the lower triangular part */
202: for (i=0; i<m; i++) {
203: sum = x[i];
204: for (d=0; d<mainbd; d++) {
205: loc = i - diag[d];
206: if (loc >= 0) sum -= dv[d][i] * y[loc];
207: }
208: y[i] = sum;
209: }
210: /* backward solve the upper triangular part */
211: for (i=m-1; i>=0; i--) {
212: sum = y[i];
213: for (d=mainbd+1; d<a->nd; d++) {
214: col = i - diag[d];
215: if (col < n) sum -= dv[d][i] * y[col];
216: }
217: y[i] = sum*dd[i];
218: }
219: VecRestoreArray(xx,&x);
220: VecRestoreArray(yy,&y);
221: PetscLogFlops(2*a->nz - A->n);
222: return(0);
223: }
227: PetscErrorCode MatSolve_SeqBDiag_2(Mat A,Vec xx,Vec yy)
228: {
229: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
230: PetscInt i,d,loc,mainbd = a->mainbd;
231: PetscInt mblock = a->mblock,nblock = a->nblock,inb,inb2;
233: PetscInt m = A->m,*diag = a->diag,col;
234: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
235: PetscScalar w0,w1,sum0,sum1;
238: VecGetArray(xx,&x);
239: VecGetArray(yy,&y);
240: PetscMemcpy(y,x,m*sizeof(PetscScalar));
242: /* forward solve the lower triangular part */
243: if (mainbd != 0) {
244: inb = 0;
245: for (i=0; i<mblock; i++) {
246: sum0 = sum1 = 0.0;
247: for (d=0; d<mainbd; d++) {
248: loc = 2*(i - diag[d]);
249: if (loc >= 0) {
250: dvt = &dv[d][4*i];
251: w0 = y[loc]; w1 = y[loc+1];
252: sum0 += dvt[0]*w0 + dvt[2]*w1;
253: sum1 += dvt[1]*w0 + dvt[3]*w1;
254: }
255: }
256: y[inb] -= sum0; y[inb+1] -= sum1;
258: inb += 2;
259: }
260: }
261: /* backward solve the upper triangular part */
262: inb = 2*(mblock-1); inb2 = 2*inb;
263: for (i=mblock-1; i>=0; i--) {
264: sum0 = y[inb]; sum1 = y[inb+1];
265: for (d=mainbd+1; d<a->nd; d++) {
266: col = 2*(i - diag[d]);
267: if (col < 2*nblock) {
268: dvt = &dv[d][4*i];
269: w0 = y[col]; w1 = y[col+1];
270: sum0 -= dvt[0]*w0 + dvt[2]*w1;
271: sum1 -= dvt[1]*w0 + dvt[3]*w1;
272: }
273: }
274: dvt = dd+inb2;
275: y[inb] = dvt[0]*sum0 + dvt[2]*sum1;
276: y[inb+1] = dvt[1]*sum0 + dvt[3]*sum1;
277: inb -= 2; inb2 -= 4;
278: }
279: VecRestoreArray(xx,&x);
280: VecRestoreArray(yy,&y);
281: PetscLogFlops(2*a->nz - A->n);
282: return(0);
283: }
287: PetscErrorCode MatSolve_SeqBDiag_3(Mat A,Vec xx,Vec yy)
288: {
289: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
290: PetscInt i,d,loc,mainbd = a->mainbd;
291: PetscInt mblock = a->mblock,nblock = a->nblock,inb,inb2;
293: PetscInt m = A->m,*diag = a->diag,col;
294: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
295: PetscScalar w0,w1,w2,sum0,sum1,sum2;
298: VecGetArray(xx,&x);
299: VecGetArray(yy,&y);
300: PetscMemcpy(y,x,m*sizeof(PetscScalar));
302: /* forward solve the lower triangular part */
303: if (mainbd != 0) {
304: inb = 0;
305: for (i=0; i<mblock; i++) {
306: sum0 = sum1 = sum2 = 0.0;
307: for (d=0; d<mainbd; d++) {
308: loc = 3*(i - diag[d]);
309: if (loc >= 0) {
310: dvt = &dv[d][9*i];
311: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];
312: sum0 += dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
313: sum1 += dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
314: sum2 += dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
315: }
316: }
317: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;
318: inb += 3;
319: }
320: }
321: /* backward solve the upper triangular part */
322: inb = 3*(mblock-1); inb2 = 3*inb;
323: for (i=mblock-1; i>=0; i--) {
324: sum0 = y[inb]; sum1 = y[inb+1]; sum2 = y[inb+2];
325: for (d=mainbd+1; d<a->nd; d++) {
326: col = 3*(i - diag[d]);
327: if (col < 3*nblock) {
328: dvt = &dv[d][9*i];
329: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];
330: sum0 -= dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
331: sum1 -= dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
332: sum2 -= dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
333: }
334: }
335: dvt = dd+inb2;
336: y[inb] = dvt[0]*sum0 + dvt[3]*sum1 + dvt[6]*sum2;
337: y[inb+1] = dvt[1]*sum0 + dvt[4]*sum1 + dvt[7]*sum2;
338: y[inb+2] = dvt[2]*sum0 + dvt[5]*sum1 + dvt[8]*sum2;
339: inb -= 3; inb2 -= 9;
340: }
341: VecRestoreArray(xx,&x);
342: VecRestoreArray(yy,&y);
343: PetscLogFlops(2*a->nz - A->n);
344: return(0);
345: }
349: PetscErrorCode MatSolve_SeqBDiag_4(Mat A,Vec xx,Vec yy)
350: {
351: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
352: PetscInt i,d,loc,mainbd = a->mainbd;
353: PetscInt mblock = a->mblock,nblock = a->nblock,inb,inb2;
355: PetscInt m = A->m,*diag = a->diag,col;
356: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
357: PetscScalar w0,w1,w2,w3,sum0,sum1,sum2,sum3;
360: VecGetArray(xx,&x);
361: VecGetArray(yy,&y);
362: PetscMemcpy(y,x,m*sizeof(PetscScalar));
364: /* forward solve the lower triangular part */
365: if (mainbd != 0) {
366: inb = 0;
367: for (i=0; i<mblock; i++) {
368: sum0 = sum1 = sum2 = sum3 = 0.0;
369: for (d=0; d<mainbd; d++) {
370: loc = 4*(i - diag[d]);
371: if (loc >= 0) {
372: dvt = &dv[d][16*i];
373: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];
374: sum0 += dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2 + dvt[12]*w3;
375: sum1 += dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2 + dvt[13]*w3;
376: sum2 += dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
377: sum3 += dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
378: }
379: }
380: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
381: inb += 4;
382: }
383: }
384: /* backward solve the upper triangular part */
385: inb = 4*(mblock-1); inb2 = 4*inb;
386: for (i=mblock-1; i>=0; i--) {
387: sum0 = y[inb]; sum1 = y[inb+1]; sum2 = y[inb+2]; sum3 = y[inb+3];
388: for (d=mainbd+1; d<a->nd; d++) {
389: col = 4*(i - diag[d]);
390: if (col < 4*nblock) {
391: dvt = &dv[d][16*i];
392: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];
393: sum0 -= dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2 + dvt[12]*w3;
394: sum1 -= dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2 + dvt[13]*w3;
395: sum2 -= dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
396: sum3 -= dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
397: }
398: }
399: dvt = dd+inb2;
400: y[inb] = dvt[0]*sum0 + dvt[4]*sum1 + dvt[8]*sum2 + dvt[12]*sum3;
401: y[inb+1] = dvt[1]*sum0 + dvt[5]*sum1 + dvt[9]*sum2 + dvt[13]*sum3;
402: y[inb+2] = dvt[2]*sum0 + dvt[6]*sum1 + dvt[10]*sum2 + dvt[14]*sum3;
403: y[inb+3] = dvt[3]*sum0 + dvt[7]*sum1 + dvt[11]*sum2 + dvt[15]*sum3;
404: inb -= 4; inb2 -= 16;
405: }
406: VecRestoreArray(xx,&x);
407: VecRestoreArray(yy,&y);
408: PetscLogFlops(2*a->nz - A->n);
409: return(0);
410: }
414: PetscErrorCode MatSolve_SeqBDiag_5(Mat A,Vec xx,Vec yy)
415: {
416: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
417: PetscInt i,d,loc,mainbd = a->mainbd;
418: PetscInt mblock = a->mblock,nblock = a->nblock,inb,inb2;
420: PetscInt m = A->m,*diag = a->diag,col;
421: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
422: PetscScalar w0,w1,w2,w3,w4,sum0,sum1,sum2,sum3,sum4;
425: VecGetArray(xx,&x);
426: VecGetArray(yy,&y);
427: PetscMemcpy(y,x,m*sizeof(PetscScalar));
429: /* forward solve the lower triangular part */
430: if (mainbd != 0) {
431: inb = 0;
432: for (i=0; i<mblock; i++) {
433: sum0 = sum1 = sum2 = sum3 = sum4 = 0.0;
434: for (d=0; d<mainbd; d++) {
435: loc = 5*(i - diag[d]);
436: if (loc >= 0) {
437: dvt = &dv[d][25*i];
438: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];w4 = y[loc+4];
439: sum0 += dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
440: sum1 += dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
441: sum2 += dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
442: sum3 += dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
443: sum4 += dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
444: }
445: }
446: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
447: y[inb+4] -= sum4;
448: inb += 5;
449: }
450: }
451: /* backward solve the upper triangular part */
452: inb = 5*(mblock-1); inb2 = 5*inb;
453: for (i=mblock-1; i>=0; i--) {
454: sum0 = y[inb];sum1 = y[inb+1];sum2 = y[inb+2];sum3 = y[inb+3];sum4 = y[inb+4];
455: for (d=mainbd+1; d<a->nd; d++) {
456: col = 5*(i - diag[d]);
457: if (col < 5*nblock) {
458: dvt = &dv[d][25*i];
459: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];w4 = y[col+4];
460: sum0 -= dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
461: sum1 -= dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
462: sum2 -= dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
463: sum3 -= dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
464: sum4 -= dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
465: }
466: }
467: dvt = dd+inb2;
468: y[inb] = dvt[0]*sum0 + dvt[5]*sum1 + dvt[10]*sum2 + dvt[15]*sum3
469: + dvt[20]*sum4;
470: y[inb+1] = dvt[1]*sum0 + dvt[6]*sum1 + dvt[11]*sum2 + dvt[16]*sum3
471: + dvt[21]*sum4;
472: y[inb+2] = dvt[2]*sum0 + dvt[7]*sum1 + dvt[12]*sum2 + dvt[17]*sum3
473: + dvt[22]*sum4;
474: y[inb+3] = dvt[3]*sum0 + dvt[8]*sum1 + dvt[13]*sum2 + dvt[18]*sum3
475: + dvt[23]*sum4;
476: y[inb+4] = dvt[4]*sum0 + dvt[9]*sum1 + dvt[14]*sum2 + dvt[19]*sum3
477: + dvt[24]*sum4;
478: inb -= 5; inb2 -= 25;
479: }
480: VecRestoreArray(xx,&x);
481: VecRestoreArray(yy,&y);
482: PetscLogFlops(2*a->nz - A->n);
483: return(0);
484: }
488: PetscErrorCode MatSolve_SeqBDiag_N(Mat A,Vec xx,Vec yy)
489: {
490: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
491: PetscInt i,d,loc,mainbd = a->mainbd;
492: PetscInt mblock = a->mblock,nblock = a->nblock,inb,inb2;
494: PetscInt bs = A->bs,m = A->m,*diag = a->diag,col,bs2 = bs*bs;
495: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv;
496: PetscScalar *work = a->solvework;
499: VecGetArray(xx,&x);
500: VecGetArray(yy,&y);
501: PetscMemcpy(y,x,m*sizeof(PetscScalar));
503: /* forward solve the lower triangular part */
504: if (mainbd != 0) {
505: inb = 0;
506: for (i=0; i<mblock; i++) {
507: for (d=0; d<mainbd; d++) {
508: loc = i - diag[d];
509: if (loc >= 0) {
510: Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][i*bs2],y+loc*bs);
511: }
512: }
513: inb += bs;
514: }
515: }
516: /* backward solve the upper triangular part */
517: inb = bs*(mblock-1); inb2 = inb*bs;
518: for (i=mblock-1; i>=0; i--) {
519: for (d=mainbd+1; d<a->nd; d++) {
520: col = i - diag[d];
521: if (col < nblock) {
522: Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][inb2],y+col*bs);
523: }
524: }
525: Kernel_w_gets_A_times_v(bs,y+inb,dd+inb2,work);
526: PetscMemcpy(y+inb,work,bs*sizeof(PetscScalar));
527: inb -= bs; inb2 -= bs2;
528: }
529: VecRestoreArray(xx,&x);
530: VecRestoreArray(yy,&y);
531: PetscLogFlops(2*a->nz - A->n);
532: return(0);
533: }