Actual source code: bdfact.c
1: /*$Id: bdfact.c,v 1.65 2001/08/07 03:02:53 balay Exp $*/
3: /* Block diagonal matrix format - factorization and triangular solves */
5: #include src/mat/impls/bdiag/seq/bdiag.h
6: #include src/vec/vecimpl.h
7: #include src/inline/ilu.h
9: #undef __FUNCT__
11: int MatILUFactorSymbolic_SeqBDiag(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *B)
12: {
13: PetscTruth idn;
14: int ierr;
17: if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
18: if (isrow) {
19: ISIdentity(isrow,&idn);
20: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
21: }
22: if (iscol) {
23: ISIdentity(iscol,&idn);
24: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
25: }
26: if (info->levels != 0) {
27: SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
28: }
29: MatConvert(A,MATSAME,B);
31: /* Must set to zero for repeated calls with different nonzero structure */
32: (*B)->factor = 0;
33: return(0);
34: }
36: #undef __FUNCT__
38: int MatILUFactor_SeqBDiag(Mat A,IS isrow,IS iscol,MatILUInfo *info)
39: {
40: PetscTruth idn;
41: int ierr;
44: /* For now, no fill is allocated in symbolic factorization phase, so we
45: directly use the input matrix for numeric factorization. */
46: if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
47: if (isrow) {
48: ISIdentity(isrow,&idn);
49: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
50: }
51: if (iscol) {
52: ISIdentity(iscol,&idn);
53: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
54: }
55: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
56: MatLUFactorNumeric(A,&A);
57: return(0);
58: }
60: /* --------------------------------------------------------------------------*/
61: #undef __FUNCT__
63: int MatLUFactorNumeric_SeqBDiag_N(Mat A,Mat *B)
64: {
65: Mat C = *B;
66: Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
67: int k,d,d2,dgk,elim_row,elim_col,bs = a->bs,knb,knb2,bs2 = bs*bs;
68: int dnum,nd = a->nd,mblock = a->mblock,nblock = a->nblock,ierr;
69: int *diag = a->diag, m = A->m,mainbd = a->mainbd,*dgptr,len,i;
70: PetscScalar **dv = a->diagv,*dd = dv[mainbd],*v_work;
71: PetscScalar *multiplier;
74: /* Copy input matrix to factored matrix if we've already factored the
75: matrix before AND the nonzero structure remains the same. This is done
76: in symbolic factorization the first time through, but there's no symbolic
77: factorization for successive calls with same matrix sparsity structure. */
78: if (C->factor == FACTOR_LU) {
79: for (i=0; i<a->nd; i++) {
80: len = a->bdlen[i] * bs2 * sizeof(PetscScalar);
81: d = diag[i];
82: if (d > 0) {
83: PetscMemcpy(dv[i]+bs2*d,a1->diagv[i]+bs2*d,len);
84: } else {
85: PetscMemcpy(dv[i],a1->diagv[i],len);
86: }
87: }
88: }
90: if (!a->pivot) {
91: PetscMalloc((m+1)*sizeof(int),&a->pivot);
92: PetscLogObjectMemory(C,m*sizeof(int));
93: }
94: ierr = PetscMalloc((bs2+bs+1)*sizeof(PetscScalar),&v_work);
95: multiplier = v_work + bs;
96: ierr = PetscMalloc((mblock+nblock+1)*sizeof(int),&dgptr);
97: ierr = PetscMemzero(dgptr,(mblock+nblock)*sizeof(int));
98: for (k=0; k<nd; k++) dgptr[diag[k]+mblock] = k+1;
99: for (k=0; k<mblock; k++) { /* k = block pivot_row */
100: knb = k*bs; knb2 = knb*bs;
101: /* invert the diagonal block */
102: Kernel_A_gets_inverse_A(bs,dd+knb2,a->pivot+knb,v_work);
103: for (d=mainbd-1; d>=0; d--) {
104: elim_row = k + diag[d];
105: if (elim_row < mblock) { /* sweep down */
106: /* dv[d][knb2]: test if entire block is zero? */
107: Kernel_A_gets_A_times_B(bs,&dv[d][elim_row*bs2],dd+knb2,multiplier);
108: for (d2=d+1; d2<nd; d2++) {
109: elim_col = elim_row - diag[d2];
110: if (elim_col >=0 && elim_col < nblock) {
111: dgk = k - elim_col;
112: if ((dnum = dgptr[dgk+mblock])) {
113: Kernel_A_gets_A_minus_B_times_C(bs,&dv[d2][elim_row*bs2],
114: &dv[d][elim_row*bs2],&dv[dnum-1][knb2]);
115: }
116: }
117: }
118: }
119: }
120: }
121: PetscFree(dgptr);
122: PetscFree(v_work);
123: C->factor = FACTOR_LU;
124: return(0);
125: }
127: #undef __FUNCT__
129: int MatLUFactorNumeric_SeqBDiag_1(Mat A,Mat *B)
130: {
131: Mat C = *B;
132: Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
133: int k,d,d2,dgk,elim_row,elim_col,dnum,nd = a->nd,i,len,ierr;
134: int *diag = a->diag,n = A->n,m = A->m,mainbd = a->mainbd,*dgptr;
135: PetscScalar **dv = a->diagv,*dd = dv[mainbd],mult;
138: /* Copy input matrix to factored matrix if we've already factored the
139: matrix before AND the nonzero structure remains the same. This is done
140: in symbolic factorization the first time through, but there's no symbolic
141: factorization for successive calls with same matrix sparsity structure. */
142: if (C->factor == FACTOR_LU) {
143: for (i=0; i<nd; i++) {
144: len = a->bdlen[i] * sizeof(PetscScalar);
145: d = diag[i];
146: if (d > 0) {
147: PetscMemcpy(dv[i]+d,a1->diagv[i]+d,len);
148: } else {
149: PetscMemcpy(dv[i],a1->diagv[i],len);
150: }
151: }
152: }
154: PetscMalloc((m+n+1)*sizeof(int),&dgptr);
155: ierr = PetscMemzero(dgptr,(m+n)*sizeof(int));
156: for (k=0; k<nd; k++) dgptr[diag[k]+m] = k+1;
157: for (k=0; k<m; k++) { /* k = pivot_row */
158: dd[k] = 1.0/dd[k];
159: for (d=mainbd-1; d>=0; d--) {
160: elim_row = k + diag[d];
161: if (elim_row < m) { /* sweep down */
162: if (dv[d][elim_row] != 0.0) {
163: dv[d][elim_row] *= dd[k];
164: mult = dv[d][elim_row];
165: for (d2=d+1; d2<nd; d2++) {
166: elim_col = elim_row - diag[d2];
167: dgk = k - elim_col;
168: if (elim_col >=0 && elim_col < n) {
169: if ((dnum = dgptr[dgk+m])) {
170: dv[d2][elim_row] -= mult * dv[dnum-1][k];
171: }
172: }
173: }
174: }
175: }
176: }
177: }
178: PetscFree(dgptr);
179: C->factor = FACTOR_LU;
180: return(0);
181: }
183: /* -----------------------------------------------------------------*/
185: #undef __FUNCT__
187: int MatSolve_SeqBDiag_1(Mat A,Vec xx,Vec yy)
188: {
189: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
190: int ierr,i,d,loc,mainbd = a->mainbd;
191: int n = A->n,m = A->m,*diag = a->diag,col;
192: PetscScalar *x,*y,*dd = a->diagv[mainbd],sum,**dv = a->diagv;
195: VecGetArray(xx,&x);
196: VecGetArray(yy,&y);
197: /* forward solve the lower triangular part */
198: for (i=0; i<m; i++) {
199: sum = x[i];
200: for (d=0; d<mainbd; d++) {
201: loc = i - diag[d];
202: if (loc >= 0) sum -= dv[d][i] * y[loc];
203: }
204: y[i] = sum;
205: }
206: /* backward solve the upper triangular part */
207: for (i=m-1; i>=0; i--) {
208: sum = y[i];
209: for (d=mainbd+1; d<a->nd; d++) {
210: col = i - diag[d];
211: if (col < n) sum -= dv[d][i] * y[col];
212: }
213: y[i] = sum*dd[i];
214: }
215: VecRestoreArray(xx,&x);
216: VecRestoreArray(yy,&y);
217: PetscLogFlops(2*a->nz - A->n);
218: return(0);
219: }
221: #undef __FUNCT__
223: int MatSolve_SeqBDiag_2(Mat A,Vec xx,Vec yy)
224: {
225: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
226: int i,d,loc,mainbd = a->mainbd;
227: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
228: int ierr,m = A->m,*diag = a->diag,col;
229: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
230: PetscScalar w0,w1,sum0,sum1;
233: VecGetArray(xx,&x);
234: VecGetArray(yy,&y);
235: PetscMemcpy(y,x,m*sizeof(PetscScalar));
237: /* forward solve the lower triangular part */
238: if (mainbd != 0) {
239: inb = 0;
240: for (i=0; i<mblock; i++) {
241: sum0 = sum1 = 0.0;
242: for (d=0; d<mainbd; d++) {
243: loc = 2*(i - diag[d]);
244: if (loc >= 0) {
245: dvt = &dv[d][4*i];
246: w0 = y[loc]; w1 = y[loc+1];
247: sum0 += dvt[0]*w0 + dvt[2]*w1;
248: sum1 += dvt[1]*w0 + dvt[3]*w1;
249: }
250: }
251: y[inb] -= sum0; y[inb+1] -= sum1;
253: inb += 2;
254: }
255: }
256: /* backward solve the upper triangular part */
257: inb = 2*(mblock-1); inb2 = 2*inb;
258: for (i=mblock-1; i>=0; i--) {
259: sum0 = y[inb]; sum1 = y[inb+1];
260: for (d=mainbd+1; d<a->nd; d++) {
261: col = 2*(i - diag[d]);
262: if (col < 2*nblock) {
263: dvt = &dv[d][4*i];
264: w0 = y[col]; w1 = y[col+1];
265: sum0 -= dvt[0]*w0 + dvt[2]*w1;
266: sum1 -= dvt[1]*w0 + dvt[3]*w1;
267: }
268: }
269: dvt = dd+inb2;
270: y[inb] = dvt[0]*sum0 + dvt[2]*sum1;
271: y[inb+1] = dvt[1]*sum0 + dvt[3]*sum1;
272: inb -= 2; inb2 -= 4;
273: }
274: VecRestoreArray(xx,&x);
275: VecRestoreArray(yy,&y);
276: PetscLogFlops(2*a->nz - A->n);
277: return(0);
278: }
280: #undef __FUNCT__
282: int MatSolve_SeqBDiag_3(Mat A,Vec xx,Vec yy)
283: {
284: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
285: int i,d,loc,mainbd = a->mainbd;
286: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
287: int ierr,m = A->m,*diag = a->diag,col;
288: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
289: PetscScalar w0,w1,w2,sum0,sum1,sum2;
292: VecGetArray(xx,&x);
293: VecGetArray(yy,&y);
294: PetscMemcpy(y,x,m*sizeof(PetscScalar));
296: /* forward solve the lower triangular part */
297: if (mainbd != 0) {
298: inb = 0;
299: for (i=0; i<mblock; i++) {
300: sum0 = sum1 = sum2 = 0.0;
301: for (d=0; d<mainbd; d++) {
302: loc = 3*(i - diag[d]);
303: if (loc >= 0) {
304: dvt = &dv[d][9*i];
305: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];
306: sum0 += dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
307: sum1 += dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
308: sum2 += dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
309: }
310: }
311: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;
312: inb += 3;
313: }
314: }
315: /* backward solve the upper triangular part */
316: inb = 3*(mblock-1); inb2 = 3*inb;
317: for (i=mblock-1; i>=0; i--) {
318: sum0 = y[inb]; sum1 = y[inb+1]; sum2 = y[inb+2];
319: for (d=mainbd+1; d<a->nd; d++) {
320: col = 3*(i - diag[d]);
321: if (col < 3*nblock) {
322: dvt = &dv[d][9*i];
323: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];
324: sum0 -= dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
325: sum1 -= dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
326: sum2 -= dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
327: }
328: }
329: dvt = dd+inb2;
330: y[inb] = dvt[0]*sum0 + dvt[3]*sum1 + dvt[6]*sum2;
331: y[inb+1] = dvt[1]*sum0 + dvt[4]*sum1 + dvt[7]*sum2;
332: y[inb+2] = dvt[2]*sum0 + dvt[5]*sum1 + dvt[8]*sum2;
333: inb -= 3; inb2 -= 9;
334: }
335: VecRestoreArray(xx,&x);
336: VecRestoreArray(yy,&y);
337: PetscLogFlops(2*a->nz - A->n);
338: return(0);
339: }
341: #undef __FUNCT__
343: int MatSolve_SeqBDiag_4(Mat A,Vec xx,Vec yy)
344: {
345: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
346: int i,d,loc,mainbd = a->mainbd;
347: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
348: int ierr,m = A->m,*diag = a->diag,col;
349: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
350: PetscScalar w0,w1,w2,w3,sum0,sum1,sum2,sum3;
353: VecGetArray(xx,&x);
354: VecGetArray(yy,&y);
355: PetscMemcpy(y,x,m*sizeof(PetscScalar));
357: /* forward solve the lower triangular part */
358: if (mainbd != 0) {
359: inb = 0;
360: for (i=0; i<mblock; i++) {
361: sum0 = sum1 = sum2 = sum3 = 0.0;
362: for (d=0; d<mainbd; d++) {
363: loc = 4*(i - diag[d]);
364: if (loc >= 0) {
365: dvt = &dv[d][16*i];
366: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];
367: sum0 += dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2 + dvt[12]*w3;
368: sum1 += dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2 + dvt[13]*w3;
369: sum2 += dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
370: sum3 += dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
371: }
372: }
373: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
374: inb += 4;
375: }
376: }
377: /* backward solve the upper triangular part */
378: inb = 4*(mblock-1); inb2 = 4*inb;
379: for (i=mblock-1; i>=0; i--) {
380: sum0 = y[inb]; sum1 = y[inb+1]; sum2 = y[inb+2]; sum3 = y[inb+3];
381: for (d=mainbd+1; d<a->nd; d++) {
382: col = 4*(i - diag[d]);
383: if (col < 4*nblock) {
384: dvt = &dv[d][16*i];
385: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];
386: sum0 -= dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2 + dvt[12]*w3;
387: sum1 -= dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2 + dvt[13]*w3;
388: sum2 -= dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
389: sum3 -= dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
390: }
391: }
392: dvt = dd+inb2;
393: y[inb] = dvt[0]*sum0 + dvt[4]*sum1 + dvt[8]*sum2 + dvt[12]*sum3;
394: y[inb+1] = dvt[1]*sum0 + dvt[5]*sum1 + dvt[9]*sum2 + dvt[13]*sum3;
395: y[inb+2] = dvt[2]*sum0 + dvt[6]*sum1 + dvt[10]*sum2 + dvt[14]*sum3;
396: y[inb+3] = dvt[3]*sum0 + dvt[7]*sum1 + dvt[11]*sum2 + dvt[15]*sum3;
397: inb -= 4; inb2 -= 16;
398: }
399: VecRestoreArray(xx,&x);
400: VecRestoreArray(yy,&y);
401: PetscLogFlops(2*a->nz - A->n);
402: return(0);
403: }
405: #undef __FUNCT__
407: int MatSolve_SeqBDiag_5(Mat A,Vec xx,Vec yy)
408: {
409: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
410: int i,d,loc,mainbd = a->mainbd;
411: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
412: int ierr,m = A->m,*diag = a->diag,col;
413: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
414: PetscScalar w0,w1,w2,w3,w4,sum0,sum1,sum2,sum3,sum4;
417: VecGetArray(xx,&x);
418: VecGetArray(yy,&y);
419: PetscMemcpy(y,x,m*sizeof(PetscScalar));
421: /* forward solve the lower triangular part */
422: if (mainbd != 0) {
423: inb = 0;
424: for (i=0; i<mblock; i++) {
425: sum0 = sum1 = sum2 = sum3 = sum4 = 0.0;
426: for (d=0; d<mainbd; d++) {
427: loc = 5*(i - diag[d]);
428: if (loc >= 0) {
429: dvt = &dv[d][25*i];
430: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];w4 = y[loc+4];
431: sum0 += dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
432: sum1 += dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
433: sum2 += dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
434: sum3 += dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
435: sum4 += dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
436: }
437: }
438: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
439: y[inb+4] -= sum4;
440: inb += 5;
441: }
442: }
443: /* backward solve the upper triangular part */
444: inb = 5*(mblock-1); inb2 = 5*inb;
445: for (i=mblock-1; i>=0; i--) {
446: sum0 = y[inb];sum1 = y[inb+1];sum2 = y[inb+2];sum3 = y[inb+3];sum4 = y[inb+4];
447: for (d=mainbd+1; d<a->nd; d++) {
448: col = 5*(i - diag[d]);
449: if (col < 5*nblock) {
450: dvt = &dv[d][25*i];
451: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];w4 = y[col+4];
452: sum0 -= dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
453: sum1 -= dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
454: sum2 -= dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
455: sum3 -= dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
456: sum4 -= dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
457: }
458: }
459: dvt = dd+inb2;
460: y[inb] = dvt[0]*sum0 + dvt[5]*sum1 + dvt[10]*sum2 + dvt[15]*sum3
461: + dvt[20]*sum4;
462: y[inb+1] = dvt[1]*sum0 + dvt[6]*sum1 + dvt[11]*sum2 + dvt[16]*sum3
463: + dvt[21]*sum4;
464: y[inb+2] = dvt[2]*sum0 + dvt[7]*sum1 + dvt[12]*sum2 + dvt[17]*sum3
465: + dvt[22]*sum4;
466: y[inb+3] = dvt[3]*sum0 + dvt[8]*sum1 + dvt[13]*sum2 + dvt[18]*sum3
467: + dvt[23]*sum4;
468: y[inb+4] = dvt[4]*sum0 + dvt[9]*sum1 + dvt[14]*sum2 + dvt[19]*sum3
469: + dvt[24]*sum4;
470: inb -= 5; inb2 -= 25;
471: }
472: VecRestoreArray(xx,&x);
473: VecRestoreArray(yy,&y);
474: PetscLogFlops(2*a->nz - A->n);
475: return(0);
476: }
478: #undef __FUNCT__
480: int MatSolve_SeqBDiag_N(Mat A,Vec xx,Vec yy)
481: {
482: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
483: int i,d,loc,mainbd = a->mainbd;
484: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
485: int ierr,bs = a->bs,m = A->m,*diag = a->diag,col,bs2 = bs*bs;
486: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv;
487: PetscScalar work[25];
490: VecGetArray(xx,&x);
491: VecGetArray(yy,&y);
492: if (bs > 25) SETERRQ(PETSC_ERR_SUP,"Blocks must be smaller then 25");
493: PetscMemcpy(y,x,m*sizeof(PetscScalar));
495: /* forward solve the lower triangular part */
496: if (mainbd != 0) {
497: inb = 0;
498: for (i=0; i<mblock; i++) {
499: for (d=0; d<mainbd; d++) {
500: loc = i - diag[d];
501: if (loc >= 0) {
502: Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][i*bs2],y+loc*bs);
503: }
504: }
505: inb += bs;
506: }
507: }
508: /* backward solve the upper triangular part */
509: inb = bs*(mblock-1); inb2 = inb*bs;
510: for (i=mblock-1; i>=0; i--) {
511: for (d=mainbd+1; d<a->nd; d++) {
512: col = i - diag[d];
513: if (col < nblock) {
514: Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][inb2],y+col*bs);
515: }
516: }
517: Kernel_w_gets_A_times_v(bs,y+inb,dd+inb2,work);
518: PetscMemcpy(y+inb,work,bs*sizeof(PetscScalar));
519: inb -= bs; inb2 -= bs2;
520: }
521: VecRestoreArray(xx,&x);
522: VecRestoreArray(yy,&y);
523: PetscLogFlops(2*a->nz - A->n);
524: return(0);
525: }