Actual source code: baijfact2.c
1: /*$Id: baijfact2.c,v 1.49 2001/04/03 14:33:37 bsmith Exp $*/
2: /*
3: Factorization code for BAIJ format.
4: */
6: #include "src/mat/impls/baij/seq/baij.h"
7: #include "src/vec/vecimpl.h"
8: #include "src/inline/ilu.h"
9: #include "src/inline/dot.h"
11: int MatSolveTranspose_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
12: {
13: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
14: int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz;
15: int *diag = a->diag;
16: MatScalar *aa=a->a,*v;
17: Scalar s1,*x,*b;
20: VecGetArray(bb,&b);
21: VecGetArray(xx,&x);
23: /* forward solve the U^T */
24: for (i=0; i<n; i++) {
26: v = aa + diag[i];
27: /* multiply by the inverse of the block diagonal */
28: s1 = (*v++)*b[i];
29: vi = aj + diag[i] + 1;
30: nz = ai[i+1] - diag[i] - 1;
31: while (nz--) {
32: x[*vi++] -= (*v++)*s1;
33: }
34: x[i] = s1;
35: }
36: /* backward solve the L^T */
37: for (i=n-1; i>=0; i--){
38: v = aa + diag[i] - 1;
39: vi = aj + diag[i] - 1;
40: nz = diag[i] - ai[i];
41: s1 = x[i];
42: while (nz--) {
43: x[*vi--] -= (*v--)*s1;
44: }
45: }
46: VecRestoreArray(bb,&b);
47: VecRestoreArray(xx,&x);
48: PetscLogFlops(2*(a->nz) - A->n);
49: return(0);
50: }
52: int MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
53: {
54: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
55: int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
56: int *diag = a->diag,oidx;
57: MatScalar *aa=a->a,*v;
58: Scalar s1,s2,x1,x2;
59: Scalar *x,*b;
62: VecGetArray(bb,&b);
63: VecGetArray(xx,&x);
65: /* forward solve the U^T */
66: idx = 0;
67: for (i=0; i<n; i++) {
69: v = aa + 4*diag[i];
70: /* multiply by the inverse of the block diagonal */
71: x1 = b[idx]; x2 = b[1+idx];
72: s1 = v[0]*x1 + v[1]*x2;
73: s2 = v[2]*x1 + v[3]*x2;
74: v += 4;
76: vi = aj + diag[i] + 1;
77: nz = ai[i+1] - diag[i] - 1;
78: while (nz--) {
79: oidx = 2*(*vi++);
80: x[oidx] -= v[0]*s1 + v[1]*s2;
81: x[oidx+1] -= v[2]*s1 + v[3]*s2;
82: v += 4;
83: }
84: x[idx] = s1;x[1+idx] = s2;
85: idx += 2;
86: }
87: /* backward solve the L^T */
88: for (i=n-1; i>=0; i--){
89: v = aa + 4*diag[i] - 4;
90: vi = aj + diag[i] - 1;
91: nz = diag[i] - ai[i];
92: idt = 2*i;
93: s1 = x[idt]; s2 = x[1+idt];
94: while (nz--) {
95: idx = 2*(*vi--);
96: x[idx] -= v[0]*s1 + v[1]*s2;
97: x[idx+1] -= v[2]*s1 + v[3]*s2;
98: v -= 4;
99: }
100: }
101: VecRestoreArray(bb,&b);
102: VecRestoreArray(xx,&x);
103: PetscLogFlops(2*4*(a->nz) - 2*A->n);
104: return(0);
105: }
107: int MatSolveTranspose_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
108: {
109: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
110: int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
111: int *diag = a->diag,oidx;
112: MatScalar *aa=a->a,*v;
113: Scalar s1,s2,s3,x1,x2,x3;
114: Scalar *x,*b;
117: VecGetArray(bb,&b);
118: VecGetArray(xx,&x);
120: /* forward solve the U^T */
121: idx = 0;
122: for (i=0; i<n; i++) {
124: v = aa + 9*diag[i];
125: /* multiply by the inverse of the block diagonal */
126: x1 = b[idx]; x2 = b[1+idx]; x3 = b[2+idx];
127: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3;
128: s2 = v[3]*x1 + v[4]*x2 + v[5]*x3;
129: s3 = v[6]*x1 + v[7]*x2 + v[8]*x3;
130: v += 9;
132: vi = aj + diag[i] + 1;
133: nz = ai[i+1] - diag[i] - 1;
134: while (nz--) {
135: oidx = 3*(*vi++);
136: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
137: x[oidx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
138: x[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
139: v += 9;
140: }
141: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;
142: idx += 3;
143: }
144: /* backward solve the L^T */
145: for (i=n-1; i>=0; i--){
146: v = aa + 9*diag[i] - 9;
147: vi = aj + diag[i] - 1;
148: nz = diag[i] - ai[i];
149: idt = 3*i;
150: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];
151: while (nz--) {
152: idx = 3*(*vi--);
153: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
154: x[idx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
155: x[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
156: v -= 9;
157: }
158: }
159: VecRestoreArray(bb,&b);
160: VecRestoreArray(xx,&x);
161: PetscLogFlops(2*9*(a->nz) - 3*A->n);
162: return(0);
163: }
165: int MatSolveTranspose_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
166: {
167: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
168: int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
169: int *diag = a->diag,oidx;
170: MatScalar *aa=a->a,*v;
171: Scalar s1,s2,s3,s4,x1,x2,x3,x4;
172: Scalar *x,*b;
175: VecGetArray(bb,&b);
176: VecGetArray(xx,&x);
178: /* forward solve the U^T */
179: idx = 0;
180: for (i=0; i<n; i++) {
182: v = aa + 16*diag[i];
183: /* multiply by the inverse of the block diagonal */
184: x1 = b[idx]; x2 = b[1+idx]; x3 = b[2+idx]; x4 = b[3+idx];
185: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
186: s2 = v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
187: s3 = v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
188: s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
189: v += 16;
191: vi = aj + diag[i] + 1;
192: nz = ai[i+1] - diag[i] - 1;
193: while (nz--) {
194: oidx = 4*(*vi++);
195: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
196: x[oidx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
197: x[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
198: x[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
199: v += 16;
200: }
201: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4;
202: idx += 4;
203: }
204: /* backward solve the L^T */
205: for (i=n-1; i>=0; i--){
206: v = aa + 16*diag[i] - 16;
207: vi = aj + diag[i] - 1;
208: nz = diag[i] - ai[i];
209: idt = 4*i;
210: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt];
211: while (nz--) {
212: idx = 4*(*vi--);
213: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
214: x[idx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
215: x[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
216: x[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
217: v -= 16;
218: }
219: }
220: VecRestoreArray(bb,&b);
221: VecRestoreArray(xx,&x);
222: PetscLogFlops(2*16*(a->nz) - 4*A->n);
223: return(0);
224: }
226: int MatSolveTranspose_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
227: {
228: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
229: int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
230: int *diag = a->diag,oidx;
231: MatScalar *aa=a->a,*v;
232: Scalar s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
233: Scalar *x,*b;
236: VecGetArray(bb,&b);
237: VecGetArray(xx,&x);
239: /* forward solve the U^T */
240: idx = 0;
241: for (i=0; i<n; i++) {
243: v = aa + 25*diag[i];
244: /* multiply by the inverse of the block diagonal */
245: x1 = b[idx]; x2 = b[1+idx]; x3 = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx];
246: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
247: s2 = v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
248: s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
249: s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
250: s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
251: v += 25;
253: vi = aj + diag[i] + 1;
254: nz = ai[i+1] - diag[i] - 1;
255: while (nz--) {
256: oidx = 5*(*vi++);
257: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
258: x[oidx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
259: x[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
260: x[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
261: x[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
262: v += 25;
263: }
264: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
265: idx += 5;
266: }
267: /* backward solve the L^T */
268: for (i=n-1; i>=0; i--){
269: v = aa + 25*diag[i] - 25;
270: vi = aj + diag[i] - 1;
271: nz = diag[i] - ai[i];
272: idt = 5*i;
273: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
274: while (nz--) {
275: idx = 5*(*vi--);
276: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
277: x[idx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
278: x[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
279: x[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
280: x[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
281: v -= 25;
282: }
283: }
284: VecRestoreArray(bb,&b);
285: VecRestoreArray(xx,&x);
286: PetscLogFlops(2*25*(a->nz) - 5*A->n);
287: return(0);
288: }
290: int MatSolveTranspose_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
291: {
292: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
293: int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
294: int *diag = a->diag,oidx;
295: MatScalar *aa=a->a,*v;
296: Scalar s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6;
297: Scalar *x,*b;
300: VecGetArray(bb,&b);
301: VecGetArray(xx,&x);
303: /* forward solve the U^T */
304: idx = 0;
305: for (i=0; i<n; i++) {
307: v = aa + 36*diag[i];
308: /* multiply by the inverse of the block diagonal */
309: x1 = b[idx]; x2 = b[1+idx]; x3 = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx];
310: x6 = b[5+idx];
311: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6;
312: s2 = v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4 + v[10]*x5 + v[11]*x6;
313: s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
314: s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
315: s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
316: s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
317: v += 36;
319: vi = aj + diag[i] + 1;
320: nz = ai[i+1] - diag[i] - 1;
321: while (nz--) {
322: oidx = 6*(*vi++);
323: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
324: x[oidx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
325: x[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
326: x[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
327: x[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
328: x[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
329: v += 36;
330: }
331: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
332: x[5+idx] = s6;
333: idx += 6;
334: }
335: /* backward solve the L^T */
336: for (i=n-1; i>=0; i--){
337: v = aa + 36*diag[i] - 36;
338: vi = aj + diag[i] - 1;
339: nz = diag[i] - ai[i];
340: idt = 6*i;
341: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
342: s6 = x[5+idt];
343: while (nz--) {
344: idx = 6*(*vi--);
345: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
346: x[idx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
347: x[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
348: x[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
349: x[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
350: x[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
351: v -= 36;
352: }
353: }
354: VecRestoreArray(bb,&b);
355: VecRestoreArray(xx,&x);
356: PetscLogFlops(2*36*(a->nz) - 6*A->n);
357: return(0);
358: }
360: int MatSolveTranspose_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
361: {
362: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
363: int ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
364: int *diag = a->diag,oidx;
365: MatScalar *aa=a->a,*v;
366: Scalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
367: Scalar *x,*b;
370: VecGetArray(bb,&b);
371: VecGetArray(xx,&x);
373: /* forward solve the U^T */
374: idx = 0;
375: for (i=0; i<n; i++) {
377: v = aa + 49*diag[i];
378: /* multiply by the inverse of the block diagonal */
379: x1 = b[idx]; x2 = b[1+idx]; x3 = b[2+idx]; x4 = b[3+idx]; x5 = b[4+idx];
380: x6 = b[5+idx]; x7 = b[6+idx];
381: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
382: s2 = v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
383: s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
384: s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
385: s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
386: s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
387: s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
388: v += 49;
390: vi = aj + diag[i] + 1;
391: nz = ai[i+1] - diag[i] - 1;
392: while (nz--) {
393: oidx = 7*(*vi++);
394: x[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
395: x[oidx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
396: x[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
397: x[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
398: x[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
399: x[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
400: x[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
401: v += 49;
402: }
403: x[idx] = s1;x[1+idx] = s2; x[2+idx] = s3;x[3+idx] = s4; x[4+idx] = s5;
404: x[5+idx] = s6;x[6+idx] = s7;
405: idx += 7;
406: }
407: /* backward solve the L^T */
408: for (i=n-1; i>=0; i--){
409: v = aa + 49*diag[i] - 49;
410: vi = aj + diag[i] - 1;
411: nz = diag[i] - ai[i];
412: idt = 7*i;
413: s1 = x[idt]; s2 = x[1+idt]; s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
414: s6 = x[5+idt];s7 = x[6+idt];
415: while (nz--) {
416: idx = 7*(*vi--);
417: x[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
418: x[idx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
419: x[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
420: x[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
421: x[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
422: x[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
423: x[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
424: v -= 49;
425: }
426: }
427: VecRestoreArray(bb,&b);
428: VecRestoreArray(xx,&x);
429: PetscLogFlops(2*49*(a->nz) - 7*A->n);
430: return(0);
431: }
433: /*---------------------------------------------------------------------------------------------*/
434: int MatSolveTranspose_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
435: {
436: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
437: IS iscol=a->col,isrow=a->row;
438: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout;
439: int *diag = a->diag;
440: MatScalar *aa=a->a,*v;
441: Scalar s1,*x,*b,*t;
444: VecGetArray(bb,&b);
445: VecGetArray(xx,&x);
446: t = a->solve_work;
448: ISGetIndices(isrow,&rout); r = rout;
449: ISGetIndices(iscol,&cout); c = cout;
451: /* copy the b into temp work space according to permutation */
452: for (i=0; i<n; i++) {
453: t[i] = b[c[i]];
454: }
456: /* forward solve the U^T */
457: for (i=0; i<n; i++) {
459: v = aa + diag[i];
460: /* multiply by the inverse of the block diagonal */
461: s1 = (*v++)*t[i];
462: vi = aj + diag[i] + 1;
463: nz = ai[i+1] - diag[i] - 1;
464: while (nz--) {
465: t[*vi++] -= (*v++)*s1;
466: }
467: t[i] = s1;
468: }
469: /* backward solve the L^T */
470: for (i=n-1; i>=0; i--){
471: v = aa + diag[i] - 1;
472: vi = aj + diag[i] - 1;
473: nz = diag[i] - ai[i];
474: s1 = t[i];
475: while (nz--) {
476: t[*vi--] -= (*v--)*s1;
477: }
478: }
480: /* copy t into x according to permutation */
481: for (i=0; i<n; i++) {
482: x[r[i]] = t[i];
483: }
485: ISRestoreIndices(isrow,&rout);
486: ISRestoreIndices(iscol,&cout);
487: VecRestoreArray(bb,&b);
488: VecRestoreArray(xx,&x);
489: PetscLogFlops(2*(a->nz) - A->n);
490: return(0);
491: }
493: int MatSolveTranspose_SeqBAIJ_2(Mat A,Vec bb,Vec xx)
494: {
495: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
496: IS iscol=a->col,isrow=a->row;
497: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
498: int *diag = a->diag,ii,ic,ir,oidx;
499: MatScalar *aa=a->a,*v;
500: Scalar s1,s2,x1,x2;
501: Scalar *x,*b,*t;
504: VecGetArray(bb,&b);
505: VecGetArray(xx,&x);
506: t = a->solve_work;
508: ISGetIndices(isrow,&rout); r = rout;
509: ISGetIndices(iscol,&cout); c = cout;
511: /* copy the b into temp work space according to permutation */
512: ii = 0;
513: for (i=0; i<n; i++) {
514: ic = 2*c[i];
515: t[ii] = b[ic];
516: t[ii+1] = b[ic+1];
517: ii += 2;
518: }
520: /* forward solve the U^T */
521: idx = 0;
522: for (i=0; i<n; i++) {
524: v = aa + 4*diag[i];
525: /* multiply by the inverse of the block diagonal */
526: x1 = t[idx]; x2 = t[1+idx];
527: s1 = v[0]*x1 + v[1]*x2;
528: s2 = v[2]*x1 + v[3]*x2;
529: v += 4;
531: vi = aj + diag[i] + 1;
532: nz = ai[i+1] - diag[i] - 1;
533: while (nz--) {
534: oidx = 2*(*vi++);
535: t[oidx] -= v[0]*s1 + v[1]*s2;
536: t[oidx+1] -= v[2]*s1 + v[3]*s2;
537: v += 4;
538: }
539: t[idx] = s1;t[1+idx] = s2;
540: idx += 2;
541: }
542: /* backward solve the L^T */
543: for (i=n-1; i>=0; i--){
544: v = aa + 4*diag[i] - 4;
545: vi = aj + diag[i] - 1;
546: nz = diag[i] - ai[i];
547: idt = 2*i;
548: s1 = t[idt]; s2 = t[1+idt];
549: while (nz--) {
550: idx = 2*(*vi--);
551: t[idx] -= v[0]*s1 + v[1]*s2;
552: t[idx+1] -= v[2]*s1 + v[3]*s2;
553: v -= 4;
554: }
555: }
557: /* copy t into x according to permutation */
558: ii = 0;
559: for (i=0; i<n; i++) {
560: ir = 2*r[i];
561: x[ir] = t[ii];
562: x[ir+1] = t[ii+1];
563: ii += 2;
564: }
566: ISRestoreIndices(isrow,&rout);
567: ISRestoreIndices(iscol,&cout);
568: VecRestoreArray(bb,&b);
569: VecRestoreArray(xx,&x);
570: PetscLogFlops(2*4*(a->nz) - 2*A->n);
571: return(0);
572: }
574: int MatSolveTranspose_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
575: {
576: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
577: IS iscol=a->col,isrow=a->row;
578: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
579: int *diag = a->diag,ii,ic,ir,oidx;
580: MatScalar *aa=a->a,*v;
581: Scalar s1,s2,s3,x1,x2,x3;
582: Scalar *x,*b,*t;
585: VecGetArray(bb,&b);
586: VecGetArray(xx,&x);
587: t = a->solve_work;
589: ISGetIndices(isrow,&rout); r = rout;
590: ISGetIndices(iscol,&cout); c = cout;
592: /* copy the b into temp work space according to permutation */
593: ii = 0;
594: for (i=0; i<n; i++) {
595: ic = 3*c[i];
596: t[ii] = b[ic];
597: t[ii+1] = b[ic+1];
598: t[ii+2] = b[ic+2];
599: ii += 3;
600: }
602: /* forward solve the U^T */
603: idx = 0;
604: for (i=0; i<n; i++) {
606: v = aa + 9*diag[i];
607: /* multiply by the inverse of the block diagonal */
608: x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx];
609: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3;
610: s2 = v[3]*x1 + v[4]*x2 + v[5]*x3;
611: s3 = v[6]*x1 + v[7]*x2 + v[8]*x3;
612: v += 9;
614: vi = aj + diag[i] + 1;
615: nz = ai[i+1] - diag[i] - 1;
616: while (nz--) {
617: oidx = 3*(*vi++);
618: t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
619: t[oidx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
620: t[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
621: v += 9;
622: }
623: t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;
624: idx += 3;
625: }
626: /* backward solve the L^T */
627: for (i=n-1; i>=0; i--){
628: v = aa + 9*diag[i] - 9;
629: vi = aj + diag[i] - 1;
630: nz = diag[i] - ai[i];
631: idt = 3*i;
632: s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];
633: while (nz--) {
634: idx = 3*(*vi--);
635: t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
636: t[idx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
637: t[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
638: v -= 9;
639: }
640: }
642: /* copy t into x according to permutation */
643: ii = 0;
644: for (i=0; i<n; i++) {
645: ir = 3*r[i];
646: x[ir] = t[ii];
647: x[ir+1] = t[ii+1];
648: x[ir+2] = t[ii+2];
649: ii += 3;
650: }
652: ISRestoreIndices(isrow,&rout);
653: ISRestoreIndices(iscol,&cout);
654: VecRestoreArray(bb,&b);
655: VecRestoreArray(xx,&x);
656: PetscLogFlops(2*9*(a->nz) - 3*A->n);
657: return(0);
658: }
660: int MatSolveTranspose_SeqBAIJ_4(Mat A,Vec bb,Vec xx)
661: {
662: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
663: IS iscol=a->col,isrow=a->row;
664: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
665: int *diag = a->diag,ii,ic,ir,oidx;
666: MatScalar *aa=a->a,*v;
667: Scalar s1,s2,s3,s4,x1,x2,x3,x4;
668: Scalar *x,*b,*t;
671: VecGetArray(bb,&b);
672: VecGetArray(xx,&x);
673: t = a->solve_work;
675: ISGetIndices(isrow,&rout); r = rout;
676: ISGetIndices(iscol,&cout); c = cout;
678: /* copy the b into temp work space according to permutation */
679: ii = 0;
680: for (i=0; i<n; i++) {
681: ic = 4*c[i];
682: t[ii] = b[ic];
683: t[ii+1] = b[ic+1];
684: t[ii+2] = b[ic+2];
685: t[ii+3] = b[ic+3];
686: ii += 4;
687: }
689: /* forward solve the U^T */
690: idx = 0;
691: for (i=0; i<n; i++) {
693: v = aa + 16*diag[i];
694: /* multiply by the inverse of the block diagonal */
695: x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; x4 = t[3+idx];
696: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
697: s2 = v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
698: s3 = v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
699: s4 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
700: v += 16;
702: vi = aj + diag[i] + 1;
703: nz = ai[i+1] - diag[i] - 1;
704: while (nz--) {
705: oidx = 4*(*vi++);
706: t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
707: t[oidx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
708: t[oidx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
709: t[oidx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
710: v += 16;
711: }
712: t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4;
713: idx += 4;
714: }
715: /* backward solve the L^T */
716: for (i=n-1; i>=0; i--){
717: v = aa + 16*diag[i] - 16;
718: vi = aj + diag[i] - 1;
719: nz = diag[i] - ai[i];
720: idt = 4*i;
721: s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt];
722: while (nz--) {
723: idx = 4*(*vi--);
724: t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4;
725: t[idx+1] -= v[4]*s1 + v[5]*s2 + v[6]*s3 + v[7]*s4;
726: t[idx+2] -= v[8]*s1 + v[9]*s2 + v[10]*s3 + v[11]*s4;
727: t[idx+3] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4;
728: v -= 16;
729: }
730: }
732: /* copy t into x according to permutation */
733: ii = 0;
734: for (i=0; i<n; i++) {
735: ir = 4*r[i];
736: x[ir] = t[ii];
737: x[ir+1] = t[ii+1];
738: x[ir+2] = t[ii+2];
739: x[ir+3] = t[ii+3];
740: ii += 4;
741: }
743: ISRestoreIndices(isrow,&rout);
744: ISRestoreIndices(iscol,&cout);
745: VecRestoreArray(bb,&b);
746: VecRestoreArray(xx,&x);
747: PetscLogFlops(2*16*(a->nz) - 4*A->n);
748: return(0);
749: }
751: int MatSolveTranspose_SeqBAIJ_5(Mat A,Vec bb,Vec xx)
752: {
753: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
754: IS iscol=a->col,isrow=a->row;
755: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
756: int *diag = a->diag,ii,ic,ir,oidx;
757: MatScalar *aa=a->a,*v;
758: Scalar s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
759: Scalar *x,*b,*t;
762: VecGetArray(bb,&b);
763: VecGetArray(xx,&x);
764: t = a->solve_work;
766: ISGetIndices(isrow,&rout); r = rout;
767: ISGetIndices(iscol,&cout); c = cout;
769: /* copy the b into temp work space according to permutation */
770: ii = 0;
771: for (i=0; i<n; i++) {
772: ic = 5*c[i];
773: t[ii] = b[ic];
774: t[ii+1] = b[ic+1];
775: t[ii+2] = b[ic+2];
776: t[ii+3] = b[ic+3];
777: t[ii+4] = b[ic+4];
778: ii += 5;
779: }
781: /* forward solve the U^T */
782: idx = 0;
783: for (i=0; i<n; i++) {
785: v = aa + 25*diag[i];
786: /* multiply by the inverse of the block diagonal */
787: x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
788: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
789: s2 = v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
790: s3 = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
791: s4 = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
792: s5 = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
793: v += 25;
795: vi = aj + diag[i] + 1;
796: nz = ai[i+1] - diag[i] - 1;
797: while (nz--) {
798: oidx = 5*(*vi++);
799: t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
800: t[oidx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
801: t[oidx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
802: t[oidx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
803: t[oidx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
804: v += 25;
805: }
806: t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
807: idx += 5;
808: }
809: /* backward solve the L^T */
810: for (i=n-1; i>=0; i--){
811: v = aa + 25*diag[i] - 25;
812: vi = aj + diag[i] - 1;
813: nz = diag[i] - ai[i];
814: idt = 5*i;
815: s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
816: while (nz--) {
817: idx = 5*(*vi--);
818: t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5;
819: t[idx+1] -= v[5]*s1 + v[6]*s2 + v[7]*s3 + v[8]*s4 + v[9]*s5;
820: t[idx+2] -= v[10]*s1 + v[11]*s2 + v[12]*s3 + v[13]*s4 + v[14]*s5;
821: t[idx+3] -= v[15]*s1 + v[16]*s2 + v[17]*s3 + v[18]*s4 + v[19]*s5;
822: t[idx+4] -= v[20]*s1 + v[21]*s2 + v[22]*s3 + v[23]*s4 + v[24]*s5;
823: v -= 25;
824: }
825: }
827: /* copy t into x according to permutation */
828: ii = 0;
829: for (i=0; i<n; i++) {
830: ir = 5*r[i];
831: x[ir] = t[ii];
832: x[ir+1] = t[ii+1];
833: x[ir+2] = t[ii+2];
834: x[ir+3] = t[ii+3];
835: x[ir+4] = t[ii+4];
836: ii += 5;
837: }
839: ISRestoreIndices(isrow,&rout);
840: ISRestoreIndices(iscol,&cout);
841: VecRestoreArray(bb,&b);
842: VecRestoreArray(xx,&x);
843: PetscLogFlops(2*25*(a->nz) - 5*A->n);
844: return(0);
845: }
847: int MatSolveTranspose_SeqBAIJ_6(Mat A,Vec bb,Vec xx)
848: {
849: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
850: IS iscol=a->col,isrow=a->row;
851: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
852: int *diag = a->diag,ii,ic,ir,oidx;
853: MatScalar *aa=a->a,*v;
854: Scalar s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6;
855: Scalar *x,*b,*t;
858: VecGetArray(bb,&b);
859: VecGetArray(xx,&x);
860: t = a->solve_work;
862: ISGetIndices(isrow,&rout); r = rout;
863: ISGetIndices(iscol,&cout); c = cout;
865: /* copy the b into temp work space according to permutation */
866: ii = 0;
867: for (i=0; i<n; i++) {
868: ic = 6*c[i];
869: t[ii] = b[ic];
870: t[ii+1] = b[ic+1];
871: t[ii+2] = b[ic+2];
872: t[ii+3] = b[ic+3];
873: t[ii+4] = b[ic+4];
874: t[ii+5] = b[ic+5];
875: ii += 6;
876: }
878: /* forward solve the U^T */
879: idx = 0;
880: for (i=0; i<n; i++) {
882: v = aa + 36*diag[i];
883: /* multiply by the inverse of the block diagonal */
884: x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
885: x6 = t[5+idx];
886: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6;
887: s2 = v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4 + v[10]*x5 + v[11]*x6;
888: s3 = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
889: s4 = v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
890: s5 = v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
891: s6 = v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
892: v += 36;
894: vi = aj + diag[i] + 1;
895: nz = ai[i+1] - diag[i] - 1;
896: while (nz--) {
897: oidx = 6*(*vi++);
898: t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
899: t[oidx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
900: t[oidx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
901: t[oidx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
902: t[oidx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
903: t[oidx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
904: v += 36;
905: }
906: t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
907: t[5+idx] = s6;
908: idx += 6;
909: }
910: /* backward solve the L^T */
911: for (i=n-1; i>=0; i--){
912: v = aa + 36*diag[i] - 36;
913: vi = aj + diag[i] - 1;
914: nz = diag[i] - ai[i];
915: idt = 6*i;
916: s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
917: s6 = t[5+idt];
918: while (nz--) {
919: idx = 6*(*vi--);
920: t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6;
921: t[idx+1] -= v[6]*s1 + v[7]*s2 + v[8]*s3 + v[9]*s4 + v[10]*s5 + v[11]*s6;
922: t[idx+2] -= v[12]*s1 + v[13]*s2 + v[14]*s3 + v[15]*s4 + v[16]*s5 + v[17]*s6;
923: t[idx+3] -= v[18]*s1 + v[19]*s2 + v[20]*s3 + v[21]*s4 + v[22]*s5 + v[23]*s6;
924: t[idx+4] -= v[24]*s1 + v[25]*s2 + v[26]*s3 + v[27]*s4 + v[28]*s5 + v[29]*s6;
925: t[idx+5] -= v[30]*s1 + v[31]*s2 + v[32]*s3 + v[33]*s4 + v[34]*s5 + v[35]*s6;
926: v -= 36;
927: }
928: }
930: /* copy t into x according to permutation */
931: ii = 0;
932: for (i=0; i<n; i++) {
933: ir = 6*r[i];
934: x[ir] = t[ii];
935: x[ir+1] = t[ii+1];
936: x[ir+2] = t[ii+2];
937: x[ir+3] = t[ii+3];
938: x[ir+4] = t[ii+4];
939: x[ir+5] = t[ii+5];
940: ii += 6;
941: }
943: ISRestoreIndices(isrow,&rout);
944: ISRestoreIndices(iscol,&cout);
945: VecRestoreArray(bb,&b);
946: VecRestoreArray(xx,&x);
947: PetscLogFlops(2*36*(a->nz) - 6*A->n);
948: return(0);
949: }
951: int MatSolveTranspose_SeqBAIJ_7(Mat A,Vec bb,Vec xx)
952: {
953: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
954: IS iscol=a->col,isrow=a->row;
955: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,*rout,*cout;
956: int *diag = a->diag,ii,ic,ir,oidx;
957: MatScalar *aa=a->a,*v;
958: Scalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
959: Scalar *x,*b,*t;
962: VecGetArray(bb,&b);
963: VecGetArray(xx,&x);
964: t = a->solve_work;
966: ISGetIndices(isrow,&rout); r = rout;
967: ISGetIndices(iscol,&cout); c = cout;
969: /* copy the b into temp work space according to permutation */
970: ii = 0;
971: for (i=0; i<n; i++) {
972: ic = 7*c[i];
973: t[ii] = b[ic];
974: t[ii+1] = b[ic+1];
975: t[ii+2] = b[ic+2];
976: t[ii+3] = b[ic+3];
977: t[ii+4] = b[ic+4];
978: t[ii+5] = b[ic+5];
979: t[ii+6] = b[ic+6];
980: ii += 7;
981: }
983: /* forward solve the U^T */
984: idx = 0;
985: for (i=0; i<n; i++) {
987: v = aa + 49*diag[i];
988: /* multiply by the inverse of the block diagonal */
989: x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
990: x6 = t[5+idx]; x7 = t[6+idx];
991: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
992: s2 = v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
993: s3 = v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
994: s4 = v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
995: s5 = v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
996: s6 = v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
997: s7 = v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
998: v += 49;
1000: vi = aj + diag[i] + 1;
1001: nz = ai[i+1] - diag[i] - 1;
1002: while (nz--) {
1003: oidx = 7*(*vi++);
1004: t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
1005: t[oidx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
1006: t[oidx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
1007: t[oidx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
1008: t[oidx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
1009: t[oidx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
1010: t[oidx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
1011: v += 49;
1012: }
1013: t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
1014: t[5+idx] = s6;t[6+idx] = s7;
1015: idx += 7;
1016: }
1017: /* backward solve the L^T */
1018: for (i=n-1; i>=0; i--){
1019: v = aa + 49*diag[i] - 49;
1020: vi = aj + diag[i] - 1;
1021: nz = diag[i] - ai[i];
1022: idt = 7*i;
1023: s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
1024: s6 = t[5+idt];s7 = t[6+idt];
1025: while (nz--) {
1026: idx = 7*(*vi--);
1027: t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3 + v[3]*s4 + v[4]*s5 + v[5]*s6 + v[6]*s7;
1028: t[idx+1] -= v[7]*s1 + v[8]*s2 + v[9]*s3 + v[10]*s4 + v[11]*s5 + v[12]*s6 + v[13]*s7;
1029: t[idx+2] -= v[14]*s1 + v[15]*s2 + v[16]*s3 + v[17]*s4 + v[18]*s5 + v[19]*s6 + v[20]*s7;
1030: t[idx+3] -= v[21]*s1 + v[22]*s2 + v[23]*s3 + v[24]*s4 + v[25]*s5 + v[26]*s6 + v[27]*s7;
1031: t[idx+4] -= v[28]*s1 + v[29]*s2 + v[30]*s3 + v[31]*s4 + v[32]*s5 + v[33]*s6 + v[34]*s7;
1032: t[idx+5] -= v[35]*s1 + v[36]*s2 + v[37]*s3 + v[38]*s4 + v[39]*s5 + v[40]*s6 + v[41]*s7;
1033: t[idx+6] -= v[42]*s1 + v[43]*s2 + v[44]*s3 + v[45]*s4 + v[46]*s5 + v[47]*s6 + v[48]*s7;
1034: v -= 49;
1035: }
1036: }
1038: /* copy t into x according to permutation */
1039: ii = 0;
1040: for (i=0; i<n; i++) {
1041: ir = 7*r[i];
1042: x[ir] = t[ii];
1043: x[ir+1] = t[ii+1];
1044: x[ir+2] = t[ii+2];
1045: x[ir+3] = t[ii+3];
1046: x[ir+4] = t[ii+4];
1047: x[ir+5] = t[ii+5];
1048: x[ir+6] = t[ii+6];
1049: ii += 7;
1050: }
1052: ISRestoreIndices(isrow,&rout);
1053: ISRestoreIndices(iscol,&cout);
1054: VecRestoreArray(bb,&b);
1055: VecRestoreArray(xx,&x);
1056: PetscLogFlops(2*49*(a->nz) - 7*A->n);
1057: return(0);
1058: }
1060: /* ----------------------------------------------------------- */
1061: int MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx)
1062: {
1063: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1064: IS iscol=a->col,isrow=a->row;
1065: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
1066: int nz,bs=a->bs,bs2=a->bs2,*rout,*cout;
1067: MatScalar *aa=a->a,*v;
1068: Scalar *x,*b,*s,*t,*ls;
1071: VecGetArray(bb,&b);
1072: VecGetArray(xx,&x);
1073: t = a->solve_work;
1075: ISGetIndices(isrow,&rout); r = rout;
1076: ISGetIndices(iscol,&cout); c = cout + (n-1);
1078: /* forward solve the lower triangular */
1079: PetscMemcpy(t,b+bs*(*r++),bs*sizeof(Scalar));
1080: for (i=1; i<n; i++) {
1081: v = aa + bs2*ai[i];
1082: vi = aj + ai[i];
1083: nz = a->diag[i] - ai[i];
1084: s = t + bs*i;
1085: PetscMemcpy(s,b+bs*(*r++),bs*sizeof(Scalar));
1086: while (nz--) {
1087: Kernel_v_gets_v_minus_A_times_w(bs,s,v,t+bs*(*vi++));
1088: v += bs2;
1089: }
1090: }
1091: /* backward solve the upper triangular */
1092: ls = a->solve_work + A->n;
1093: for (i=n-1; i>=0; i--){
1094: v = aa + bs2*(a->diag[i] + 1);
1095: vi = aj + a->diag[i] + 1;
1096: nz = ai[i+1] - a->diag[i] - 1;
1097: PetscMemcpy(ls,t+i*bs,bs*sizeof(Scalar));
1098: while (nz--) {
1099: Kernel_v_gets_v_minus_A_times_w(bs,ls,v,t+bs*(*vi++));
1100: v += bs2;
1101: }
1102: Kernel_w_gets_A_times_v(bs,ls,aa+bs2*a->diag[i],t+i*bs);
1103: PetscMemcpy(x + bs*(*c--),t+i*bs,bs*sizeof(Scalar));
1104: }
1106: ISRestoreIndices(isrow,&rout);
1107: ISRestoreIndices(iscol,&cout);
1108: VecRestoreArray(bb,&b);
1109: VecRestoreArray(xx,&x);
1110: PetscLogFlops(2*(a->bs2)*(a->nz) - a->bs*A->n);
1111: return(0);
1112: }
1114: int MatSolve_SeqBAIJ_7(Mat A,Vec bb,Vec xx)
1115: {
1116: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1117: IS iscol=a->col,isrow=a->row;
1118: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
1119: int *diag = a->diag;
1120: MatScalar *aa=a->a,*v;
1121: Scalar s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
1122: Scalar *x,*b,*t;
1125: VecGetArray(bb,&b);
1126: VecGetArray(xx,&x);
1127: t = a->solve_work;
1129: ISGetIndices(isrow,&rout); r = rout;
1130: ISGetIndices(iscol,&cout); c = cout + (n-1);
1132: /* forward solve the lower triangular */
1133: idx = 7*(*r++);
1134: t[0] = b[idx]; t[1] = b[1+idx];
1135: t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx];
1136: t[5] = b[5+idx]; t[6] = b[6+idx];
1138: for (i=1; i<n; i++) {
1139: v = aa + 49*ai[i];
1140: vi = aj + ai[i];
1141: nz = diag[i] - ai[i];
1142: idx = 7*(*r++);
1143: s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1144: s5 = b[4+idx];s6 = b[5+idx];s7 = b[6+idx];
1145: while (nz--) {
1146: idx = 7*(*vi++);
1147: x1 = t[idx]; x2 = t[1+idx];x3 = t[2+idx];
1148: x4 = t[3+idx];x5 = t[4+idx];
1149: x6 = t[5+idx];x7 = t[6+idx];
1150: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1151: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1152: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1153: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1154: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1155: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1156: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1157: v += 49;
1158: }
1159: idx = 7*i;
1160: t[idx] = s1;t[1+idx] = s2;
1161: t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
1162: t[5+idx] = s6;t[6+idx] = s7;
1163: }
1164: /* backward solve the upper triangular */
1165: for (i=n-1; i>=0; i--){
1166: v = aa + 49*diag[i] + 49;
1167: vi = aj + diag[i] + 1;
1168: nz = ai[i+1] - diag[i] - 1;
1169: idt = 7*i;
1170: s1 = t[idt]; s2 = t[1+idt];
1171: s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
1172: s6 = t[5+idt];s7 = t[6+idt];
1173: while (nz--) {
1174: idx = 7*(*vi++);
1175: x1 = t[idx]; x2 = t[1+idx];
1176: x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
1177: x6 = t[5+idx]; x7 = t[6+idx];
1178: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1179: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1180: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1181: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1182: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1183: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1184: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1185: v += 49;
1186: }
1187: idc = 7*(*c--);
1188: v = aa + 49*diag[i];
1189: x[idc] = t[idt] = v[0]*s1+v[7]*s2+v[14]*s3+
1190: v[21]*s4+v[28]*s5+v[35]*s6+v[42]*s7;
1191: x[1+idc] = t[1+idt] = v[1]*s1+v[8]*s2+v[15]*s3+
1192: v[22]*s4+v[29]*s5+v[36]*s6+v[43]*s7;
1193: x[2+idc] = t[2+idt] = v[2]*s1+v[9]*s2+v[16]*s3+
1194: v[23]*s4+v[30]*s5+v[37]*s6+v[44]*s7;
1195: x[3+idc] = t[3+idt] = v[3]*s1+v[10]*s2+v[17]*s3+
1196: v[24]*s4+v[31]*s5+v[38]*s6+v[45]*s7;
1197: x[4+idc] = t[4+idt] = v[4]*s1+v[11]*s2+v[18]*s3+
1198: v[25]*s4+v[32]*s5+v[39]*s6+v[46]*s7;
1199: x[5+idc] = t[5+idt] = v[5]*s1+v[12]*s2+v[19]*s3+
1200: v[26]*s4+v[33]*s5+v[40]*s6+v[47]*s7;
1201: x[6+idc] = t[6+idt] = v[6]*s1+v[13]*s2+v[20]*s3+
1202: v[27]*s4+v[34]*s5+v[41]*s6+v[48]*s7;
1203: }
1205: ISRestoreIndices(isrow,&rout);
1206: ISRestoreIndices(iscol,&cout);
1207: VecRestoreArray(bb,&b);
1208: VecRestoreArray(xx,&x);
1209: PetscLogFlops(2*49*(a->nz) - 7*A->n);
1210: return(0);
1211: }
1213: int MatSolve_SeqBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
1214: {
1215: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1216: int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
1217: int ierr,*diag = a->diag,jdx;
1218: MatScalar *aa=a->a,*v;
1219: Scalar *x,*b,s1,s2,s3,s4,s5,s6,s7,x1,x2,x3,x4,x5,x6,x7;
1222: VecGetArray(bb,&b);
1223: VecGetArray(xx,&x);
1224: /* forward solve the lower triangular */
1225: idx = 0;
1226: x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx];
1227: x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
1228: x[6] = b[6+idx];
1229: for (i=1; i<n; i++) {
1230: v = aa + 49*ai[i];
1231: vi = aj + ai[i];
1232: nz = diag[i] - ai[i];
1233: idx = 7*i;
1234: s1 = b[idx]; s2 = b[1+idx]; s3 = b[2+idx];
1235: s4 = b[3+idx]; s5 = b[4+idx]; s6 = b[5+idx];
1236: s7 = b[6+idx];
1237: while (nz--) {
1238: jdx = 7*(*vi++);
1239: x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx];
1240: x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
1241: x7 = x[6+jdx];
1242: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1243: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1244: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1245: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1246: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1247: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1248: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1249: v += 49;
1250: }
1251: x[idx] = s1;
1252: x[1+idx] = s2;
1253: x[2+idx] = s3;
1254: x[3+idx] = s4;
1255: x[4+idx] = s5;
1256: x[5+idx] = s6;
1257: x[6+idx] = s7;
1258: }
1259: /* backward solve the upper triangular */
1260: for (i=n-1; i>=0; i--){
1261: v = aa + 49*diag[i] + 49;
1262: vi = aj + diag[i] + 1;
1263: nz = ai[i+1] - diag[i] - 1;
1264: idt = 7*i;
1265: s1 = x[idt]; s2 = x[1+idt];
1266: s3 = x[2+idt]; s4 = x[3+idt];
1267: s5 = x[4+idt]; s6 = x[5+idt];
1268: s7 = x[6+idt];
1269: while (nz--) {
1270: idx = 7*(*vi++);
1271: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1272: x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
1273: x7 = x[6+idx];
1274: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1275: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1276: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1277: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1278: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1279: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1280: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1281: v += 49;
1282: }
1283: v = aa + 49*diag[i];
1284: x[idt] = v[0]*s1 + v[7]*s2 + v[14]*s3 + v[21]*s4
1285: + v[28]*s5 + v[35]*s6 + v[42]*s7;
1286: x[1+idt] = v[1]*s1 + v[8]*s2 + v[15]*s3 + v[22]*s4
1287: + v[29]*s5 + v[36]*s6 + v[43]*s7;
1288: x[2+idt] = v[2]*s1 + v[9]*s2 + v[16]*s3 + v[23]*s4
1289: + v[30]*s5 + v[37]*s6 + v[44]*s7;
1290: x[3+idt] = v[3]*s1 + v[10]*s2 + v[17]*s3 + v[24]*s4
1291: + v[31]*s5 + v[38]*s6 + v[45]*s7;
1292: x[4+idt] = v[4]*s1 + v[11]*s2 + v[18]*s3 + v[25]*s4
1293: + v[32]*s5 + v[39]*s6 + v[46]*s7;
1294: x[5+idt] = v[5]*s1 + v[12]*s2 + v[19]*s3 + v[26]*s4
1295: + v[33]*s5 + v[40]*s6 + v[47]*s7;
1296: x[6+idt] = v[6]*s1 + v[13]*s2 + v[20]*s3 + v[27]*s4
1297: + v[34]*s5 + v[41]*s6 + v[48]*s7;
1298: }
1300: VecRestoreArray(bb,&b);
1301: VecRestoreArray(xx,&x);
1302: PetscLogFlops(2*36*(a->nz) - 6*A->n);
1303: return(0);
1304: }
1306: int MatSolve_SeqBAIJ_6(Mat A,Vec bb,Vec xx)
1307: {
1308: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1309: IS iscol=a->col,isrow=a->row;
1310: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
1311: int *diag = a->diag;
1312: MatScalar *aa=a->a,*v;
1313: Scalar *x,*b,s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6,*t;
1316: VecGetArray(bb,&b);
1317: VecGetArray(xx,&x);
1318: t = a->solve_work;
1320: ISGetIndices(isrow,&rout); r = rout;
1321: ISGetIndices(iscol,&cout); c = cout + (n-1);
1323: /* forward solve the lower triangular */
1324: idx = 6*(*r++);
1325: t[0] = b[idx]; t[1] = b[1+idx];
1326: t[2] = b[2+idx]; t[3] = b[3+idx];
1327: t[4] = b[4+idx]; t[5] = b[5+idx];
1328: for (i=1; i<n; i++) {
1329: v = aa + 36*ai[i];
1330: vi = aj + ai[i];
1331: nz = diag[i] - ai[i];
1332: idx = 6*(*r++);
1333: s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1334: s5 = b[4+idx]; s6 = b[5+idx];
1335: while (nz--) {
1336: idx = 6*(*vi++);
1337: x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx];
1338: x4 = t[3+idx]; x5 = t[4+idx]; x6 = t[5+idx];
1339: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1340: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1341: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1342: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1343: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1344: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
1345: v += 36;
1346: }
1347: idx = 6*i;
1348: t[idx] = s1;t[1+idx] = s2;
1349: t[2+idx] = s3;t[3+idx] = s4;
1350: t[4+idx] = s5;t[5+idx] = s6;
1351: }
1352: /* backward solve the upper triangular */
1353: for (i=n-1; i>=0; i--){
1354: v = aa + 36*diag[i] + 36;
1355: vi = aj + diag[i] + 1;
1356: nz = ai[i+1] - diag[i] - 1;
1357: idt = 6*i;
1358: s1 = t[idt]; s2 = t[1+idt];
1359: s3 = t[2+idt];s4 = t[3+idt];
1360: s5 = t[4+idt];s6 = t[5+idt];
1361: while (nz--) {
1362: idx = 6*(*vi++);
1363: x1 = t[idx]; x2 = t[1+idx];
1364: x3 = t[2+idx]; x4 = t[3+idx];
1365: x5 = t[4+idx]; x6 = t[5+idx];
1366: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1367: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1368: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1369: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1370: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1371: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
1372: v += 36;
1373: }
1374: idc = 6*(*c--);
1375: v = aa + 36*diag[i];
1376: x[idc] = t[idt] = v[0]*s1+v[6]*s2+v[12]*s3+
1377: v[18]*s4+v[24]*s5+v[30]*s6;
1378: x[1+idc] = t[1+idt] = v[1]*s1+v[7]*s2+v[13]*s3+
1379: v[19]*s4+v[25]*s5+v[31]*s6;
1380: x[2+idc] = t[2+idt] = v[2]*s1+v[8]*s2+v[14]*s3+
1381: v[20]*s4+v[26]*s5+v[32]*s6;
1382: x[3+idc] = t[3+idt] = v[3]*s1+v[9]*s2+v[15]*s3+
1383: v[21]*s4+v[27]*s5+v[33]*s6;
1384: x[4+idc] = t[4+idt] = v[4]*s1+v[10]*s2+v[16]*s3+
1385: v[22]*s4+v[28]*s5+v[34]*s6;
1386: x[5+idc] = t[5+idt] = v[5]*s1+v[11]*s2+v[17]*s3+
1387: v[23]*s4+v[29]*s5+v[35]*s6;
1388: }
1390: ISRestoreIndices(isrow,&rout);
1391: ISRestoreIndices(iscol,&cout);
1392: VecRestoreArray(bb,&b);
1393: VecRestoreArray(xx,&x);
1394: PetscLogFlops(2*36*(a->nz) - 6*A->n);
1395: return(0);
1396: }
1398: int MatSolve_SeqBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
1399: {
1400: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1401: int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
1402: int ierr,*diag = a->diag,jdx;
1403: MatScalar *aa=a->a,*v;
1404: Scalar *x,*b,s1,s2,s3,s4,s5,s6,x1,x2,x3,x4,x5,x6;
1407: VecGetArray(bb,&b);
1408: VecGetArray(xx,&x);
1409: /* forward solve the lower triangular */
1410: idx = 0;
1411: x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx];
1412: x[3] = b[3+idx]; x[4] = b[4+idx]; x[5] = b[5+idx];
1413: for (i=1; i<n; i++) {
1414: v = aa + 36*ai[i];
1415: vi = aj + ai[i];
1416: nz = diag[i] - ai[i];
1417: idx = 6*i;
1418: s1 = b[idx]; s2 = b[1+idx]; s3 = b[2+idx];
1419: s4 = b[3+idx]; s5 = b[4+idx]; s6 = b[5+idx];
1420: while (nz--) {
1421: jdx = 6*(*vi++);
1422: x1 = x[jdx]; x2 = x[1+jdx]; x3 = x[2+jdx];
1423: x4 = x[3+jdx]; x5 = x[4+jdx]; x6 = x[5+jdx];
1424: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1425: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1426: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1427: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1428: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1429: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
1430: v += 36;
1431: }
1432: x[idx] = s1;
1433: x[1+idx] = s2;
1434: x[2+idx] = s3;
1435: x[3+idx] = s4;
1436: x[4+idx] = s5;
1437: x[5+idx] = s6;
1438: }
1439: /* backward solve the upper triangular */
1440: for (i=n-1; i>=0; i--){
1441: v = aa + 36*diag[i] + 36;
1442: vi = aj + diag[i] + 1;
1443: nz = ai[i+1] - diag[i] - 1;
1444: idt = 6*i;
1445: s1 = x[idt]; s2 = x[1+idt];
1446: s3 = x[2+idt]; s4 = x[3+idt];
1447: s5 = x[4+idt]; s6 = x[5+idt];
1448: while (nz--) {
1449: idx = 6*(*vi++);
1450: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1451: x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
1452: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1453: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1454: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1455: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1456: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1457: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
1458: v += 36;
1459: }
1460: v = aa + 36*diag[i];
1461: x[idt] = v[0]*s1 + v[6]*s2 + v[12]*s3 + v[18]*s4 + v[24]*s5 + v[30]*s6;
1462: x[1+idt] = v[1]*s1 + v[7]*s2 + v[13]*s3 + v[19]*s4 + v[25]*s5 + v[31]*s6;
1463: x[2+idt] = v[2]*s1 + v[8]*s2 + v[14]*s3 + v[20]*s4 + v[26]*s5 + v[32]*s6;
1464: x[3+idt] = v[3]*s1 + v[9]*s2 + v[15]*s3 + v[21]*s4 + v[27]*s5 + v[33]*s6;
1465: x[4+idt] = v[4]*s1 + v[10]*s2 + v[16]*s3 + v[22]*s4 + v[28]*s5 + v[34]*s6;
1466: x[5+idt] = v[5]*s1 + v[11]*s2 + v[17]*s3 + v[23]*s4 + v[29]*s5 + v[35]*s6;
1467: }
1469: VecRestoreArray(bb,&b);
1470: VecRestoreArray(xx,&x);
1471: PetscLogFlops(2*36*(a->nz) - 6*A->n);
1472: return(0);
1473: }
1475: int MatSolve_SeqBAIJ_5(Mat A,Vec bb,Vec xx)
1476: {
1477: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1478: IS iscol=a->col,isrow=a->row;
1479: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
1480: int *diag = a->diag;
1481: MatScalar *aa=a->a,*v;
1482: Scalar *x,*b,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5,*t;
1485: VecGetArray(bb,&b);
1486: VecGetArray(xx,&x);
1487: t = a->solve_work;
1489: ISGetIndices(isrow,&rout); r = rout;
1490: ISGetIndices(iscol,&cout); c = cout + (n-1);
1492: /* forward solve the lower triangular */
1493: idx = 5*(*r++);
1494: t[0] = b[idx]; t[1] = b[1+idx];
1495: t[2] = b[2+idx]; t[3] = b[3+idx]; t[4] = b[4+idx];
1496: for (i=1; i<n; i++) {
1497: v = aa + 25*ai[i];
1498: vi = aj + ai[i];
1499: nz = diag[i] - ai[i];
1500: idx = 5*(*r++);
1501: s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1502: s5 = b[4+idx];
1503: while (nz--) {
1504: idx = 5*(*vi++);
1505: x1 = t[idx]; x2 = t[1+idx];x3 = t[2+idx];
1506: x4 = t[3+idx];x5 = t[4+idx];
1507: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
1508: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
1509: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
1510: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
1511: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
1512: v += 25;
1513: }
1514: idx = 5*i;
1515: t[idx] = s1;t[1+idx] = s2;
1516: t[2+idx] = s3;t[3+idx] = s4; t[4+idx] = s5;
1517: }
1518: /* backward solve the upper triangular */
1519: for (i=n-1; i>=0; i--){
1520: v = aa + 25*diag[i] + 25;
1521: vi = aj + diag[i] + 1;
1522: nz = ai[i+1] - diag[i] - 1;
1523: idt = 5*i;
1524: s1 = t[idt]; s2 = t[1+idt];
1525: s3 = t[2+idt];s4 = t[3+idt]; s5 = t[4+idt];
1526: while (nz--) {
1527: idx = 5*(*vi++);
1528: x1 = t[idx]; x2 = t[1+idx];
1529: x3 = t[2+idx]; x4 = t[3+idx]; x5 = t[4+idx];
1530: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
1531: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
1532: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
1533: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
1534: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
1535: v += 25;
1536: }
1537: idc = 5*(*c--);
1538: v = aa + 25*diag[i];
1539: x[idc] = t[idt] = v[0]*s1+v[5]*s2+v[10]*s3+
1540: v[15]*s4+v[20]*s5;
1541: x[1+idc] = t[1+idt] = v[1]*s1+v[6]*s2+v[11]*s3+
1542: v[16]*s4+v[21]*s5;
1543: x[2+idc] = t[2+idt] = v[2]*s1+v[7]*s2+v[12]*s3+
1544: v[17]*s4+v[22]*s5;
1545: x[3+idc] = t[3+idt] = v[3]*s1+v[8]*s2+v[13]*s3+
1546: v[18]*s4+v[23]*s5;
1547: x[4+idc] = t[4+idt] = v[4]*s1+v[9]*s2+v[14]*s3+
1548: v[19]*s4+v[24]*s5;
1549: }
1551: ISRestoreIndices(isrow,&rout);
1552: ISRestoreIndices(iscol,&cout);
1553: VecRestoreArray(bb,&b);
1554: VecRestoreArray(xx,&x);
1555: PetscLogFlops(2*25*(a->nz) - 5*A->n);
1556: return(0);
1557: }
1559: int MatSolve_SeqBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
1560: {
1561: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1562: int i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt;
1563: int ierr,*diag = a->diag,jdx;
1564: MatScalar *aa=a->a,*v;
1565: Scalar *x,*b,s1,s2,s3,s4,s5,x1,x2,x3,x4,x5;
1568: VecGetArray(bb,&b);
1569: VecGetArray(xx,&x);
1570: /* forward solve the lower triangular */
1571: idx = 0;
1572: x[0] = b[idx]; x[1] = b[1+idx]; x[2] = b[2+idx]; x[3] = b[3+idx];x[4] = b[4+idx];
1573: for (i=1; i<n; i++) {
1574: v = aa + 25*ai[i];
1575: vi = aj + ai[i];
1576: nz = diag[i] - ai[i];
1577: idx = 5*i;
1578: s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];s5 = b[4+idx];
1579: while (nz--) {
1580: jdx = 5*(*vi++);
1581: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];x5 = x[4+jdx];
1582: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
1583: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
1584: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
1585: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
1586: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
1587: v += 25;
1588: }
1589: x[idx] = s1;
1590: x[1+idx] = s2;
1591: x[2+idx] = s3;
1592: x[3+idx] = s4;
1593: x[4+idx] = s5;
1594: }
1595: /* backward solve the upper triangular */
1596: for (i=n-1; i>=0; i--){
1597: v = aa + 25*diag[i] + 25;
1598: vi = aj + diag[i] + 1;
1599: nz = ai[i+1] - diag[i] - 1;
1600: idt = 5*i;
1601: s1 = x[idt]; s2 = x[1+idt];
1602: s3 = x[2+idt];s4 = x[3+idt]; s5 = x[4+idt];
1603: while (nz--) {
1604: idx = 5*(*vi++);
1605: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
1606: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
1607: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
1608: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
1609: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
1610: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
1611: v += 25;
1612: }
1613: v = aa + 25*diag[i];
1614: x[idt] = v[0]*s1 + v[5]*s2 + v[10]*s3 + v[15]*s4 + v[20]*s5;
1615: x[1+idt] = v[1]*s1 + v[6]*s2 + v[11]*s3 + v[16]*s4 + v[21]*s5;
1616: x[2+idt] = v[2]*s1 + v[7]*s2 + v[12]*s3 + v[17]*s4 + v[22]*s5;
1617: x[3+idt] = v[3]*s1 + v[8]*s2 + v[13]*s3 + v[18]*s4 + v[23]*s5;
1618: x[4+idt] = v[4]*s1 + v[9]*s2 + v[14]*s3 + v[19]*s4 + v[24]*s5;
1619: }
1621: VecRestoreArray(bb,&b);
1622: VecRestoreArray(xx,&x);
1623: PetscLogFlops(2*25*(a->nz) - 5*A->n);
1624: return(0);
1625: }
1627: int MatSolve_SeqBAIJ_4(Mat A,Vec bb,Vec xx)
1628: {
1629: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1630: IS iscol=a->col,isrow=a->row;
1631: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
1632: int *diag = a->diag;
1633: MatScalar *aa=a->a,*v;
1634: Scalar *x,*b,s1,s2,s3,s4,x1,x2,x3,x4,*t;
1637: VecGetArray(bb,&b);
1638: VecGetArray(xx,&x);
1639: t = a->solve_work;
1641: ISGetIndices(isrow,&rout); r = rout;
1642: ISGetIndices(iscol,&cout); c = cout + (n-1);
1644: /* forward solve the lower triangular */
1645: idx = 4*(*r++);
1646: t[0] = b[idx]; t[1] = b[1+idx];
1647: t[2] = b[2+idx]; t[3] = b[3+idx];
1648: for (i=1; i<n; i++) {
1649: v = aa + 16*ai[i];
1650: vi = aj + ai[i];
1651: nz = diag[i] - ai[i];
1652: idx = 4*(*r++);
1653: s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1654: while (nz--) {
1655: idx = 4*(*vi++);
1656: x1 = t[idx];x2 = t[1+idx];x3 = t[2+idx];x4 = t[3+idx];
1657: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
1658: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
1659: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
1660: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
1661: v += 16;
1662: }
1663: idx = 4*i;
1664: t[idx] = s1;t[1+idx] = s2;
1665: t[2+idx] = s3;t[3+idx] = s4;
1666: }
1667: /* backward solve the upper triangular */
1668: for (i=n-1; i>=0; i--){
1669: v = aa + 16*diag[i] + 16;
1670: vi = aj + diag[i] + 1;
1671: nz = ai[i+1] - diag[i] - 1;
1672: idt = 4*i;
1673: s1 = t[idt]; s2 = t[1+idt];
1674: s3 = t[2+idt];s4 = t[3+idt];
1675: while (nz--) {
1676: idx = 4*(*vi++);
1677: x1 = t[idx]; x2 = t[1+idx];
1678: x3 = t[2+idx]; x4 = t[3+idx];
1679: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
1680: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
1681: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
1682: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
1683: v += 16;
1684: }
1685: idc = 4*(*c--);
1686: v = aa + 16*diag[i];
1687: x[idc] = t[idt] = v[0]*s1+v[4]*s2+v[8]*s3+v[12]*s4;
1688: x[1+idc] = t[1+idt] = v[1]*s1+v[5]*s2+v[9]*s3+v[13]*s4;
1689: x[2+idc] = t[2+idt] = v[2]*s1+v[6]*s2+v[10]*s3+v[14]*s4;
1690: x[3+idc] = t[3+idt] = v[3]*s1+v[7]*s2+v[11]*s3+v[15]*s4;
1691: }
1693: ISRestoreIndices(isrow,&rout);
1694: ISRestoreIndices(iscol,&cout);
1695: VecRestoreArray(bb,&b);
1696: VecRestoreArray(xx,&x);
1697: PetscLogFlops(2*16*(a->nz) - 4*A->n);
1698: return(0);
1699: }
1702: /*
1703: Special case where the matrix was ILU(0) factored in the natural
1704: ordering. This eliminates the need for the column and row permutation.
1705: */
1706: int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1707: {
1708: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1709: int n=a->mbs,*ai=a->i,*aj=a->j;
1710: int ierr,*diag = a->diag;
1711: MatScalar *aa=a->a;
1712: Scalar *x,*b;
1715: VecGetArray(bb,&b);
1716: VecGetArray(xx,&x);
1718: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS)
1719: {
1720: static Scalar w[2000]; /* very BAD need to fix */
1721: fortransolvebaij4blas_(&n,x,ai,aj,diag,aa,b,w);
1722: }
1723: #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ)
1724: {
1725: static Scalar w[2000]; /* very BAD need to fix */
1726: fortransolvebaij4_(&n,x,ai,aj,diag,aa,b,w);
1727: }
1728: #elif defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL)
1729: fortransolvebaij4unroll_(&n,x,ai,aj,diag,aa,b);
1730: #else
1731: {
1732: Scalar s1,s2,s3,s4,x1,x2,x3,x4;
1733: MatScalar *v;
1734: int jdx,idt,idx,nz,*vi,i,ai16;
1736: /* forward solve the lower triangular */
1737: idx = 0;
1738: x[0] = b[0]; x[1] = b[1]; x[2] = b[2]; x[3] = b[3];
1739: for (i=1; i<n; i++) {
1740: v = aa + 16*ai[i];
1741: vi = aj + ai[i];
1742: nz = diag[i] - ai[i];
1743: idx += 4;
1744: s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];s4 = b[3+idx];
1745: while (nz--) {
1746: jdx = 4*(*vi++);
1747: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];x4 = x[3+jdx];
1748: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
1749: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
1750: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
1751: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
1752: v += 16;
1753: }
1754: x[idx] = s1;
1755: x[1+idx] = s2;
1756: x[2+idx] = s3;
1757: x[3+idx] = s4;
1758: }
1759: /* backward solve the upper triangular */
1760: idt = 4*(n-1);
1761: for (i=n-1; i>=0; i--){
1762: ai16 = 16*diag[i];
1763: v = aa + ai16 + 16;
1764: vi = aj + diag[i] + 1;
1765: nz = ai[i+1] - diag[i] - 1;
1766: s1 = x[idt]; s2 = x[1+idt];
1767: s3 = x[2+idt];s4 = x[3+idt];
1768: while (nz--) {
1769: idx = 4*(*vi++);
1770: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; x4 = x[3+idx];
1771: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
1772: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
1773: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
1774: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
1775: v += 16;
1776: }
1777: v = aa + ai16;
1778: x[idt] = v[0]*s1 + v[4]*s2 + v[8]*s3 + v[12]*s4;
1779: x[1+idt] = v[1]*s1 + v[5]*s2 + v[9]*s3 + v[13]*s4;
1780: x[2+idt] = v[2]*s1 + v[6]*s2 + v[10]*s3 + v[14]*s4;
1781: x[3+idt] = v[3]*s1 + v[7]*s2 + v[11]*s3 + v[15]*s4;
1782: idt -= 4;
1783: }
1784: }
1785: #endif
1787: VecRestoreArray(bb,&b);
1788: VecRestoreArray(xx,&x);
1789: PetscLogFlops(2*16*(a->nz) - 4*A->n);
1790: return(0);
1791: }
1793: int MatSolve_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
1794: {
1795: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1796: IS iscol=a->col,isrow=a->row;
1797: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
1798: int *diag = a->diag;
1799: MatScalar *aa=a->a,*v;
1800: Scalar *x,*b,s1,s2,s3,x1,x2,x3,*t;
1803: VecGetArray(bb,&b);
1804: VecGetArray(xx,&x);
1805: t = a->solve_work;
1807: ISGetIndices(isrow,&rout); r = rout;
1808: ISGetIndices(iscol,&cout); c = cout + (n-1);
1810: /* forward solve the lower triangular */
1811: idx = 3*(*r++);
1812: t[0] = b[idx]; t[1] = b[1+idx]; t[2] = b[2+idx];
1813: for (i=1; i<n; i++) {
1814: v = aa + 9*ai[i];
1815: vi = aj + ai[i];
1816: nz = diag[i] - ai[i];
1817: idx = 3*(*r++);
1818: s1 = b[idx]; s2 = b[1+idx]; s3 = b[2+idx];
1819: while (nz--) {
1820: idx = 3*(*vi++);
1821: x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx];
1822: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1823: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1824: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1825: v += 9;
1826: }
1827: idx = 3*i;
1828: t[idx] = s1; t[1+idx] = s2; t[2+idx] = s3;
1829: }
1830: /* backward solve the upper triangular */
1831: for (i=n-1; i>=0; i--){
1832: v = aa + 9*diag[i] + 9;
1833: vi = aj + diag[i] + 1;
1834: nz = ai[i+1] - diag[i] - 1;
1835: idt = 3*i;
1836: s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];
1837: while (nz--) {
1838: idx = 3*(*vi++);
1839: x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx];
1840: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1841: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1842: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1843: v += 9;
1844: }
1845: idc = 3*(*c--);
1846: v = aa + 9*diag[i];
1847: x[idc] = t[idt] = v[0]*s1 + v[3]*s2 + v[6]*s3;
1848: x[1+idc] = t[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
1849: x[2+idc] = t[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
1850: }
1851: ISRestoreIndices(isrow,&rout);
1852: ISRestoreIndices(iscol,&cout);
1853: VecRestoreArray(bb,&b);
1854: VecRestoreArray(xx,&x);
1855: PetscLogFlops(2*9*(a->nz) - 3*A->n);
1856: return(0);
1857: }
1859: /*
1860: Special case where the matrix was ILU(0) factored in the natural
1861: ordering. This eliminates the need for the column and row permutation.
1862: */
1863: int MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1864: {
1865: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1866: int n=a->mbs,*ai=a->i,*aj=a->j;
1867: int ierr,*diag = a->diag;
1868: MatScalar *aa=a->a,*v;
1869: Scalar *x,*b,s1,s2,s3,x1,x2,x3;
1870: int jdx,idt,idx,nz,*vi,i;
1873: VecGetArray(bb,&b);
1874: VecGetArray(xx,&x);
1877: /* forward solve the lower triangular */
1878: idx = 0;
1879: x[0] = b[0]; x[1] = b[1]; x[2] = b[2];
1880: for (i=1; i<n; i++) {
1881: v = aa + 9*ai[i];
1882: vi = aj + ai[i];
1883: nz = diag[i] - ai[i];
1884: idx += 3;
1885: s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];
1886: while (nz--) {
1887: jdx = 3*(*vi++);
1888: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];
1889: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1890: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1891: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1892: v += 9;
1893: }
1894: x[idx] = s1;
1895: x[1+idx] = s2;
1896: x[2+idx] = s3;
1897: }
1898: /* backward solve the upper triangular */
1899: for (i=n-1; i>=0; i--){
1900: v = aa + 9*diag[i] + 9;
1901: vi = aj + diag[i] + 1;
1902: nz = ai[i+1] - diag[i] - 1;
1903: idt = 3*i;
1904: s1 = x[idt]; s2 = x[1+idt];
1905: s3 = x[2+idt];
1906: while (nz--) {
1907: idx = 3*(*vi++);
1908: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1909: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1910: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1911: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1912: v += 9;
1913: }
1914: v = aa + 9*diag[i];
1915: x[idt] = v[0]*s1 + v[3]*s2 + v[6]*s3;
1916: x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
1917: x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
1918: }
1920: VecRestoreArray(bb,&b);
1921: VecRestoreArray(xx,&x);
1922: PetscLogFlops(2*9*(a->nz) - 3*A->n);
1923: return(0);
1924: }
1926: int MatSolve_SeqBAIJ_2(Mat A,Vec bb,Vec xx)
1927: {
1928: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1929: IS iscol=a->col,isrow=a->row;
1930: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,idx,idt,idc,*rout,*cout;
1931: int *diag = a->diag;
1932: MatScalar *aa=a->a,*v;
1933: Scalar *x,*b,s1,s2,x1,x2,*t;
1936: VecGetArray(bb,&b);
1937: VecGetArray(xx,&x);
1938: t = a->solve_work;
1940: ISGetIndices(isrow,&rout); r = rout;
1941: ISGetIndices(iscol,&cout); c = cout + (n-1);
1943: /* forward solve the lower triangular */
1944: idx = 2*(*r++);
1945: t[0] = b[idx]; t[1] = b[1+idx];
1946: for (i=1; i<n; i++) {
1947: v = aa + 4*ai[i];
1948: vi = aj + ai[i];
1949: nz = diag[i] - ai[i];
1950: idx = 2*(*r++);
1951: s1 = b[idx]; s2 = b[1+idx];
1952: while (nz--) {
1953: idx = 2*(*vi++);
1954: x1 = t[idx]; x2 = t[1+idx];
1955: s1 -= v[0]*x1 + v[2]*x2;
1956: s2 -= v[1]*x1 + v[3]*x2;
1957: v += 4;
1958: }
1959: idx = 2*i;
1960: t[idx] = s1; t[1+idx] = s2;
1961: }
1962: /* backward solve the upper triangular */
1963: for (i=n-1; i>=0; i--){
1964: v = aa + 4*diag[i] + 4;
1965: vi = aj + diag[i] + 1;
1966: nz = ai[i+1] - diag[i] - 1;
1967: idt = 2*i;
1968: s1 = t[idt]; s2 = t[1+idt];
1969: while (nz--) {
1970: idx = 2*(*vi++);
1971: x1 = t[idx]; x2 = t[1+idx];
1972: s1 -= v[0]*x1 + v[2]*x2;
1973: s2 -= v[1]*x1 + v[3]*x2;
1974: v += 4;
1975: }
1976: idc = 2*(*c--);
1977: v = aa + 4*diag[i];
1978: x[idc] = t[idt] = v[0]*s1 + v[2]*s2;
1979: x[1+idc] = t[1+idt] = v[1]*s1 + v[3]*s2;
1980: }
1981: ISRestoreIndices(isrow,&rout);
1982: ISRestoreIndices(iscol,&cout);
1983: VecRestoreArray(bb,&b);
1984: VecRestoreArray(xx,&x);
1985: PetscLogFlops(2*4*(a->nz) - 2*A->n);
1986: return(0);
1987: }
1989: /*
1990: Special case where the matrix was ILU(0) factored in the natural
1991: ordering. This eliminates the need for the column and row permutation.
1992: */
1993: int MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1994: {
1995: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1996: int n=a->mbs,*ai=a->i,*aj=a->j;
1997: int ierr,*diag = a->diag;
1998: MatScalar *aa=a->a,*v;
1999: Scalar *x,*b,s1,s2,x1,x2;
2000: int jdx,idt,idx,nz,*vi,i;
2003: VecGetArray(bb,&b);
2004: VecGetArray(xx,&x);
2006: /* forward solve the lower triangular */
2007: idx = 0;
2008: x[0] = b[0]; x[1] = b[1];
2009: for (i=1; i<n; i++) {
2010: v = aa + 4*ai[i];
2011: vi = aj + ai[i];
2012: nz = diag[i] - ai[i];
2013: idx += 2;
2014: s1 = b[idx];s2 = b[1+idx];
2015: while (nz--) {
2016: jdx = 2*(*vi++);
2017: x1 = x[jdx];x2 = x[1+jdx];
2018: s1 -= v[0]*x1 + v[2]*x2;
2019: s2 -= v[1]*x1 + v[3]*x2;
2020: v += 4;
2021: }
2022: x[idx] = s1;
2023: x[1+idx] = s2;
2024: }
2025: /* backward solve the upper triangular */
2026: for (i=n-1; i>=0; i--){
2027: v = aa + 4*diag[i] + 4;
2028: vi = aj + diag[i] + 1;
2029: nz = ai[i+1] - diag[i] - 1;
2030: idt = 2*i;
2031: s1 = x[idt]; s2 = x[1+idt];
2032: while (nz--) {
2033: idx = 2*(*vi++);
2034: x1 = x[idx]; x2 = x[1+idx];
2035: s1 -= v[0]*x1 + v[2]*x2;
2036: s2 -= v[1]*x1 + v[3]*x2;
2037: v += 4;
2038: }
2039: v = aa + 4*diag[i];
2040: x[idt] = v[0]*s1 + v[2]*s2;
2041: x[1+idt] = v[1]*s1 + v[3]*s2;
2042: }
2044: VecRestoreArray(bb,&b);
2045: VecRestoreArray(xx,&x);
2046: PetscLogFlops(2*4*(a->nz) - 2*A->n);
2047: return(0);
2048: }
2050: int MatSolve_SeqBAIJ_1(Mat A,Vec bb,Vec xx)
2051: {
2052: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
2053: IS iscol=a->col,isrow=a->row;
2054: int *r,*c,ierr,i,n=a->mbs,*vi,*ai=a->i,*aj=a->j,nz,*rout,*cout;
2055: int *diag = a->diag;
2056: MatScalar *aa=a->a,*v;
2057: Scalar *x,*b,s1,*t;
2060: if (!n) return(0);
2062: VecGetArray(bb,&b);
2063: VecGetArray(xx,&x);
2064: t = a->solve_work;
2066: ISGetIndices(isrow,&rout); r = rout;
2067: ISGetIndices(iscol,&cout); c = cout + (n-1);
2069: /* forward solve the lower triangular */
2070: t[0] = b[*r++];
2071: for (i=1; i<n; i++) {
2072: v = aa + ai[i];
2073: vi = aj + ai[i];
2074: nz = diag[i] - ai[i];
2075: s1 = b[*r++];
2076: while (nz--) {
2077: s1 -= (*v++)*t[*vi++];
2078: }
2079: t[i] = s1;
2080: }
2081: /* backward solve the upper triangular */
2082: for (i=n-1; i>=0; i--){
2083: v = aa + diag[i] + 1;
2084: vi = aj + diag[i] + 1;
2085: nz = ai[i+1] - diag[i] - 1;
2086: s1 = t[i];
2087: while (nz--) {
2088: s1 -= (*v++)*t[*vi++];
2089: }
2090: x[*c--] = t[i] = aa[diag[i]]*s1;
2091: }
2093: ISRestoreIndices(isrow,&rout);
2094: ISRestoreIndices(iscol,&cout);
2095: VecRestoreArray(bb,&b);
2096: VecRestoreArray(xx,&x);
2097: PetscLogFlops(2*1*(a->nz) - A->n);
2098: return(0);
2099: }
2100: /*
2101: Special case where the matrix was ILU(0) factored in the natural
2102: ordering. This eliminates the need for the column and row permutation.
2103: */
2104: int MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
2105: {
2106: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2107: int n=a->mbs,*ai=a->i,*aj=a->j;
2108: int ierr,*diag = a->diag;
2109: MatScalar *aa=a->a;
2110: Scalar *x,*b;
2111: Scalar s1,x1;
2112: MatScalar *v;
2113: int jdx,idt,idx,nz,*vi,i;
2116: VecGetArray(bb,&b);
2117: VecGetArray(xx,&x);
2119: /* forward solve the lower triangular */
2120: idx = 0;
2121: x[0] = b[0];
2122: for (i=1; i<n; i++) {
2123: v = aa + ai[i];
2124: vi = aj + ai[i];
2125: nz = diag[i] - ai[i];
2126: idx += 1;
2127: s1 = b[idx];
2128: while (nz--) {
2129: jdx = *vi++;
2130: x1 = x[jdx];
2131: s1 -= v[0]*x1;
2132: v += 1;
2133: }
2134: x[idx] = s1;
2135: }
2136: /* backward solve the upper triangular */
2137: for (i=n-1; i>=0; i--){
2138: v = aa + diag[i] + 1;
2139: vi = aj + diag[i] + 1;
2140: nz = ai[i+1] - diag[i] - 1;
2141: idt = i;
2142: s1 = x[idt];
2143: while (nz--) {
2144: idx = *vi++;
2145: x1 = x[idx];
2146: s1 -= v[0]*x1;
2147: v += 1;
2148: }
2149: v = aa + diag[i];
2150: x[idt] = v[0]*s1;
2151: }
2152: VecRestoreArray(bb,&b);
2153: VecRestoreArray(xx,&x);
2154: PetscLogFlops(2*(a->nz) - A->n);
2155: return(0);
2156: }
2158: /* ----------------------------------------------------------------*/
2159: /*
2160: This code is virtually identical to MatILUFactorSymbolic_SeqAIJ
2161: except that the data structure of Mat_SeqAIJ is slightly different.
2162: Not a good example of code reuse.
2163: */
2164: EXTERN int MatMissingDiagonal_SeqBAIJ(Mat);
2166: int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
2167: {
2168: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
2169: IS isicol;
2170: int *r,*ic,ierr,prow,n = a->mbs,*ai = a->i,*aj = a->j;
2171: int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
2172: int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
2173: int incrlev,nnz,i,bs = a->bs,bs2 = a->bs2,levels,diagonal_fill;
2174: PetscTruth col_identity,row_identity;
2175: PetscReal f;
2181: if (info) {
2182: f = info->fill;
2183: levels = (int)info->levels;
2184: diagonal_fill = (int)info->diagonal_fill;
2185: } else {
2186: f = 1.0;
2187: levels = 0;
2188: diagonal_fill = 0;
2189: }
2190: if (!isrow) {
2191: ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);
2192: }
2193: if (!iscol) {
2194: ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);
2195: }
2196: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
2197: ISIdentity(isrow,&row_identity);
2198: ISIdentity(iscol,&col_identity);
2200: if (!levels && row_identity && col_identity) { /* special case copy the nonzero structure */
2201: MatDuplicate_SeqBAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
2202: (*fact)->factor = FACTOR_LU;
2203: b = (Mat_SeqBAIJ*)(*fact)->data;
2204: if (!b->diag) {
2205: MatMarkDiagonal_SeqBAIJ(*fact);
2206: }
2207: MatMissingDiagonal_SeqBAIJ(*fact);
2208: b->row = isrow;
2209: b->col = iscol;
2210: ierr = PetscObjectReference((PetscObject)isrow);
2211: ierr = PetscObjectReference((PetscObject)iscol);
2212: b->icol = isicol;
2213: ierr = PetscMalloc(((*fact)->m+1+b->bs)*sizeof(Scalar),&b->solve_work);
2214: } else { /* general case perform the symbolic factorization */
2215: ISGetIndices(isrow,&r);
2216: ISGetIndices(isicol,&ic);
2218: /* get new row pointers */
2219: PetscMalloc((n+1)*sizeof(int),&ainew);
2220: ainew[0] = 0;
2221: /* don't know how many column pointers are needed so estimate */
2222: jmax = (int)(f*ai[n] + 1);
2223: PetscMalloc((jmax)*sizeof(int),&ajnew);
2224: /* ajfill is level of fill for each fill entry */
2225: PetscMalloc((jmax)*sizeof(int),&ajfill);
2226: /* fill is a linked list of nonzeros in active row */
2227: PetscMalloc((n+1)*sizeof(int),&fill);
2228: /* im is level for each filled value */
2229: PetscMalloc((n+1)*sizeof(int),&im);
2230: /* dloc is location of diagonal in factor */
2231: PetscMalloc((n+1)*sizeof(int),&dloc);
2232: dloc[0] = 0;
2233: for (prow=0; prow<n; prow++) {
2235: /* copy prow into linked list */
2236: nzf = nz = ai[r[prow]+1] - ai[r[prow]];
2237: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
2238: xi = aj + ai[r[prow]];
2239: fill[n] = n;
2240: fill[prow] = -1; /* marker for diagonal entry */
2241: while (nz--) {
2242: fm = n;
2243: idx = ic[*xi++];
2244: do {
2245: m = fm;
2246: fm = fill[m];
2247: } while (fm < idx);
2248: fill[m] = idx;
2249: fill[idx] = fm;
2250: im[idx] = 0;
2251: }
2253: /* make sure diagonal entry is included */
2254: if (diagonal_fill && fill[prow] == -1) {
2255: fm = n;
2256: while (fill[fm] < prow) fm = fill[fm];
2257: fill[prow] = fill[fm]; /* insert diagonal into linked list */
2258: fill[fm] = prow;
2259: im[prow] = 0;
2260: nzf++;
2261: dcount++;
2262: }
2264: nzi = 0;
2265: row = fill[n];
2266: while (row < prow) {
2267: incrlev = im[row] + 1;
2268: nz = dloc[row];
2269: xi = ajnew + ainew[row] + nz + 1;
2270: flev = ajfill + ainew[row] + nz + 1;
2271: nnz = ainew[row+1] - ainew[row] - nz - 1;
2272: fm = row;
2273: while (nnz-- > 0) {
2274: idx = *xi++;
2275: if (*flev + incrlev > levels) {
2276: flev++;
2277: continue;
2278: }
2279: do {
2280: m = fm;
2281: fm = fill[m];
2282: } while (fm < idx);
2283: if (fm != idx) {
2284: im[idx] = *flev + incrlev;
2285: fill[m] = idx;
2286: fill[idx] = fm;
2287: fm = idx;
2288: nzf++;
2289: } else {
2290: if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
2291: }
2292: flev++;
2293: }
2294: row = fill[row];
2295: nzi++;
2296: }
2297: /* copy new filled row into permanent storage */
2298: ainew[prow+1] = ainew[prow] + nzf;
2299: if (ainew[prow+1] > jmax) {
2301: /* estimate how much additional space we will need */
2302: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
2303: /* just double the memory each time */
2304: int maxadd = jmax;
2305: /* maxadd = (int)(((f*ai[n]+1)*(n-prow+5))/n); */
2306: if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
2307: jmax += maxadd;
2309: /* allocate a longer ajnew and ajfill */
2310: PetscMalloc(jmax*sizeof(int),&xi);
2311: PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int));
2312: PetscFree(ajnew);
2313: ajnew = xi;
2314: PetscMalloc(jmax*sizeof(int),&xi);
2315: PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int));
2316: PetscFree(ajfill);
2317: ajfill = xi;
2318: realloc++; /* count how many reallocations are needed */
2319: }
2320: xi = ajnew + ainew[prow];
2321: flev = ajfill + ainew[prow];
2322: dloc[prow] = nzi;
2323: fm = fill[n];
2324: while (nzf--) {
2325: *xi++ = fm;
2326: *flev++ = im[fm];
2327: fm = fill[fm];
2328: }
2329: /* make sure row has diagonal entry */
2330: if (ajnew[ainew[prow]+dloc[prow]] != prow) {
2331: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrixn
2332: try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
2333: }
2334: }
2335: PetscFree(ajfill);
2336: ISRestoreIndices(isrow,&r);
2337: ISRestoreIndices(isicol,&ic);
2338: PetscFree(fill);
2339: PetscFree(im);
2341: {
2342: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
2343: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
2344: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Run with -pc_ilu_fill %g or use n",af);
2345: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:PCILUSetFill(pc,%g);n",af);
2346: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:for best performance.n");
2347: if (diagonal_fill) {
2348: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Detected and replace %d missing diagonals",dcount);
2349: }
2350: }
2352: /* put together the new matrix */
2353: MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);
2354: PetscLogObjectParent(*fact,isicol);
2355: b = (Mat_SeqBAIJ*)(*fact)->data;
2356: PetscFree(b->imax);
2357: b->singlemalloc = PETSC_FALSE;
2358: len = bs2*ainew[n]*sizeof(MatScalar);
2359: /* the next line frees the default space generated by the Create() */
2360: PetscFree(b->a);
2361: PetscFree(b->ilen);
2362: PetscMalloc(len,&b->a);
2363: b->j = ajnew;
2364: b->i = ainew;
2365: for (i=0; i<n; i++) dloc[i] += ainew[i];
2366: b->diag = dloc;
2367: b->ilen = 0;
2368: b->imax = 0;
2369: b->row = isrow;
2370: b->col = iscol;
2371: ierr = PetscObjectReference((PetscObject)isrow);
2372: ierr = PetscObjectReference((PetscObject)iscol);
2373: b->icol = isicol;
2374: PetscMalloc((bs*n+bs)*sizeof(Scalar),&b->solve_work);
2375: /* In b structure: Free imax, ilen, old a, old j.
2376: Allocate dloc, solve_work, new a, new j */
2377: PetscLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs2*ainew[n]*sizeof(Scalar));
2378: b->maxnz = b->nz = ainew[n];
2379: (*fact)->factor = FACTOR_LU;
2381: (*fact)->info.factor_mallocs = realloc;
2382: (*fact)->info.fill_ratio_given = f;
2383: (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
2384: }
2386: if (row_identity && col_identity) {
2387: switch (b->bs) {
2388: case 2:
2389: (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
2390: (*fact)->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering;
2391: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2n");
2392: break;
2393: case 3:
2394: (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
2395: (*fact)->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering;
2396: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:sing special in-place natural ordering factor and solve BS=3n");
2397: break;
2398: case 4:
2399: (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
2400: (*fact)->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering;
2401: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4n");
2402: break;
2403: case 5:
2404: (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
2405: (*fact)->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering;
2406: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5n");
2407: break;
2408: case 6:
2409: (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
2410: (*fact)->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering;
2411: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6n");
2412: break;
2413: case 7:
2414: (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
2415: (*fact)->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering;
2416: PetscLogInfo(A,"MatILUFactorSymbolic_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7n");
2417: break;
2418: }
2419: }
2420: return(0);
2421: }