Actual source code: baijfact.c
1: /*$Id: baijfact.c,v 1.90 2001/03/23 23:22:07 balay Exp $*/
2: /*
3: Factorization code for BAIJ format.
4: */
5: #include "src/mat/impls/baij/seq/baij.h"
6: #include "src/vec/vecimpl.h"
7: #include "src/inline/ilu.h"
9: /* ------------------------------------------------------------*/
10: /*
11: Version for when blocks are 2 by 2
12: */
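    /*
       Numeric phase of the block LU factorization for bs = 2.  Each block row i is
       expanded into the dense work array rtmp (one 2x2 block per block column,
       stored column-major as [a11,a21,a12,a22]), eliminated against the previously
       factored rows in its sparsity pattern, copied back into b->a, and its diagonal
       block is replaced by its inverse (Kernel_A_gets_inverse_A_2) so that later
       rows can use it directly.
    */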
13: #undef __FUNCT__
14: #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2"
15: int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
16: {
17: Mat C = *B;
18: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
19: IS isrow = b->row,isicol = b->icol;
20: int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
21: int *ajtmpold,*ajtmp,nz,row;
22: int *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
23: MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
24: MatScalar p1,p2,p3,p4;
25: MatScalar *ba = b->a,*aa = a->a;
28: ierr = ISGetIndices(isrow,&r);
29: ierr = ISGetIndices(isicol,&ic);
30: ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);
32: for (i=0; i<n; i++) {
33: nz = bi[i+1] - bi[i];
34: ajtmp = bj + bi[i];
35: for (j=0; j<nz; j++) {
36: x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
37: }
38: /* load in initial (unfactored row) */
39: idx = r[i];
40: nz = ai[idx+1] - ai[idx];
41: ajtmpold = aj + ai[idx];
42: v = aa + 4*ai[idx];
43: for (j=0; j<nz; j++) {
44: x = rtmp+4*ic[ajtmpold[j]];
45: x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
46: v += 4;
47: }
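      /* eliminate the work row against every previously factored block row
         (block column index < i) that appears in its sparsity pattern */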
48: row = *ajtmp++;
49: while (row < i) {
50: pc = rtmp + 4*row;
51: p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
52: if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
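        /* pc holds the block in column 'row'; overwrite it with the multiplier
           L(i,row) = A(i,row)*inv(U(row,row)); the pivot block at diag_offset[row]
           was already inverted when that row was finished */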
53: pv = ba + 4*diag_offset[row];
54: pj = bj + diag_offset[row] + 1;
55: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
56: pc[0] = m1 = p1*x1 + p3*x2;
57: pc[1] = m2 = p2*x1 + p4*x2;
58: pc[2] = m3 = p1*x3 + p3*x4;
59: pc[3] = m4 = p2*x3 + p4*x4;
60: nz = bi[row+1] - diag_offset[row] - 1;
61: pv += 4;
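        /* update the work row: rtmp(i,col) -= L(i,row)*U(row,col) for every
           block column to the right of the diagonal in row 'row' */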
62: for (j=0; j<nz; j++) {
63: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
64: x = rtmp + 4*pj[j];
65: x[0] -= m1*x1 + m3*x2;
66: x[1] -= m2*x1 + m4*x2;
67: x[2] -= m1*x3 + m3*x4;
68: x[3] -= m2*x3 + m4*x4;
69: pv += 4;
70: }
71: PetscLogFlops(16*nz+12);
72: }
73: row = *ajtmp++;
74: }
75: /* finished row so stick it into b->a */
76: pv = ba + 4*bi[i];
77: pj = bj + bi[i];
78: nz = bi[i+1] - bi[i];
79: for (j=0; j<nz; j++) {
80: x = rtmp+4*pj[j];
81: pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
82: pv += 4;
83: }
84: /* invert diagonal block */
85: w = ba + 4*diag_offset[i];
86: Kernel_A_gets_inverse_A_2(w);
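      /* the diagonal block is kept inverted in the factor; the elimination above
         uses it directly as inv(U(row,row)), and the solve phase can apply it with
         a 2x2 multiply instead of solving a small system */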
87: }
89: PetscFree(rtmp);
90: ISRestoreIndices(isicol,&ic);
91: ISRestoreIndices(isrow,&r);
92: C->factor = FACTOR_LU;
93: C->assembled = PETSC_TRUE;
94: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
95: return(0);
96: }
97: /*
98: Version for when blocks are 2 by 2 Using natural ordering
99: */
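    /*
       Identical to MatLUFactorNumeric_SeqBAIJ_2 above except that the row and
       column permutations are the identity, so the indirections through the
       isrow/isicol index sets are omitted.
    */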
100: #undef __FUNCT__
101: #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering"
102: int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
103: {
104: Mat C = *B;
105: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
106: int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
107: int *ajtmpold,*ajtmp,nz,row;
108: int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
109: MatScalar *pv,*v,*rtmp,*pc,*w,*x;
110: MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
111: MatScalar *ba = b->a,*aa = a->a;
114: PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);
116: for (i=0; i<n; i++) {
117: nz = bi[i+1] - bi[i];
118: ajtmp = bj + bi[i];
119: for (j=0; j<nz; j++) {
120: x = rtmp+4*ajtmp[j];
121: x[0] = x[1] = x[2] = x[3] = 0.0;
122: }
123: /* load in initial (unfactored row) */
124: nz = ai[i+1] - ai[i];
125: ajtmpold = aj + ai[i];
126: v = aa + 4*ai[i];
127: for (j=0; j<nz; j++) {
128: x = rtmp+4*ajtmpold[j];
129: x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
130: v += 4;
131: }
132: row = *ajtmp++;
133: while (row < i) {
134: pc = rtmp + 4*row;
135: p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
136: if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
137: pv = ba + 4*diag_offset[row];
138: pj = bj + diag_offset[row] + 1;
139: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
140: pc[0] = m1 = p1*x1 + p3*x2;
141: pc[1] = m2 = p2*x1 + p4*x2;
142: pc[2] = m3 = p1*x3 + p3*x4;
143: pc[3] = m4 = p2*x3 + p4*x4;
144: nz = bi[row+1] - diag_offset[row] - 1;
145: pv += 4;
146: for (j=0; j<nz; j++) {
147: x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
148: x = rtmp + 4*pj[j];
149: x[0] -= m1*x1 + m3*x2;
150: x[1] -= m2*x1 + m4*x2;
151: x[2] -= m1*x3 + m3*x4;
152: x[3] -= m2*x3 + m4*x4;
153: pv += 4;
154: }
155: PetscLogFlops(16*nz+12);
156: }
157: row = *ajtmp++;
158: }
159: /* finished row so stick it into b->a */
160: pv = ba + 4*bi[i];
161: pj = bj + bi[i];
162: nz = bi[i+1] - bi[i];
163: for (j=0; j<nz; j++) {
164: x = rtmp+4*pj[j];
165: pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
166: pv += 4;
167: }
168: /* invert diagonal block */
169: w = ba + 4*diag_offset[i];
170: Kernel_A_gets_inverse_A_2(w);
171: /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
172: }
174: PetscFree(rtmp);
175: C->factor = FACTOR_LU;
176: C->assembled = PETSC_TRUE;
177: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
178: return(0);
179: }
181: /* ----------------------------------------------------------- */
182: /*
183: Version for when blocks are 1 by 1.
184: */
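    /*
       Scalar (bs = 1) variant of the same elimination: the multipliers are scalars
       and the reciprocal of each pivot is stored in place of the diagonal entry.
    */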
185: #undef __FUNCT__
186: #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1"
187: int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
188: {
189: Mat C = *B;
190: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
191: IS isrow = b->row,isicol = b->icol;
192: int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
193: int *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
194: int *diag_offset = b->diag,diag,*pj;
195: MatScalar *pv,*v,*rtmp,multiplier,*pc;
196: MatScalar *ba = b->a,*aa = a->a;
199: ierr = ISGetIndices(isrow,&r);
200: ierr = ISGetIndices(isicol,&ic);
201: ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
203: for (i=0; i<n; i++) {
204: nz = bi[i+1] - bi[i];
205: ajtmp = bj + bi[i];
206: for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;
208: /* load in initial (unfactored row) */
209: nz = ai[r[i]+1] - ai[r[i]];
210: ajtmpold = aj + ai[r[i]];
211: v = aa + ai[r[i]];
212: for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j];
214: row = *ajtmp++;
215: while (row < i) {
216: pc = rtmp + row;
217: if (*pc != 0.0) {
218: pv = ba + diag_offset[row];
219: pj = bj + diag_offset[row] + 1;
220: multiplier = *pc * *pv++;
221: *pc = multiplier;
222: nz = bi[row+1] - diag_offset[row] - 1;
223: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
224: PetscLogFlops(1+2*nz);
225: }
226: row = *ajtmp++;
227: }
228: /* finished row so stick it into b->a */
229: pv = ba + bi[i];
230: pj = bj + bi[i];
231: nz = bi[i+1] - bi[i];
232: for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
233: diag = diag_offset[i] - bi[i];
234: /* check pivot entry for current row */
235: if (pv[diag] == 0.0) {
236: SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
237: }
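      /* store the reciprocal of the pivot so that later rows (and the solve phase)
         multiply rather than divide */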
238: pv[diag] = 1.0/pv[diag];
239: }
241: PetscFree(rtmp);
242: ISRestoreIndices(isicol,&ic);
243: ISRestoreIndices(isrow,&r);
244: C->factor = FACTOR_LU;
245: C->assembled = PETSC_TRUE;
246: PetscLogFlops(C->n);
247: return(0);
248: }
251: /* ----------------------------------------------------------- */
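  /*
     In-place LU factorization: the factor C is created symbolically and filled
     numerically, then MatHeaderCopy() replaces the contents of A with those of C,
     so A itself becomes the LU factor.
  */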
252: #undef __FUNCT__
253: #define __FUNCT__ "MatLUFactor_SeqBAIJ"
254: int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatLUInfo *info)
255: {
256: int ierr;
257: Mat C;
260: MatLUFactorSymbolic(A,row,col,info,&C);
261: MatLUFactorNumeric(A,&C);
262: MatHeaderCopy(A,C);
263: PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
264: return(0);
265: }