Actual source code: baijfact.c

  1: /*$Id: baijfact.c,v 1.90 2001/03/23 23:22:07 balay Exp $*/
  2: /*
  3:     Factorization code for BAIJ format. 
  4: */
 5:  #include src/mat/impls/baij/seq/baij.h
 6:  #include src/vec/vecimpl.h
 7:  #include src/inline/ilu.h

  9: /* ------------------------------------------------------------*/
 10: /*
 11:       Version for when blocks are 2 by 2
 12: */
 15: int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
 16: {
 17:   Mat                C = *B;
 18:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
 19:   IS                 isrow = b->row,isicol = b->icol;
 20:   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
 21:   int                *ajtmpold,*ajtmp,nz,row;
 22:   int                *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
 23:   MatScalar          *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
 24:   MatScalar          p1,p2,p3,p4;
 25:   MatScalar          *ba = b->a,*aa = a->a;

 28:   ISGetIndices(isrow,&r);
 29:   ISGetIndices(isicol,&ic);
 30:   PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);

 32:   for (i=0; i<n; i++) {
 33:     nz    = bi[i+1] - bi[i];
 34:     ajtmp = bj + bi[i];
 35:     for  (j=0; j<nz; j++) {
 36:       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
 37:     }
 38:     /* load in initial (unfactored row) */
 39:     idx      = r[i];
 40:     nz       = ai[idx+1] - ai[idx];
 41:     ajtmpold = aj + ai[idx];
 42:     v        = aa + 4*ai[idx];
 43:     for (j=0; j<nz; j++) {
 44:       x    = rtmp+4*ic[ajtmpold[j]];
 45:       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
 46:       v    += 4;
 47:     }
 48:     row = *ajtmp++;
 49:     while (row < i) {
 50:       pc = rtmp + 4*row;
 51:       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
 52:       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
 53:         pv = ba + 4*diag_offset[row];
 54:         pj = bj + diag_offset[row] + 1;
 55:         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
 56:         pc[0] = m1 = p1*x1 + p3*x2;
 57:         pc[1] = m2 = p2*x1 + p4*x2;
 58:         pc[2] = m3 = p1*x3 + p3*x4;
 59:         pc[3] = m4 = p2*x3 + p4*x4;
 60:         nz = bi[row+1] - diag_offset[row] - 1;
 61:         pv += 4;
 62:         for (j=0; j<nz; j++) {
 63:           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
 64:           x    = rtmp + 4*pj[j];
 65:           x[0] -= m1*x1 + m3*x2;
 66:           x[1] -= m2*x1 + m4*x2;
 67:           x[2] -= m1*x3 + m3*x4;
 68:           x[3] -= m2*x3 + m4*x4;
 69:           pv   += 4;
 70:         }
 71:         PetscLogFlops(16*nz+12);
 72:       }
 73:       row = *ajtmp++;
 74:     }
 75:     /* finished row so stick it into b->a */
 76:     pv = ba + 4*bi[i];
 77:     pj = bj + bi[i];
 78:     nz = bi[i+1] - bi[i];
 79:     for (j=0; j<nz; j++) {
 80:       x     = rtmp+4*pj[j];
 81:       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
 82:       pv   += 4;
 83:     }
 84:     /* invert diagonal block */
 85:     w = ba + 4*diag_offset[i];
 86:     Kernel_A_gets_inverse_A_2(w);
 87:   }

 89:   PetscFree(rtmp);
 90:   ISRestoreIndices(isicol,&ic);
 91:   ISRestoreIndices(isrow,&r);
 92:   C->factor = FACTOR_LU;
 93:   C->assembled = PETSC_TRUE;
 94:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
 95:   return(0);
 96: }
 97: /*
 98:       Version for when blocks are 2 by 2 Using natural ordering
 99: */
102: int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
103: {
104:   Mat                C = *B;
105:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
106:   int                ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
107:   int                *ajtmpold,*ajtmp,nz,row;
108:   int                *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
109:   MatScalar          *pv,*v,*rtmp,*pc,*w,*x;
110:   MatScalar          p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
111:   MatScalar          *ba = b->a,*aa = a->a;

114:   PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);

116:   for (i=0; i<n; i++) {
117:     nz    = bi[i+1] - bi[i];
118:     ajtmp = bj + bi[i];
119:     for  (j=0; j<nz; j++) {
120:       x = rtmp+4*ajtmp[j];
121:       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
122:     }
123:     /* load in initial (unfactored row) */
124:     nz       = ai[i+1] - ai[i];
125:     ajtmpold = aj + ai[i];
126:     v        = aa + 4*ai[i];
127:     for (j=0; j<nz; j++) {
128:       x    = rtmp+4*ajtmpold[j];
129:       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
130:       v    += 4;
131:     }
132:     row = *ajtmp++;
133:     while (row < i) {
134:       pc  = rtmp + 4*row;
135:       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
136:       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
137:         pv = ba + 4*diag_offset[row];
138:         pj = bj + diag_offset[row] + 1;
139:         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
140:         pc[0] = m1 = p1*x1 + p3*x2;
141:         pc[1] = m2 = p2*x1 + p4*x2;
142:         pc[2] = m3 = p1*x3 + p3*x4;
143:         pc[3] = m4 = p2*x3 + p4*x4;
144:         nz = bi[row+1] - diag_offset[row] - 1;
145:         pv += 4;
146:         for (j=0; j<nz; j++) {
147:           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
148:           x    = rtmp + 4*pj[j];
149:           x[0] -= m1*x1 + m3*x2;
150:           x[1] -= m2*x1 + m4*x2;
151:           x[2] -= m1*x3 + m3*x4;
152:           x[3] -= m2*x3 + m4*x4;
153:           pv   += 4;
154:         }
155:         PetscLogFlops(16*nz+12);
156:       }
157:       row = *ajtmp++;
158:     }
159:     /* finished row so stick it into b->a */
160:     pv = ba + 4*bi[i];
161:     pj = bj + bi[i];
162:     nz = bi[i+1] - bi[i];
163:     for (j=0; j<nz; j++) {
164:       x      = rtmp+4*pj[j];
165:       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
166:       pv   += 4;
167:     }
168:     /* invert diagonal block */
169:     w = ba + 4*diag_offset[i];
170:     Kernel_A_gets_inverse_A_2(w);
171:     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
172:   }

174:   PetscFree(rtmp);
175:   C->factor    = FACTOR_LU;
176:   C->assembled = PETSC_TRUE;
177:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
178:   return(0);
179: }

181: /* ----------------------------------------------------------- */
182: /*
183:      Version for when blocks are 1 by 1.
184: */
187: int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
188: {
189:   Mat                C = *B;
190:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
191:   IS                 isrow = b->row,isicol = b->icol;
192:   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
193:   int                *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
194:   int                *diag_offset = b->diag,diag,*pj;
195:   MatScalar          *pv,*v,*rtmp,multiplier,*pc;
196:   MatScalar          *ba = b->a,*aa = a->a;

199:   ISGetIndices(isrow,&r);
200:   ISGetIndices(isicol,&ic);
201:   PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);

203:   for (i=0; i<n; i++) {
204:     nz    = bi[i+1] - bi[i];
205:     ajtmp = bj + bi[i];
206:     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;

208:     /* load in initial (unfactored row) */
209:     nz       = ai[r[i]+1] - ai[r[i]];
210:     ajtmpold = aj + ai[r[i]];
211:     v        = aa + ai[r[i]];
212:     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];

214:     row = *ajtmp++;
215:     while (row < i) {
216:       pc = rtmp + row;
217:       if (*pc != 0.0) {
218:         pv         = ba + diag_offset[row];
219:         pj         = bj + diag_offset[row] + 1;
220:         multiplier = *pc * *pv++;
221:         *pc        = multiplier;
222:         nz         = bi[row+1] - diag_offset[row] - 1;
223:         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
224:         PetscLogFlops(1+2*nz);
225:       }
226:       row = *ajtmp++;
227:     }
228:     /* finished row so stick it into b->a */
229:     pv = ba + bi[i];
230:     pj = bj + bi[i];
231:     nz = bi[i+1] - bi[i];
232:     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
233:     diag = diag_offset[i] - bi[i];
234:     /* check pivot entry for current row */
235:     if (pv[diag] == 0.0) {
236:       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
237:     }
238:     pv[diag] = 1.0/pv[diag];
239:   }

241:   PetscFree(rtmp);
242:   ISRestoreIndices(isicol,&ic);
243:   ISRestoreIndices(isrow,&r);
244:   C->factor    = FACTOR_LU;
245:   C->assembled = PETSC_TRUE;
246:   PetscLogFlops(C->n);
247:   return(0);
248: }


251: /* ----------------------------------------------------------- */
254: int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
255: {
256:   int         ierr;
257:   Mat         C;

260:   MatLUFactorSymbolic(A,row,col,info,&C);
261:   MatLUFactorNumeric(A,&C);
262:   MatHeaderCopy(A,C);
263:   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
264:   return(0);
265: }