Actual source code: sbaijfact.c

  1: /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/

 3:  #include src/mat/impls/baij/seq/baij.h
 4:  #include src/mat/impls/sbaij/seq/sbaij.h
 5:  #include src/vec/vecimpl.h
 6:  #include src/inline/ilu.h
 7:  #include include/petscis.h

  9: #if !defined(PETSC_USE_COMPLEX)
 10: /* 
 11:   input:
 12:    F -- numeric factor 
 13:   output:
 14:    nneg, nzero, npos: matrix inertia 
 15: */

 17: #undef __FUNCT__  
 19: int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos)
 20: {
 21:   Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
 22:   PetscScalar  *dd = fact_ptr->a;
 23:   int          m = F->m,i;

 26:   if (nneig){
 27:     *nneig = 0;
 28:     for (i=0; i<m; i++){
 29:       if (PetscRealPart(dd[i]) < 0.0) (*nneig)++;
 30:     }
 31:   }
 32:   if (nzero){
 33:     *nzero = 0;
 34:     for (i=0; i<m; i++){
 35:       if (PetscRealPart(dd[i]) == 0.0) (*nzero)++;
 36:     }
 37:   }
 38:   if (npos){
 39:     *npos = 0;
 40:     for (i=0; i<m; i++){
 41:       if (PetscRealPart(dd[i]) > 0.0) (*npos)++;
 42:     }
 43:   }
 44:   return(0);
 45: }
 46: #endif /* !defined(PETSC_USE_COMPLEX) */

 48: /* Using Modified Sparse Row (MSR) storage.
 49: See page 85, "Iterative Methods ..." by Saad. */
 50: /*
 51:     Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 52: */
 53: /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
 54: #undef __FUNCT__  
 56: int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B)
 57: {
 58:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
 59:   int          *rip,ierr,i,mbs = a->mbs,*ai,*aj;
 60:   int          *jutmp,bs = a->bs,bs2=a->bs2;
 61:   int          m,realloc = 0,prow;
 62:   int          *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
 63:   int          *il,ili,nextprow;
 64:   PetscTruth   perm_identity;

 67:   /* check whether perm is the identity mapping */
 68:   ISIdentity(perm,&perm_identity);

 70:   /* -- inplace factorization, i.e., use sbaij for *B -- */
 71:   if (perm_identity && bs==1 ){
 72:     if (!perm_identity) a->permute = PETSC_TRUE;
 73: 
 74:   ISGetIndices(perm,&rip);
 75: 
 76:   if (perm_identity){ /* without permutation */
 77:     ai = a->i; aj = a->j;
 78:   } else {            /* non-trivial permutation */
 79:     MatReorderingSeqSBAIJ(A,perm);
 80:     ai = a->inew; aj = a->jnew;
 81:   }
 82: 
 83:   /* initialization */
 84:   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);
 85:   umax  = (int)(f*ai[mbs] + 1);
 86:   ierr  = PetscMalloc(umax*sizeof(int),&ju);
 87:   iu[0] = 0;
 88:   juidx = 0; /* index for ju */
 89:   ierr  = PetscMalloc((3*mbs+1)*sizeof(int),&jl); /* linked list for getting pivot row */
 90:   q     = jl + mbs;   /* linked list for col index of active row */
 91:   il    = q  + mbs;
 92:   for (i=0; i<mbs; i++){
 93:     jl[i] = mbs;
 94:     q[i]  = 0;
 95:     il[i] = 0;
 96:   }

 98:   /* for each row k */
 99:   for (k=0; k<mbs; k++){
100:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
101:     q[k] = mbs;
102:     /* initialize nonzero structure of k-th row to row rip[k] of A */
103:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
104:     jmax = ai[rip[k]+1];
105:     for (j=jmin; j<jmax; j++){
106:       vj = rip[aj[j]]; /* col. value */
107:       if(vj > k){
108:         qm = k;
109:         do {
110:           m  = qm; qm = q[m];
111:         } while(qm < vj);
112:         if (qm == vj) {
113:           SETERRQ(1," error: duplicate entry in An");
114:         }
115:         nzk++;
116:         q[m]  = vj;
117:         q[vj] = qm;
118:       } /* if(vj > k) */
119:     } /* for (j=jmin; j<jmax; j++) */

121:     /* modify nonzero structure of k-th row by computing fill-in
122:        for each row i to be merged in */
123:     prow = k;
124:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
125: 
126:     while (prow < k){
127:       nextprow = jl[prow];
128: 
129:       /* merge row prow into k-th row */
130:       ili = il[prow];
131:       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
132:       jmax = iu[prow+1];
133:       qm = k;
134:       for (j=jmin; j<jmax; j++){
135:         vj = ju[j];
136:         do {
137:           m = qm; qm = q[m];
138:         } while (qm < vj);
139:         if (qm != vj){  /* a fill */
140:           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
141:         }
142:       } /* end of for (j=jmin; j<jmax; j++) */
143:       if (jmin < jmax){
144:         il[prow] = jmin;
145:         j = ju[jmin];
146:         jl[prow] = jl[j]; jl[j] = prow;  /* update jl */
147:       }
148:       prow = nextprow;
149:     }
150: 
151:     /* update il and jl */
152:     if (nzk > 0){
153:       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
154:       jl[k] = jl[i]; jl[i] = k;
155:       il[k] = iu[k] + 1;
156:     }
157:     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */

159:     /* allocate more space to ju if needed */
160:     if (iu[k+1] > umax) {
161:       /* estimate how much additional space we will need */
162:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
163:       /* just double the memory each time */
164:       maxadd = umax;
165:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
166:       umax += maxadd;

168:       /* allocate a longer ju */
169:       PetscMalloc(umax*sizeof(int),&jutmp);
170:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
171:       PetscFree(ju);
172:       ju   = jutmp;
173:       realloc++; /* count how many times we realloc */
174:     }

176:     /* save nonzero structure of k-th row in ju */
177:     ju[juidx++] = k; /* diag[k] */
178:     i = k;
179:     while (nzk --) {
180:       i           = q[i];
181:       ju[juidx++] = i;
182:     }
183:   }

185:   if (ai[mbs] != 0) {
186:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
187:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
188:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use n",af);
189:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);n",af);
190:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.n");
191:   } else {
192:      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.n");
193:   }

195:   ISRestoreIndices(perm,&rip);
196:   /* PetscFree(q); */
197:   PetscFree(jl);

199:   /* put together the new matrix */
200:   MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);
201:   /* PetscLogObjectParent(*B,iperm); */
202:   b = (Mat_SeqSBAIJ*)(*B)->data;
203:   PetscFree(b->imax);
204:   b->singlemalloc = PETSC_FALSE;
205:   /* the next line frees the default space generated by the Create() */
206:   PetscFree(b->a);
207:   PetscFree(b->ilen);
208:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
209:   b->j    = ju;
210:   b->i    = iu;
211:   b->diag = 0;
212:   b->ilen = 0;
213:   b->imax = 0;
214:   b->row  = perm;
215:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */
216:   ierr    = PetscObjectReference((PetscObject)perm);
217:   b->icol = perm;
218:   ierr    = PetscObjectReference((PetscObject)perm);
219:   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
220:   /* In b structure:  Free imax, ilen, old a, old j.  
221:      Allocate idnew, solve_work, new a, new j */
222:   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
223:   b->s_maxnz = b->s_nz = iu[mbs];
224: 
225:   (*B)->factor                 = FACTOR_CHOLESKY;
226:   (*B)->info.factor_mallocs    = realloc;
227:   (*B)->info.fill_ratio_given  = f;
228:   if (ai[mbs] != 0) {
229:     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
230:   } else {
231:     (*B)->info.fill_ratio_needed = 0.0;
232:   }


235:   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace;
236:   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
237:   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1n");
238: 
239:   return(0);
240:   }
241:   /* -----------  end of new code --------------------*/


244:   if (!perm_identity) a->permute = PETSC_TRUE;
245: 
246:   ISGetIndices(perm,&rip);
247: 
248:   if (perm_identity){ /* without permutation */
249:     ai = a->i; aj = a->j;
250:   } else {            /* non-trivial permutation */
251:     MatReorderingSeqSBAIJ(A,perm);
252:     ai = a->inew; aj = a->jnew;
253:   }
254: 
255:   /* initialization */
256:   ierr  = PetscMalloc((mbs+1)*sizeof(int),&iu);
257:   umax  = (int)(f*ai[mbs] + 1); umax += mbs + 1;
258:   ierr  = PetscMalloc(umax*sizeof(int),&ju);
259:   iu[0] = mbs+1;
260:   juidx = mbs + 1; /* index for ju */
261:   ierr  = PetscMalloc(2*mbs*sizeof(int),&jl); /* linked list for pivot row */
262:   q     = jl + mbs;   /* linked list for col index */
263:   for (i=0; i<mbs; i++){
264:     jl[i] = mbs;
265:     q[i] = 0;
266:   }

268:   /* for each row k */
269:   for (k=0; k<mbs; k++){
270:     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
271:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
272:     q[k] = mbs;
273:     /* initialize nonzero structure of k-th row to row rip[k] of A */
274:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
275:     jmax = ai[rip[k]+1];
276:     for (j=jmin; j<jmax; j++){
277:       vj = rip[aj[j]]; /* col. value */
278:       if(vj > k){
279:         qm = k;
280:         do {
281:           m  = qm; qm = q[m];
282:         } while(qm < vj);
283:         if (qm == vj) {
284:           SETERRQ(1," error: duplicate entry in An");
285:         }
286:         nzk++;
287:         q[m]  = vj;
288:         q[vj] = qm;
289:       } /* if(vj > k) */
290:     } /* for (j=jmin; j<jmax; j++) */

292:     /* modify nonzero structure of k-th row by computing fill-in
293:        for each row i to be merged in */
294:     prow = k;
295:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
296: 
297:     while (prow < k){
298:       /* merge row prow into k-th row */
299:       jmin = iu[prow] + 1; jmax = iu[prow+1];
300:       qm = k;
301:       for (j=jmin; j<jmax; j++){
302:         vj = ju[j];
303:         do {
304:           m = qm; qm = q[m];
305:         } while (qm < vj);
306:         if (qm != vj){
307:          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
308:         }
309:       }
310:       prow = jl[prow]; /* next pivot row */
311:     }
312: 
313:     /* add k to row list for first nonzero element in k-th row */
314:     if (nzk > 0){
315:       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
316:       jl[k] = jl[i]; jl[i] = k;
317:     }
318:     iu[k+1] = iu[k] + nzk;

320:     /* allocate more space to ju if needed */
321:     if (iu[k+1] > umax) {
322:       /* estimate how much additional space we will need */
323:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
324:       /* just double the memory each time */
325:       maxadd = umax;
326:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
327:       umax += maxadd;

329:       /* allocate a longer ju */
330:       PetscMalloc(umax*sizeof(int),&jutmp);
331:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
332:       PetscFree(ju);
333:       ju   = jutmp;
334:       realloc++; /* count how many times we realloc */
335:     }

337:     /* save nonzero structure of k-th row in ju */
338:     i=k;
339:     while (nzk --) {
340:       i           = q[i];
341:       ju[juidx++] = i;
342:     }
343:   }

345:   if (ai[mbs] != 0) {
346:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
347:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
348:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use n",af);
349:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);n",af);
350:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.n");
351:   } else {
352:      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.n");
353:   }

355:   ISRestoreIndices(perm,&rip);
356:   /* PetscFree(q); */
357:   PetscFree(jl);

359:   /* put together the new matrix */
360:   MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);
361:   /* PetscLogObjectParent(*B,iperm); */
362:   b = (Mat_SeqSBAIJ*)(*B)->data;
363:   PetscFree(b->imax);
364:   b->singlemalloc = PETSC_FALSE;
365:   /* the next line frees the default space generated by the Create() */
366:   PetscFree(b->a);
367:   PetscFree(b->ilen);
368:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
369:   b->j    = ju;
370:   b->i    = iu;
371:   b->diag = 0;
372:   b->ilen = 0;
373:   b->imax = 0;
374:   b->row  = perm;
375:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */
376:   ierr    = PetscObjectReference((PetscObject)perm);
377:   b->icol = perm;
378:   ierr    = PetscObjectReference((PetscObject)perm);
379:   ierr    = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
380:   /* In b structure:  Free imax, ilen, old a, old j.  
381:      Allocate idnew, solve_work, new a, new j */
382:   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
383:   b->s_maxnz = b->s_nz = iu[mbs];
384: 
385:   (*B)->factor                 = FACTOR_CHOLESKY;
386:   (*B)->info.factor_mallocs    = realloc;
387:   (*B)->info.fill_ratio_given  = f;
388:   if (ai[mbs] != 0) {
389:     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
390:   } else {
391:     (*B)->info.fill_ratio_needed = 0.0;
392:   }

394:   if (perm_identity){
395:     switch (bs) {
396:       case 1:
397:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
398:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
399:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1n");
400:         break;
401:       case 2:
402:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
403:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
404:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2n");
405:         break;
406:       case 3:
407:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
408:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
409:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3n");
410:         break;
411:       case 4:
412:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
413:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
414:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4n");
415:         break;
416:       case 5:
417:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
418:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
419:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5n");
420:         break;
421:       case 6:
422:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
423:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
424:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6n");
425:         break;
426:       case 7:
427:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
428:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
429:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7n");
430:       break;
431:       default:
432:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
433:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
434:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7n");
435:       break;
436:     }
437:   }

439:   return(0);
440: }


443: #undef __FUNCT__  
445: int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
446: {
447:   Mat                C = *B;
448:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
449:   IS                 perm = b->row;
450:   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
451:   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
452:   int                bs=a->bs,bs2 = a->bs2;
453:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
454:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
455:   MatScalar          *work;
456:   int                *pivots;


460:   /* initialization */
461:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
462:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
463:   PetscMalloc(2*mbs*sizeof(int),&il);
464:   jl   = il + mbs;
465:   for (i=0; i<mbs; i++) {
466:     jl[i] = mbs; il[0] = 0;
467:   }
468:   PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
469:   uik  = dk + bs2;
470:   work = uik + bs2;
471:   PetscMalloc(bs*sizeof(int),&pivots);
472: 
473:   ierr  = ISGetIndices(perm,&perm_ptr);
474: 
475:   /* check permutation */
476:   if (!a->permute){
477:     ai = a->i; aj = a->j; aa = a->a;
478:   } else {
479:     ai   = a->inew; aj = a->jnew;
480:     PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
481:     PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
482:     PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
483:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));

485:     for (i=0; i<mbs; i++){
486:       jmin = ai[i]; jmax = ai[i+1];
487:       for (j=jmin; j<jmax; j++){
488:         while (a2anew[j] != j){
489:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
490:           for (k1=0; k1<bs2; k1++){
491:             dk[k1]       = aa[k*bs2+k1];
492:             aa[k*bs2+k1] = aa[j*bs2+k1];
493:             aa[j*bs2+k1] = dk[k1];
494:           }
495:         }
496:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
497:         if (i > aj[j]){
498:           /* printf("change orientation, row: %d, col: %dn",i,aj[j]); */
499:           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
500:           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
501:           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
502:             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
503:           }
504:         }
505:       }
506:     }
507:     PetscFree(a2anew);
508:   }
509: 
510:   /* for each row k */
511:   for (k = 0; k<mbs; k++){

513:     /*initialize k-th row with elements nonzero in row perm(k) of A */
514:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
515: 
516:     ap = aa + jmin*bs2;
517:     for (j = jmin; j < jmax; j++){
518:       vj = perm_ptr[aj[j]];         /* block col. index */
519:       rtmp_ptr = rtmp + vj*bs2;
520:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
521:     }

523:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
524:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
525:     i = jl[k]; /* first row to be added to k_th row  */

527:     while (i < k){
528:       nexti = jl[i]; /* next row to be added to k_th row */

530:       /* compute multiplier */
531:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

533:       /* uik = -inv(Di)*U_bar(i,k) */
534:       diag = ba + i*bs2;
535:       u    = ba + ili*bs2;
536:       PetscMemzero(uik,bs2*sizeof(MatScalar));
537:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
538: 
539:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
540:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
541: 
542:       /* update -U(i,k) */
543:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

545:       /* add multiple of row i to k-th row ... */
546:       jmin = ili + 1; jmax = bi[i+1];
547:       if (jmin < jmax){
548:         for (j=jmin; j<jmax; j++) {
549:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
550:           rtmp_ptr = rtmp + bj[j]*bs2;
551:           u = ba + j*bs2;
552:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
553:         }
554: 
555:         /* ... add i to row list for next nonzero entry */
556:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
557:         j     = bj[jmin];
558:         jl[i] = jl[j]; jl[j] = i; /* update jl */
559:       }
560:       i = nexti;
561:     }

563:     /* save nonzero entries in k-th row of U ... */

565:     /* invert diagonal block */
566:     diag = ba+k*bs2;
567:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
568:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
569: 
570:     jmin = bi[k]; jmax = bi[k+1];
571:     if (jmin < jmax) {
572:       for (j=jmin; j<jmax; j++){
573:          vj = bj[j];           /* block col. index of U */
574:          u   = ba + j*bs2;
575:          rtmp_ptr = rtmp + vj*bs2;
576:          for (k1=0; k1<bs2; k1++){
577:            *u++        = *rtmp_ptr;
578:            *rtmp_ptr++ = 0.0;
579:          }
580:       }
581: 
582:       /* ... add k to row list for first nonzero entry in k-th row */
583:       il[k] = jmin;
584:       i     = bj[jmin];
585:       jl[k] = jl[i]; jl[i] = k;
586:     }
587:   }

589:   PetscFree(rtmp);
590:   PetscFree(il);
591:   PetscFree(dk);
592:   PetscFree(pivots);
593:   if (a->permute){
594:     PetscFree(aa);
595:   }

597:   ISRestoreIndices(perm,&perm_ptr);
598:   C->factor       = FACTOR_CHOLESKY;
599:   C->assembled    = PETSC_TRUE;
600:   C->preallocated = PETSC_TRUE;
601:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
602:   return(0);
603: }

605: #undef __FUNCT__  
607: int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
608: {
609:   Mat                C = *B;
610:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
611:   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
612:   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
613:   int                bs=a->bs,bs2 = a->bs2;
614:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
615:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
616:   MatScalar          *work;
617:   int                *pivots;


621:   /* initialization */
622: 
623:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
624:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
625:   PetscMalloc(2*mbs*sizeof(int),&il);
626:   jl   = il + mbs;
627:   for (i=0; i<mbs; i++) {
628:     jl[i] = mbs; il[0] = 0;
629:   }
630:   PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
631:   uik  = dk + bs2;
632:   work = uik + bs2;
633:   PetscMalloc(bs*sizeof(int),&pivots);
634: 
635:   ai = a->i; aj = a->j; aa = a->a;
636: 
637:   /* for each row k */
638:   for (k = 0; k<mbs; k++){

640:     /*initialize k-th row with elements nonzero in row k of A */
641:     jmin = ai[k]; jmax = ai[k+1];
642:     ap = aa + jmin*bs2;
643:     for (j = jmin; j < jmax; j++){
644:       vj = aj[j];         /* block col. index */
645:       rtmp_ptr = rtmp + vj*bs2;
646:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
647:     }

649:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
650:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
651:     i = jl[k]; /* first row to be added to k_th row  */

653:     while (i < k){
654:       nexti = jl[i]; /* next row to be added to k_th row */

656:       /* compute multiplier */
657:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

659:       /* uik = -inv(Di)*U_bar(i,k) */
660:       diag = ba + i*bs2;
661:       u    = ba + ili*bs2;
662:       PetscMemzero(uik,bs2*sizeof(MatScalar));
663:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
664: 
665:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
666:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
667: 
668:       /* update -U(i,k) */
669:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

671:       /* add multiple of row i to k-th row ... */
672:       jmin = ili + 1; jmax = bi[i+1];
673:       if (jmin < jmax){
674:         for (j=jmin; j<jmax; j++) {
675:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
676:           rtmp_ptr = rtmp + bj[j]*bs2;
677:           u = ba + j*bs2;
678:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
679:         }
680: 
681:         /* ... add i to row list for next nonzero entry */
682:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
683:         j     = bj[jmin];
684:         jl[i] = jl[j]; jl[j] = i; /* update jl */
685:       }
686:       i = nexti;
687:     }

689:     /* save nonzero entries in k-th row of U ... */

691:     /* invert diagonal block */
692:     diag = ba+k*bs2;
693:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
694:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
695: 
696:     jmin = bi[k]; jmax = bi[k+1];
697:     if (jmin < jmax) {
698:       for (j=jmin; j<jmax; j++){
699:          vj = bj[j];           /* block col. index of U */
700:          u   = ba + j*bs2;
701:          rtmp_ptr = rtmp + vj*bs2;
702:          for (k1=0; k1<bs2; k1++){
703:            *u++        = *rtmp_ptr;
704:            *rtmp_ptr++ = 0.0;
705:          }
706:       }
707: 
708:       /* ... add k to row list for first nonzero entry in k-th row */
709:       il[k] = jmin;
710:       i     = bj[jmin];
711:       jl[k] = jl[i]; jl[i] = k;
712:     }
713:   }

715:   PetscFree(rtmp);
716:   PetscFree(il);
717:   PetscFree(dk);
718:   PetscFree(pivots);

720:   C->factor    = FACTOR_CHOLESKY;
721:   C->assembled = PETSC_TRUE;
722:   C->preallocated = PETSC_TRUE;
723:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
724:   return(0);
725: }

727: /*
728:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
729:     Version for blocks 2 by 2.
730: */
731: #undef __FUNCT__  
733: int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
734: {
735:   Mat                C = *B;
736:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
737:   IS                 perm = b->row;
738:   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
739:   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
740:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
741:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;

744: 
745:   /* initialization */
746:   /* il and jl record the first nonzero element in each row of the accessing 
747:      window U(0:k, k:mbs-1).
748:      jl:    list of rows to be added to uneliminated rows 
749:             i>= k: jl(i) is the first row to be added to row i
750:             i<  k: jl(i) is the row following row i in some list of rows
751:             jl(i) = mbs indicates the end of a list                        
752:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
753:             row i of U */
754:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
755:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
756:   PetscMalloc(2*mbs*sizeof(int),&il);
757:   jl   = il + mbs;
758:   for (i=0; i<mbs; i++) {
759:     jl[i] = mbs; il[0] = 0;
760:   }
761:   PetscMalloc(8*sizeof(MatScalar),&dk);
762:   uik  = dk + 4;
763:   ISGetIndices(perm,&perm_ptr);

765:   /* check permutation */
766:   if (!a->permute){
767:     ai = a->i; aj = a->j; aa = a->a;
768:   } else {
769:     ai   = a->inew; aj = a->jnew;
770:     PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
771:     PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
772:     PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
773:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));

775:     for (i=0; i<mbs; i++){
776:       jmin = ai[i]; jmax = ai[i+1];
777:       for (j=jmin; j<jmax; j++){
778:         while (a2anew[j] != j){
779:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
780:           for (k1=0; k1<4; k1++){
781:             dk[k1]       = aa[k*4+k1];
782:             aa[k*4+k1] = aa[j*4+k1];
783:             aa[j*4+k1] = dk[k1];
784:           }
785:         }
786:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
787:         if (i > aj[j]){
788:           /* printf("change orientation, row: %d, col: %dn",i,aj[j]); */
789:           ap = aa + j*4;     /* ptr to the beginning of the block */
790:           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
791:           ap[1] = ap[2];
792:           ap[2] = dk[1];
793:         }
794:       }
795:     }
796:     PetscFree(a2anew);
797:   }

799:   /* for each row k */
800:   for (k = 0; k<mbs; k++){

802:     /*initialize k-th row with elements nonzero in row perm(k) of A */
803:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
804:     ap = aa + jmin*4;
805:     for (j = jmin; j < jmax; j++){
806:       vj = perm_ptr[aj[j]];         /* block col. index */
807:       rtmp_ptr = rtmp + vj*4;
808:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
809:     }

811:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
812:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
813:     i = jl[k]; /* first row to be added to k_th row  */

815:     while (i < k){
816:       nexti = jl[i]; /* next row to be added to k_th row */

818:       /* compute multiplier */
819:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

821:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
822:       diag = ba + i*4;
823:       u    = ba + ili*4;
824:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
825:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
826:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
827:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
828: 
829:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
830:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
831:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
832:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
833:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

835:       /* update -U(i,k): ba[ili] = uik */
836:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

838:       /* add multiple of row i to k-th row ... */
839:       jmin = ili + 1; jmax = bi[i+1];
840:       if (jmin < jmax){
841:         for (j=jmin; j<jmax; j++) {
842:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
843:           rtmp_ptr = rtmp + bj[j]*4;
844:           u = ba + j*4;
845:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
846:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
847:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
848:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
849:         }
850: 
851:         /* ... add i to row list for next nonzero entry */
852:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
853:         j     = bj[jmin];
854:         jl[i] = jl[j]; jl[j] = i; /* update jl */
855:       }
856:       i = nexti;
857:     }

859:     /* save nonzero entries in k-th row of U ... */

861:     /* invert diagonal block */
862:     diag = ba+k*4;
863:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
864:     Kernel_A_gets_inverse_A_2(diag);
865: 
866:     jmin = bi[k]; jmax = bi[k+1];
867:     if (jmin < jmax) {
868:       for (j=jmin; j<jmax; j++){
869:          vj = bj[j];           /* block col. index of U */
870:          u   = ba + j*4;
871:          rtmp_ptr = rtmp + vj*4;
872:          for (k1=0; k1<4; k1++){
873:            *u++        = *rtmp_ptr;
874:            *rtmp_ptr++ = 0.0;
875:          }
876:       }
877: 
878:       /* ... add k to row list for first nonzero entry in k-th row */
879:       il[k] = jmin;
880:       i     = bj[jmin];
881:       jl[k] = jl[i]; jl[i] = k;
882:     }
883:   }

885:   PetscFree(rtmp);
886:   PetscFree(il);
887:   PetscFree(dk);
888:   if (a->permute) {
889:     PetscFree(aa);
890:   }
891:   ISRestoreIndices(perm,&perm_ptr);
892:   C->factor    = FACTOR_CHOLESKY;
893:   C->assembled = PETSC_TRUE;
894:   C->preallocated = PETSC_TRUE;
895:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
896:   return(0);
897: }

899: /*
900:       Version for when blocks are 2 by 2 Using natural ordering
901: */
902: #undef __FUNCT__  
904: int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
905: {
906:   Mat                C = *B;
907:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
908:   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
909:   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
910:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
911:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;

914: 
915:   /* initialization */
916:   /* il and jl record the first nonzero element in each row of the accessing 
917:      window U(0:k, k:mbs-1).
918:      jl:    list of rows to be added to uneliminated rows 
919:             i>= k: jl(i) is the first row to be added to row i
920:             i<  k: jl(i) is the row following row i in some list of rows
921:             jl(i) = mbs indicates the end of a list                        
922:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
923:             row i of U */
924:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
925:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
926:   PetscMalloc(2*mbs*sizeof(int),&il);
927:   jl   = il + mbs;
928:   for (i=0; i<mbs; i++) {
929:     jl[i] = mbs; il[0] = 0;
930:   }
931:   PetscMalloc(8*sizeof(MatScalar),&dk);
932:   uik  = dk + 4;
933: 
934:   ai = a->i; aj = a->j; aa = a->a;

936:   /* for each row k */
937:   for (k = 0; k<mbs; k++){

939:     /*initialize k-th row with elements nonzero in row k of A */
940:     jmin = ai[k]; jmax = ai[k+1];
941:     ap = aa + jmin*4;
942:     for (j = jmin; j < jmax; j++){
943:       vj = aj[j];         /* block col. index */
944:       rtmp_ptr = rtmp + vj*4;
945:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
946:     }
947: 
948:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
949:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
950:     i = jl[k]; /* first row to be added to k_th row  */

952:     while (i < k){
953:       nexti = jl[i]; /* next row to be added to k_th row */

955:       /* compute multiplier */
956:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

958:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
959:       diag = ba + i*4;
960:       u    = ba + ili*4;
961:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
962:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
963:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
964:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
965: 
966:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
967:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
968:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
969:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
970:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

972:       /* update -U(i,k): ba[ili] = uik */
973:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

975:       /* add multiple of row i to k-th row ... */
976:       jmin = ili + 1; jmax = bi[i+1];
977:       if (jmin < jmax){
978:         for (j=jmin; j<jmax; j++) {
979:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
980:           rtmp_ptr = rtmp + bj[j]*4;
981:           u = ba + j*4;
982:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
983:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
984:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
985:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
986:         }
987: 
988:         /* ... add i to row list for next nonzero entry */
989:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
990:         j     = bj[jmin];
991:         jl[i] = jl[j]; jl[j] = i; /* update jl */
992:       }
993:       i = nexti;
994:     }

996:     /* save nonzero entries in k-th row of U ... */

998:     /* invert diagonal block */
999:     diag = ba+k*4;
1000:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1001:     Kernel_A_gets_inverse_A_2(diag);
1002: 
1003:     jmin = bi[k]; jmax = bi[k+1];
1004:     if (jmin < jmax) {
1005:       for (j=jmin; j<jmax; j++){
1006:          vj = bj[j];           /* block col. index of U */
1007:          u   = ba + j*4;
1008:          rtmp_ptr = rtmp + vj*4;
1009:          for (k1=0; k1<4; k1++){
1010:            *u++        = *rtmp_ptr;
1011:            *rtmp_ptr++ = 0.0;
1012:          }
1013:       }
1014: 
1015:       /* ... add k to row list for first nonzero entry in k-th row */
1016:       il[k] = jmin;
1017:       i     = bj[jmin];
1018:       jl[k] = jl[i]; jl[i] = k;
1019:     }
1020:   }

1022:   PetscFree(rtmp);
1023:   PetscFree(il);
1024:   PetscFree(dk);

1026:   C->factor    = FACTOR_CHOLESKY;
1027:   C->assembled = PETSC_TRUE;
1028:   C->preallocated = PETSC_TRUE;
1029:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1030:   return(0);
1031: }

1033: /*
1034:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
1035:     Version for blocks are 1 by 1.
1036: */
1037: #undef __FUNCT__  
1039: int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
1040: {
1041:   Mat                C = *B;
1042:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1043:   IS                 ip = b->row;
1044:   int                *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
1045:   int                *ai,*aj,*r;
1046:   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
1047:   MatScalar          *rtmp;
1048:   MatScalar          *ba = b->a,*aa,ak;
1049:   MatScalar          dk,uikdi;

1052:   ierr  = ISGetIndices(ip,&rip);
1053:   if (!a->permute){
1054:     ai = a->i; aj = a->j; aa = a->a;
1055:   } else {
1056:     ai = a->inew; aj = a->jnew;
1057:     PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);
1058:     PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));
1059:     PetscMalloc(ai[mbs]*sizeof(int),&r);
1060:     ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));

1062:     jmin = ai[0]; jmax = ai[mbs];
1063:     for (j=jmin; j<jmax; j++){
1064:       while (r[j] != j){
1065:         k = r[j]; r[j] = r[k]; r[k] = k;
1066:         ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
1067:       }
1068:     }
1069:     PetscFree(r);
1070:   }
1071: 
1072:   /* initialization */
1073:   /* il and jl record the first nonzero element in each row of the accessing 
1074:      window U(0:k, k:mbs-1).
1075:      jl:    list of rows to be added to uneliminated rows 
1076:             i>= k: jl(i) is the first row to be added to row i
1077:             i<  k: jl(i) is the row following row i in some list of rows
1078:             jl(i) = mbs indicates the end of a list                        
1079:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
1080:             row i of U */
1081:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1082:   PetscMalloc(2*mbs*sizeof(int),&il);
1083:   jl   = il + mbs;
1084:   for (i=0; i<mbs; i++) {
1085:     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1086:   }

1088:   /* for each row k */
1089:   for (k = 0; k<mbs; k++){

1091:     /*initialize k-th row with elements nonzero in row perm(k) of A */
1092:     jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1093: 
1094:     for (j = jmin; j < jmax; j++){
1095:       vj = rip[aj[j]];
1096:       rtmp[vj] = aa[j];
1097:     }

1099:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1100:     dk = rtmp[k];
1101:     i = jl[k]; /* first row to be added to k_th row  */

1103:     while (i < k){
1104:       nexti = jl[i]; /* next row to be added to k_th row */

1106:       /* compute multiplier, update D(k) and U(i,k) */
1107:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1108:       uikdi = - ba[ili]*ba[i];
1109:       dk += uikdi*ba[ili];
1110:       ba[ili] = uikdi; /* -U(i,k) */

1112:       /* add multiple of row i to k-th row ... */
1113:       jmin = ili + 1; jmax = bi[i+1];
1114:       if (jmin < jmax){
1115:         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1116:         /* ... add i to row list for next nonzero entry */
1117:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1118:         j     = bj[jmin];
1119:         jl[i] = jl[j]; jl[j] = i; /* update jl */
1120:       }
1121:       i = nexti;
1122:     }

1124:     /* check for zero pivot and save diagoanl element */
1125:     if (dk == 0.0){
1126:       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1127:       /*
1128:     } else if (PetscRealPart(dk) < 0.0){
1129:       SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %gn",k,dk);  
1130:       */
1131:     }

1133:     /* save nonzero entries in k-th row of U ... */
1134:     ba[k] = 1.0/dk;
1135:     jmin = bi[k]; jmax = bi[k+1];
1136:     if (jmin < jmax) {
1137:       for (j=jmin; j<jmax; j++){
1138:          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1139:       }
1140:       /* ... add k to row list for first nonzero entry in k-th row */
1141:       il[k] = jmin;
1142:       i     = bj[jmin];
1143:       jl[k] = jl[i]; jl[i] = k;
1144:     }
1145:   }
1146: 
1147:   PetscFree(rtmp);
1148:   PetscFree(il);
1149:   if (a->permute){
1150:     PetscFree(aa);
1151:   }

1153:   ISRestoreIndices(ip,&rip);
1154:   C->factor    = FACTOR_CHOLESKY;
1155:   C->assembled = PETSC_TRUE;
1156:   C->preallocated = PETSC_TRUE;
1157:   PetscLogFlops(b->mbs);
1158:   return(0);
1159: }

1161: /*
1162:   Version for when blocks are 1 by 1 Using natural ordering
1163: */
1164: #undef __FUNCT__  
1166: int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
1167: {
1168:   Mat                C = *B;
1169:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1170:   int                ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
1171:   int                *ai,*aj;
1172:   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
1173:   MatScalar          *rtmp,*ba = b->a,*aa,dk,uikdi;

1176: 
1177:   /* initialization */
1178:   /* il and jl record the first nonzero element in each row of the accessing 
1179:      window U(0:k, k:mbs-1).
1180:      jl:    list of rows to be added to uneliminated rows 
1181:             i>= k: jl(i) is the first row to be added to row i
1182:             i<  k: jl(i) is the row following row i in some list of rows
1183:             jl(i) = mbs indicates the end of a list                        
1184:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
1185:             row i of U */
1186:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1187:   PetscMalloc(2*mbs*sizeof(int),&il);
1188:   jl   = il + mbs;
1189:   for (i=0; i<mbs; i++) {
1190:     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1191:   }

1193:   ai = a->i; aj = a->j; aa = a->a;

1195:   /* for each row k */
1196:   for (k = 0; k<mbs; k++){

1198:     /*initialize k-th row with elements nonzero in row perm(k) of A */
1199:     jmin = ai[k]; jmax = ai[k+1];
1200: 
1201:     for (j = jmin; j < jmax; j++){
1202:       vj = aj[j];
1203:       rtmp[vj] = aa[j];
1204:     }
1205: 
1206:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1207:     dk = rtmp[k];
1208:     i = jl[k]; /* first row to be added to k_th row  */

1210:     while (i < k){
1211:       nexti = jl[i]; /* next row to be added to k_th row */

1213:       /* compute multiplier, update D(k) and U(i,k) */
1214:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1215:       uikdi = - ba[ili]*ba[i];
1216:       dk += uikdi*ba[ili];
1217:       ba[ili] = uikdi; /* -U(i,k) */

1219:       /* add multiple of row i to k-th row ... */
1220:       jmin = ili + 1; jmax = bi[i+1];
1221:       if (jmin < jmax){
1222:         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1223:         /* ... add i to row list for next nonzero entry */
1224:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1225:         j     = bj[jmin];
1226:         jl[i] = jl[j]; jl[j] = i; /* update jl */
1227:       }
1228:       i = nexti;
1229:     }

1231:     /* check for zero pivot and save diagoanl element */
1232:     if (dk == 0.0){
1233:       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1234:       /*
1235:     } else if (PetscRealPart(dk) < 0){
1236:       SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %gn",k,dk);  
1237:       */
1238:     }

1240:     /* save nonzero entries in k-th row of U ... */
1241:     ba[k] = 1.0/dk;
1242:     jmin = bi[k]; jmax = bi[k+1];
1243:     if (jmin < jmax) {
1244:       for (j=jmin; j<jmax; j++){
1245:          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1246:       }
1247:       /* ... add k to row list for first nonzero entry in k-th row */
1248:       il[k] = jmin;
1249:       i     = bj[jmin];
1250:       jl[k] = jl[i]; jl[i] = k;
1251:     }
1252:   }
1253: 
1254:   PetscFree(rtmp);
1255:   PetscFree(il);
1256: 
1257:   C->factor    = FACTOR_CHOLESKY;
1258:   C->assembled = PETSC_TRUE;
1259:   C->preallocated = PETSC_TRUE;
1260:   PetscLogFlops(b->mbs);
1261:   return(0);
1262: }

1264: #undef __FUNCT__  
1266: int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Mat *B)
1267: {
1268:   Mat                C = *B;
1269:   Mat_SeqSBAIJ       *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1270:   int                ierr,i,j,mbs = a->mbs;
1271:   int                *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1272:   int                k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1273:   MatScalar          *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;

1276:   /* initialization */
1277:   /* il and jl record the first nonzero element in each row of the accessing 
1278:      window U(0:k, k:mbs-1).
1279:      jl:    list of rows to be added to uneliminated rows 
1280:             i>= k: jl(i) is the first row to be added to row i
1281:             i<  k: jl(i) is the row following row i in some list of rows
1282:             jl(i) = mbs indicates the end of a list                        
1283:      il(i): points to the first nonzero element in U(i,k:mbs-1) 
1284:   */
1285:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1286:   PetscMalloc(2*mbs*sizeof(int),&il);
1287:   jl   = il + mbs;
1288:   for (i=0; i<mbs; i++) {
1289:     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1290:   }

1292:   /* for each row k */
1293:   for (k = 0; k<mbs; k++){

1295:     /*initialize k-th row with elements nonzero in row perm(k) of A */
1296:     nz   = ai[k+1] - ai[k];
1297:     acol = aj + ai[k];
1298:     aval = aa + ai[k];
1299:     bval = ba + bi[k];
1300:     while (nz -- ){
1301:       rtmp[*acol++] = *aval++;
1302:       *bval++       = 0.0; /* for in-place factorization */
1303:     }
1304: 
1305:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1306:     dk = rtmp[k];
1307:     i  = jl[k]; /* first row to be added to k_th row  */

1309:     while (i < k){
1310:       nexti = jl[i]; /* next row to be added to k_th row */
1311:       /* printf("factnum, k %d, i %dn", k,i); */

1313:       /* compute multiplier, update D(k) and U(i,k) */
1314:       ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1315:       uikdi = - ba[ili]*ba[bi[i]];
1316:       dk   += uikdi*ba[ili];
1317:       ba[ili] = uikdi; /* -U(i,k) */

1319:       /* add multiple of row i to k-th row ... */
1320:       jmin = ili + 1;
1321:       nz   = bi[i+1] - jmin;
1322:       if (nz > 0){
1323:         bcol = bj + jmin;
1324:         bval = ba + jmin;
1325:         while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1326:         /* ... add i to row list for next nonzero entry */
1327:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1328:         j     = bj[jmin];
1329:         jl[i] = jl[j]; jl[j] = i; /* update jl */
1330:       }
1331:       i = nexti;
1332:     }

1334:     /* check for zero pivot and save diagoanl element */
1335:     if (PetscRealPart(dk) == 0.0){
1336:       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1337:     } else if( PetscRealPart(dk) < 0.0){
1338:       PetscLogInfo((PetscObject)A,"Negative pivot %g in Cholesky factorizationn",1./PetscRealPart(dk));
1339:     }

1341:     /* save nonzero entries in k-th row of U ... */
1342:     ba[bi[k]] = 1.0/dk;
1343:     jmin      = bi[k]+1;
1344:     nz        = bi[k+1] - jmin;
1345:     if (nz){
1346:       bcol = bj + jmin;
1347:       bval = ba + jmin;
1348:       while (nz--){
1349:         *bval++       = rtmp[*bcol];
1350:         rtmp[*bcol++] = 0.0;
1351:       }
1352:       /* ... add k to row list for first nonzero entry in k-th row */
1353:       il[k] = jmin;
1354:       i     = bj[jmin];
1355:       jl[k] = jl[i]; jl[i] = k;
1356:     }
1357:   }
1358: 
1359:   PetscFree(rtmp);
1360:   PetscFree(il);
1361: 
1362:   C->factor       = FACTOR_CHOLESKY;
1363:   C->assembled    = PETSC_TRUE;
1364:   C->preallocated = PETSC_TRUE;
1365:   PetscLogFlops(b->mbs);
1366:   return(0);
1367: }

1369: #undef __FUNCT__  
1371: int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f)
1372: {
1374:   Mat C;

1377:   MatCholeskyFactorSymbolic(A,perm,f,&C);
1378:   MatCholeskyFactorNumeric(A,&C);
1379:   MatHeaderCopy(A,C);
1380:   return(0);
1381: }