Actual source code: sbaijfact.c

  1: /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/

 3:  #include src/mat/impls/baij/seq/baij.h
 4:  #include src/mat/impls/sbaij/seq/sbaij.h
 5:  #include src/vec/vecimpl.h
 6:  #include src/inline/ilu.h
 7:  #include include/petscis.h

  9: #if !defined(PETSC_USE_COMPLEX)
 10: /* 
 11:   input:
 12:    F -- numeric factor 
 13:   output:
 14:    nneg, nzero, npos: matrix inertia 
 15: */

 19: int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos)
 20: {
 21:   Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
 22:   PetscScalar  *dd = fact_ptr->a;
 23:   int          m = F->m,i;

 26:   if (nneig){
 27:     *nneig = 0;
 28:     for (i=0; i<m; i++){
 29:       if (PetscRealPart(dd[i]) < 0.0) (*nneig)++;
 30:     }
 31:   }
 32:   if (nzero){
 33:     *nzero = 0;
 34:     for (i=0; i<m; i++){
 35:       if (PetscRealPart(dd[i]) == 0.0) (*nzero)++;
 36:     }
 37:   }
 38:   if (npos){
 39:     *npos = 0;
 40:     for (i=0; i<m; i++){
 41:       if (PetscRealPart(dd[i]) > 0.0) (*npos)++;
 42:     }
 43:   }
 44:   return(0);
 45: }
 46: #endif /* !defined(PETSC_USE_COMPLEX) */

 48: /* Using Modified Sparse Row (MSR) storage.
 49: See page 85, "Iterative Methods ..." by Saad. */
 50: /*
 51:     Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 52: */
 53: /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
 56: int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
 57: {
 58:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
 59:   int          *rip,ierr,i,mbs = a->mbs,*ai,*aj;
 60:   int          *jutmp,bs = a->bs,bs2=a->bs2;
 61:   int          m,realloc = 0,prow;
 62:   int          *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
 63:   int          *il,ili,nextprow;
 64:   PetscReal    f = info->fill;
 65:   PetscTruth   perm_identity;

 68:   /* check whether perm is the identity mapping */
 69:   ISIdentity(perm,&perm_identity);

 71:   /* -- inplace factorization, i.e., use sbaij for *B -- */
 72:   if (perm_identity && bs==1 ){
 73:     if (!perm_identity) a->permute = PETSC_TRUE;
 74: 
 75:   ISGetIndices(perm,&rip);
 76: 
 77:   if (perm_identity){ /* without permutation */
 78:     ai = a->i; aj = a->j;
 79:   } else {            /* non-trivial permutation */
 80:     MatReorderingSeqSBAIJ(A,perm);
 81:     ai = a->inew; aj = a->jnew;
 82:   }
 83: 
 84:   /* initialization */
 85:   PetscMalloc((mbs+1)*sizeof(int),&iu);
 86:   umax  = (int)(f*ai[mbs] + 1);
 87:   PetscMalloc(umax*sizeof(int),&ju);
 88:   iu[0] = 0;
 89:   juidx = 0; /* index for ju */
 90:   PetscMalloc((3*mbs+1)*sizeof(int),&jl); /* linked list for getting pivot row */
 91:   q     = jl + mbs;   /* linked list for col index of active row */
 92:   il    = q  + mbs;
 93:   for (i=0; i<mbs; i++){
 94:     jl[i] = mbs;
 95:     q[i]  = 0;
 96:     il[i] = 0;
 97:   }

 99:   /* for each row k */
100:   for (k=0; k<mbs; k++){
101:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
102:     q[k] = mbs;
103:     /* initialize nonzero structure of k-th row to row rip[k] of A */
104:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
105:     jmax = ai[rip[k]+1];
106:     for (j=jmin; j<jmax; j++){
107:       vj = rip[aj[j]]; /* col. value */
108:       if(vj > k){
109:         qm = k;
110:         do {
111:           m  = qm; qm = q[m];
112:         } while(qm < vj);
113:         if (qm == vj) {
114:           SETERRQ(1," error: duplicate entry in A\n");
115:         }
116:         nzk++;
117:         q[m]  = vj;
118:         q[vj] = qm;
119:       } /* if(vj > k) */
120:     } /* for (j=jmin; j<jmax; j++) */

122:     /* modify nonzero structure of k-th row by computing fill-in
123:        for each row i to be merged in */
124:     prow = k;
125:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
126: 
127:     while (prow < k){
128:       nextprow = jl[prow];
129: 
130:       /* merge row prow into k-th row */
131:       ili = il[prow];
132:       jmin = ili + 1;  /* points to 2nd nzero entry in U(prow,k:mbs-1) */
133:       jmax = iu[prow+1];
134:       qm = k;
135:       for (j=jmin; j<jmax; j++){
136:         vj = ju[j];
137:         do {
138:           m = qm; qm = q[m];
139:         } while (qm < vj);
140:         if (qm != vj){  /* a fill */
141:           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
142:         }
143:       } /* end of for (j=jmin; j<jmax; j++) */
144:       if (jmin < jmax){
145:         il[prow] = jmin;
146:         j = ju[jmin];
147:         jl[prow] = jl[j]; jl[j] = prow;  /* update jl */
148:       }
149:       prow = nextprow;
150:     }
151: 
152:     /* update il and jl */
153:     if (nzk > 0){
154:       i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
155:       jl[k] = jl[i]; jl[i] = k;
156:       il[k] = iu[k] + 1;
157:     }
158:     iu[k+1] = iu[k] + nzk + 1;  /* include diag[k] */

160:     /* allocate more space to ju if needed */
161:     if (iu[k+1] > umax) {
162:       /* estimate how much additional space we will need */
163:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
164:       /* just double the memory each time */
165:       maxadd = umax;
166:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
167:       umax += maxadd;

169:       /* allocate a longer ju */
170:       PetscMalloc(umax*sizeof(int),&jutmp);
171:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
172:       PetscFree(ju);
173:       ju   = jutmp;
174:       realloc++; /* count how many times we realloc */
175:     }

177:     /* save nonzero structure of k-th row in ju */
178:     ju[juidx++] = k; /* diag[k] */
179:     i = k;
180:     while (nzk --) {
181:       i           = q[i];
182:       ju[juidx++] = i;
183:     }
184:   }

186:   if (ai[mbs] != 0) {
187:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
188:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
189:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
190:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
191:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
192:   } else {
193:      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
194:   }

196:   ISRestoreIndices(perm,&rip);
197:   /* PetscFree(q); */
198:   PetscFree(jl);

200:   /* put together the new matrix */
201:   MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);
202:   /* PetscLogObjectParent(*B,iperm); */
203:   b = (Mat_SeqSBAIJ*)(*B)->data;
204:   PetscFree(b->imax);
205:   b->singlemalloc = PETSC_FALSE;
206:   /* the next line frees the default space generated by the Create() */
207:   PetscFree(b->a);
208:   PetscFree(b->ilen);
209:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
210:   b->j    = ju;
211:   b->i    = iu;
212:   b->diag = 0;
213:   b->ilen = 0;
214:   b->imax = 0;
215:   b->row  = perm;
216:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
217:   PetscObjectReference((PetscObject)perm);
218:   b->icol = perm;
219:   PetscObjectReference((PetscObject)perm);
220:   PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
221:   /* In b structure:  Free imax, ilen, old a, old j.  
222:      Allocate idnew, solve_work, new a, new j */
223:   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
224:   b->s_maxnz = b->s_nz = iu[mbs];
225: 
226:   (*B)->factor                 = FACTOR_CHOLESKY;
227:   (*B)->info.factor_mallocs    = realloc;
228:   (*B)->info.fill_ratio_given  = f;
229:   if (ai[mbs] != 0) {
230:     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
231:   } else {
232:     (*B)->info.fill_ratio_needed = 0.0;
233:   }


236:   (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
237:   (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
238:   PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
239: 
240:   return(0);
241:   }
242:   /* -----------  end of new code --------------------*/


245:   if (!perm_identity) a->permute = PETSC_TRUE;
246: 
247:   ISGetIndices(perm,&rip);
248: 
249:   if (perm_identity){ /* without permutation */
250:     ai = a->i; aj = a->j;
251:   } else {            /* non-trivial permutation */
252:     MatReorderingSeqSBAIJ(A,perm);
253:     ai = a->inew; aj = a->jnew;
254:   }
255: 
256:   /* initialization */
257:   PetscMalloc((mbs+1)*sizeof(int),&iu);
258:   umax  = (int)(f*ai[mbs] + 1); umax += mbs + 1;
259:   PetscMalloc(umax*sizeof(int),&ju);
260:   iu[0] = mbs+1;
261:   juidx = mbs + 1; /* index for ju */
262:   PetscMalloc(2*mbs*sizeof(int),&jl); /* linked list for pivot row */
263:   q     = jl + mbs;   /* linked list for col index */
264:   for (i=0; i<mbs; i++){
265:     jl[i] = mbs;
266:     q[i] = 0;
267:   }

269:   /* for each row k */
270:   for (k=0; k<mbs; k++){
271:     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
272:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
273:     q[k] = mbs;
274:     /* initialize nonzero structure of k-th row to row rip[k] of A */
275:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
276:     jmax = ai[rip[k]+1];
277:     for (j=jmin; j<jmax; j++){
278:       vj = rip[aj[j]]; /* col. value */
279:       if(vj > k){
280:         qm = k;
281:         do {
282:           m  = qm; qm = q[m];
283:         } while(qm < vj);
284:         if (qm == vj) {
285:           SETERRQ(1," error: duplicate entry in A\n");
286:         }
287:         nzk++;
288:         q[m]  = vj;
289:         q[vj] = qm;
290:       } /* if(vj > k) */
291:     } /* for (j=jmin; j<jmax; j++) */

293:     /* modify nonzero structure of k-th row by computing fill-in
294:        for each row i to be merged in */
295:     prow = k;
296:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
297: 
298:     while (prow < k){
299:       /* merge row prow into k-th row */
300:       jmin = iu[prow] + 1; jmax = iu[prow+1];
301:       qm = k;
302:       for (j=jmin; j<jmax; j++){
303:         vj = ju[j];
304:         do {
305:           m = qm; qm = q[m];
306:         } while (qm < vj);
307:         if (qm != vj){
308:          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
309:         }
310:       }
311:       prow = jl[prow]; /* next pivot row */
312:     }
313: 
314:     /* add k to row list for first nonzero element in k-th row */
315:     if (nzk > 0){
316:       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
317:       jl[k] = jl[i]; jl[i] = k;
318:     }
319:     iu[k+1] = iu[k] + nzk;

321:     /* allocate more space to ju if needed */
322:     if (iu[k+1] > umax) {
323:       /* estimate how much additional space we will need */
324:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
325:       /* just double the memory each time */
326:       maxadd = umax;
327:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
328:       umax += maxadd;

330:       /* allocate a longer ju */
331:       PetscMalloc(umax*sizeof(int),&jutmp);
332:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
333:       PetscFree(ju);
334:       ju   = jutmp;
335:       realloc++; /* count how many times we realloc */
336:     }

338:     /* save nonzero structure of k-th row in ju */
339:     i=k;
340:     while (nzk --) {
341:       i           = q[i];
342:       ju[juidx++] = i;
343:     }
344:   }

346:   if (ai[mbs] != 0) {
347:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
348:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
349:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
350:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
351:     PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
352:   } else {
353:      PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
354:   }

356:   ISRestoreIndices(perm,&rip);
357:   /* PetscFree(q); */
358:   PetscFree(jl);

360:   /* put together the new matrix */
361:   MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);
362:   /* PetscLogObjectParent(*B,iperm); */
363:   b = (Mat_SeqSBAIJ*)(*B)->data;
364:   PetscFree(b->imax);
365:   b->singlemalloc = PETSC_FALSE;
366:   /* the next line frees the default space generated by the Create() */
367:   PetscFree(b->a);
368:   PetscFree(b->ilen);
369:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
370:   b->j    = ju;
371:   b->i    = iu;
372:   b->diag = 0;
373:   b->ilen = 0;
374:   b->imax = 0;
375:   b->row  = perm;
376:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
377:   PetscObjectReference((PetscObject)perm);
378:   b->icol = perm;
379:   PetscObjectReference((PetscObject)perm);
380:   PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
381:   /* In b structure:  Free imax, ilen, old a, old j.  
382:      Allocate idnew, solve_work, new a, new j */
383:   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
384:   b->s_maxnz = b->s_nz = iu[mbs];
385: 
386:   (*B)->factor                 = FACTOR_CHOLESKY;
387:   (*B)->info.factor_mallocs    = realloc;
388:   (*B)->info.fill_ratio_given  = f;
389:   if (ai[mbs] != 0) {
390:     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
391:   } else {
392:     (*B)->info.fill_ratio_needed = 0.0;
393:   }

395:   if (perm_identity){
396:     switch (bs) {
397:       case 1:
398:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
399:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
400:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
401:         break;
402:       case 2:
403:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
404:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_2_NaturalOrdering;
405:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
406:         break;
407:       case 3:
408:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
409:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_3_NaturalOrdering;
410:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
411:         break;
412:       case 4:
413:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
414:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_4_NaturalOrdering;
415:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
416:         break;
417:       case 5:
418:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
419:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_5_NaturalOrdering;
420:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
421:         break;
422:       case 6:
423:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
424:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_6_NaturalOrdering;
425:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
426:         break;
427:       case 7:
428:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
429:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_7_NaturalOrdering;
430:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
431:       break;
432:       default:
433:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
434:         (*B)->ops->solve           = MatSolve_SeqSBAIJ_N_NaturalOrdering;
435:         PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
436:       break;
437:     }
438:   }

440:   return(0);
441: }


446: int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
447: {
448:   Mat                C = *B;
449:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
450:   IS                 perm = b->row;
451:   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
452:   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
453:   int                bs=a->bs,bs2 = a->bs2;
454:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
455:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
456:   MatScalar          *work;
457:   int                *pivots;


461:   /* initialization */
462:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
463:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
464:   PetscMalloc(2*mbs*sizeof(int),&il);
465:   jl   = il + mbs;
466:   for (i=0; i<mbs; i++) {
467:     jl[i] = mbs; il[0] = 0;
468:   }
469:   PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
470:   uik  = dk + bs2;
471:   work = uik + bs2;
472:   PetscMalloc(bs*sizeof(int),&pivots);
473: 
474:   ISGetIndices(perm,&perm_ptr);
475: 
476:   /* check permutation */
477:   if (!a->permute){
478:     ai = a->i; aj = a->j; aa = a->a;
479:   } else {
480:     ai   = a->inew; aj = a->jnew;
481:     PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
482:     PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
483:     PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
484:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));

486:     for (i=0; i<mbs; i++){
487:       jmin = ai[i]; jmax = ai[i+1];
488:       for (j=jmin; j<jmax; j++){
489:         while (a2anew[j] != j){
490:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
491:           for (k1=0; k1<bs2; k1++){
492:             dk[k1]       = aa[k*bs2+k1];
493:             aa[k*bs2+k1] = aa[j*bs2+k1];
494:             aa[j*bs2+k1] = dk[k1];
495:           }
496:         }
497:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
498:         if (i > aj[j]){
499:           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
500:           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
501:           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
502:           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
503:             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
504:           }
505:         }
506:       }
507:     }
508:     PetscFree(a2anew);
509:   }
510: 
511:   /* for each row k */
512:   for (k = 0; k<mbs; k++){

514:     /*initialize k-th row with elements nonzero in row perm(k) of A */
515:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
516: 
517:     ap = aa + jmin*bs2;
518:     for (j = jmin; j < jmax; j++){
519:       vj = perm_ptr[aj[j]];         /* block col. index */
520:       rtmp_ptr = rtmp + vj*bs2;
521:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
522:     }

524:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
525:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
526:     i = jl[k]; /* first row to be added to k_th row  */

528:     while (i < k){
529:       nexti = jl[i]; /* next row to be added to k_th row */

531:       /* compute multiplier */
532:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

534:       /* uik = -inv(Di)*U_bar(i,k) */
535:       diag = ba + i*bs2;
536:       u    = ba + ili*bs2;
537:       PetscMemzero(uik,bs2*sizeof(MatScalar));
538:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
539: 
540:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
541:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
542: 
543:       /* update -U(i,k) */
544:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

546:       /* add multiple of row i to k-th row ... */
547:       jmin = ili + 1; jmax = bi[i+1];
548:       if (jmin < jmax){
549:         for (j=jmin; j<jmax; j++) {
550:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
551:           rtmp_ptr = rtmp + bj[j]*bs2;
552:           u = ba + j*bs2;
553:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
554:         }
555: 
556:         /* ... add i to row list for next nonzero entry */
557:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
558:         j     = bj[jmin];
559:         jl[i] = jl[j]; jl[j] = i; /* update jl */
560:       }
561:       i = nexti;
562:     }

564:     /* save nonzero entries in k-th row of U ... */

566:     /* invert diagonal block */
567:     diag = ba+k*bs2;
568:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
569:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
570: 
571:     jmin = bi[k]; jmax = bi[k+1];
572:     if (jmin < jmax) {
573:       for (j=jmin; j<jmax; j++){
574:          vj = bj[j];           /* block col. index of U */
575:          u   = ba + j*bs2;
576:          rtmp_ptr = rtmp + vj*bs2;
577:          for (k1=0; k1<bs2; k1++){
578:            *u++        = *rtmp_ptr;
579:            *rtmp_ptr++ = 0.0;
580:          }
581:       }
582: 
583:       /* ... add k to row list for first nonzero entry in k-th row */
584:       il[k] = jmin;
585:       i     = bj[jmin];
586:       jl[k] = jl[i]; jl[i] = k;
587:     }
588:   }

590:   PetscFree(rtmp);
591:   PetscFree(il);
592:   PetscFree(dk);
593:   PetscFree(pivots);
594:   if (a->permute){
595:     PetscFree(aa);
596:   }

598:   ISRestoreIndices(perm,&perm_ptr);
599:   C->factor       = FACTOR_CHOLESKY;
600:   C->assembled    = PETSC_TRUE;
601:   C->preallocated = PETSC_TRUE;
602:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
603:   return(0);
604: }

608: int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
609: {
610:   Mat                C = *B;
611:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
612:   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
613:   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
614:   int                bs=a->bs,bs2 = a->bs2;
615:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
616:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;
617:   MatScalar          *work;
618:   int                *pivots;


622:   /* initialization */
623: 
624:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
625:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
626:   PetscMalloc(2*mbs*sizeof(int),&il);
627:   jl   = il + mbs;
628:   for (i=0; i<mbs; i++) {
629:     jl[i] = mbs; il[0] = 0;
630:   }
631:   PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
632:   uik  = dk + bs2;
633:   work = uik + bs2;
634:   PetscMalloc(bs*sizeof(int),&pivots);
635: 
636:   ai = a->i; aj = a->j; aa = a->a;
637: 
638:   /* for each row k */
639:   for (k = 0; k<mbs; k++){

641:     /*initialize k-th row with elements nonzero in row k of A */
642:     jmin = ai[k]; jmax = ai[k+1];
643:     ap = aa + jmin*bs2;
644:     for (j = jmin; j < jmax; j++){
645:       vj = aj[j];         /* block col. index */
646:       rtmp_ptr = rtmp + vj*bs2;
647:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
648:     }

650:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
651:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
652:     i = jl[k]; /* first row to be added to k_th row  */

654:     while (i < k){
655:       nexti = jl[i]; /* next row to be added to k_th row */

657:       /* compute multiplier */
658:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

660:       /* uik = -inv(Di)*U_bar(i,k) */
661:       diag = ba + i*bs2;
662:       u    = ba + ili*bs2;
663:       PetscMemzero(uik,bs2*sizeof(MatScalar));
664:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
665: 
666:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
667:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
668: 
669:       /* update -U(i,k) */
670:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

672:       /* add multiple of row i to k-th row ... */
673:       jmin = ili + 1; jmax = bi[i+1];
674:       if (jmin < jmax){
675:         for (j=jmin; j<jmax; j++) {
676:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
677:           rtmp_ptr = rtmp + bj[j]*bs2;
678:           u = ba + j*bs2;
679:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
680:         }
681: 
682:         /* ... add i to row list for next nonzero entry */
683:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
684:         j     = bj[jmin];
685:         jl[i] = jl[j]; jl[j] = i; /* update jl */
686:       }
687:       i = nexti;
688:     }

690:     /* save nonzero entries in k-th row of U ... */

692:     /* invert diagonal block */
693:     diag = ba+k*bs2;
694:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
695:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
696: 
697:     jmin = bi[k]; jmax = bi[k+1];
698:     if (jmin < jmax) {
699:       for (j=jmin; j<jmax; j++){
700:          vj = bj[j];           /* block col. index of U */
701:          u   = ba + j*bs2;
702:          rtmp_ptr = rtmp + vj*bs2;
703:          for (k1=0; k1<bs2; k1++){
704:            *u++        = *rtmp_ptr;
705:            *rtmp_ptr++ = 0.0;
706:          }
707:       }
708: 
709:       /* ... add k to row list for first nonzero entry in k-th row */
710:       il[k] = jmin;
711:       i     = bj[jmin];
712:       jl[k] = jl[i]; jl[i] = k;
713:     }
714:   }

716:   PetscFree(rtmp);
717:   PetscFree(il);
718:   PetscFree(dk);
719:   PetscFree(pivots);

721:   C->factor    = FACTOR_CHOLESKY;
722:   C->assembled = PETSC_TRUE;
723:   C->preallocated = PETSC_TRUE;
724:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
725:   return(0);
726: }

728: /*
729:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
730:     Version for blocks 2 by 2.
731: */
734: int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
735: {
736:   Mat                C = *B;
737:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
738:   IS                 perm = b->row;
739:   int                *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
740:   int                *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
741:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
742:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;

745: 
746:   /* initialization */
747:   /* il and jl record the first nonzero element in each row of the accessing 
748:      window U(0:k, k:mbs-1).
749:      jl:    list of rows to be added to uneliminated rows 
750:             i>= k: jl(i) is the first row to be added to row i
751:             i<  k: jl(i) is the row following row i in some list of rows
752:             jl(i) = mbs indicates the end of a list                        
753:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
754:             row i of U */
755:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
756:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
757:   PetscMalloc(2*mbs*sizeof(int),&il);
758:   jl   = il + mbs;
759:   for (i=0; i<mbs; i++) {
760:     jl[i] = mbs; il[0] = 0;
761:   }
762:   PetscMalloc(8*sizeof(MatScalar),&dk);
763:   uik  = dk + 4;
764:   ISGetIndices(perm,&perm_ptr);

766:   /* check permutation */
767:   if (!a->permute){
768:     ai = a->i; aj = a->j; aa = a->a;
769:   } else {
770:     ai   = a->inew; aj = a->jnew;
771:     PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
772:     PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
773:     PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
774:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));

776:     for (i=0; i<mbs; i++){
777:       jmin = ai[i]; jmax = ai[i+1];
778:       for (j=jmin; j<jmax; j++){
779:         while (a2anew[j] != j){
780:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
781:           for (k1=0; k1<4; k1++){
782:             dk[k1]       = aa[k*4+k1];
783:             aa[k*4+k1] = aa[j*4+k1];
784:             aa[j*4+k1] = dk[k1];
785:           }
786:         }
787:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
788:         if (i > aj[j]){
789:           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
790:           ap = aa + j*4;     /* ptr to the beginning of the block */
791:           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
792:           ap[1] = ap[2];
793:           ap[2] = dk[1];
794:         }
795:       }
796:     }
797:     PetscFree(a2anew);
798:   }

800:   /* for each row k */
801:   for (k = 0; k<mbs; k++){

803:     /*initialize k-th row with elements nonzero in row perm(k) of A */
804:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
805:     ap = aa + jmin*4;
806:     for (j = jmin; j < jmax; j++){
807:       vj = perm_ptr[aj[j]];         /* block col. index */
808:       rtmp_ptr = rtmp + vj*4;
809:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
810:     }

812:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
813:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
814:     i = jl[k]; /* first row to be added to k_th row  */

816:     while (i < k){
817:       nexti = jl[i]; /* next row to be added to k_th row */

819:       /* compute multiplier */
820:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

822:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
823:       diag = ba + i*4;
824:       u    = ba + ili*4;
825:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
826:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
827:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
828:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
829: 
830:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
831:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
832:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
833:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
834:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

836:       /* update -U(i,k): ba[ili] = uik */
837:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

839:       /* add multiple of row i to k-th row ... */
840:       jmin = ili + 1; jmax = bi[i+1];
841:       if (jmin < jmax){
842:         for (j=jmin; j<jmax; j++) {
843:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
844:           rtmp_ptr = rtmp + bj[j]*4;
845:           u = ba + j*4;
846:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
847:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
848:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
849:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
850:         }
851: 
852:         /* ... add i to row list for next nonzero entry */
853:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
854:         j     = bj[jmin];
855:         jl[i] = jl[j]; jl[j] = i; /* update jl */
856:       }
857:       i = nexti;
858:     }

860:     /* save nonzero entries in k-th row of U ... */

862:     /* invert diagonal block */
863:     diag = ba+k*4;
864:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
865:     Kernel_A_gets_inverse_A_2(diag);
866: 
867:     jmin = bi[k]; jmax = bi[k+1];
868:     if (jmin < jmax) {
869:       for (j=jmin; j<jmax; j++){
870:          vj = bj[j];           /* block col. index of U */
871:          u   = ba + j*4;
872:          rtmp_ptr = rtmp + vj*4;
873:          for (k1=0; k1<4; k1++){
874:            *u++        = *rtmp_ptr;
875:            *rtmp_ptr++ = 0.0;
876:          }
877:       }
878: 
879:       /* ... add k to row list for first nonzero entry in k-th row */
880:       il[k] = jmin;
881:       i     = bj[jmin];
882:       jl[k] = jl[i]; jl[i] = k;
883:     }
884:   }

886:   PetscFree(rtmp);
887:   PetscFree(il);
888:   PetscFree(dk);
889:   if (a->permute) {
890:     PetscFree(aa);
891:   }
892:   ISRestoreIndices(perm,&perm_ptr);
893:   C->factor    = FACTOR_CHOLESKY;
894:   C->assembled = PETSC_TRUE;
895:   C->preallocated = PETSC_TRUE;
896:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
897:   return(0);
898: }

900: /*
901:       Version for when blocks are 2 by 2 Using natural ordering
902: */
905: int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
906: {
907:   Mat                C = *B;
908:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
909:   int                ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
910:   int                *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
911:   MatScalar          *ba = b->a,*aa,*ap,*dk,*uik;
912:   MatScalar          *u,*diag,*rtmp,*rtmp_ptr;

915: 
916:   /* initialization */
917:   /* il and jl record the first nonzero element in each row of the accessing 
918:      window U(0:k, k:mbs-1).
919:      jl:    list of rows to be added to uneliminated rows 
920:             i>= k: jl(i) is the first row to be added to row i
921:             i<  k: jl(i) is the row following row i in some list of rows
922:             jl(i) = mbs indicates the end of a list                        
923:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
924:             row i of U */
925:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
926:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
927:   PetscMalloc(2*mbs*sizeof(int),&il);
928:   jl   = il + mbs;
929:   for (i=0; i<mbs; i++) {
930:     jl[i] = mbs; il[0] = 0;
931:   }
932:   PetscMalloc(8*sizeof(MatScalar),&dk);
933:   uik  = dk + 4;
934: 
935:   ai = a->i; aj = a->j; aa = a->a;

937:   /* for each row k */
938:   for (k = 0; k<mbs; k++){

940:     /*initialize k-th row with elements nonzero in row k of A */
941:     jmin = ai[k]; jmax = ai[k+1];
942:     ap = aa + jmin*4;
943:     for (j = jmin; j < jmax; j++){
944:       vj = aj[j];         /* block col. index */
945:       rtmp_ptr = rtmp + vj*4;
946:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
947:     }
948: 
949:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
950:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
951:     i = jl[k]; /* first row to be added to k_th row  */

953:     while (i < k){
954:       nexti = jl[i]; /* next row to be added to k_th row */

956:       /* compute multiplier */
957:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

959:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
960:       diag = ba + i*4;
961:       u    = ba + ili*4;
962:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
963:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
964:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
965:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
966: 
967:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
968:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
969:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
970:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
971:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

973:       /* update -U(i,k): ba[ili] = uik */
974:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

976:       /* add multiple of row i to k-th row ... */
977:       jmin = ili + 1; jmax = bi[i+1];
978:       if (jmin < jmax){
979:         for (j=jmin; j<jmax; j++) {
980:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
981:           rtmp_ptr = rtmp + bj[j]*4;
982:           u = ba + j*4;
983:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
984:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
985:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
986:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
987:         }
988: 
989:         /* ... add i to row list for next nonzero entry */
990:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
991:         j     = bj[jmin];
992:         jl[i] = jl[j]; jl[j] = i; /* update jl */
993:       }
994:       i = nexti;
995:     }

997:     /* save nonzero entries in k-th row of U ... */

999:     /* invert diagonal block */
1000:     diag = ba+k*4;
1001:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1002:     Kernel_A_gets_inverse_A_2(diag);
1003: 
1004:     jmin = bi[k]; jmax = bi[k+1];
1005:     if (jmin < jmax) {
1006:       for (j=jmin; j<jmax; j++){
1007:          vj = bj[j];           /* block col. index of U */
1008:          u   = ba + j*4;
1009:          rtmp_ptr = rtmp + vj*4;
1010:          for (k1=0; k1<4; k1++){
1011:            *u++        = *rtmp_ptr;
1012:            *rtmp_ptr++ = 0.0;
1013:          }
1014:       }
1015: 
1016:       /* ... add k to row list for first nonzero entry in k-th row */
1017:       il[k] = jmin;
1018:       i     = bj[jmin];
1019:       jl[k] = jl[i]; jl[i] = k;
1020:     }
1021:   }

1023:   PetscFree(rtmp);
1024:   PetscFree(il);
1025:   PetscFree(dk);

1027:   C->factor    = FACTOR_CHOLESKY;
1028:   C->assembled = PETSC_TRUE;
1029:   C->preallocated = PETSC_TRUE;
1030:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1031:   return(0);
1032: }

1034: /*
1035:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
1036:     Version for blocks are 1 by 1.
1037: */
1040: int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
1041: {
1042:   Mat                C = *B;
1043:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1044:   IS                 ip = b->row;
1045:   int                *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
1046:   int                *ai,*aj,*r;
1047:   int                k,jmin,jmax,*jl,*il,vj,nexti,ili;
1048:   MatScalar          *rtmp;
1049:   MatScalar          *ba = b->a,*aa,ak;
1050:   MatScalar          dk,uikdi;

1053:   ISGetIndices(ip,&rip);
1054:   if (!a->permute){
1055:     ai = a->i; aj = a->j; aa = a->a;
1056:   } else {
1057:     ai = a->inew; aj = a->jnew;
1058:     PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);
1059:     PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));
1060:     PetscMalloc(ai[mbs]*sizeof(int),&r);
1061:     ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));

1063:     jmin = ai[0]; jmax = ai[mbs];
1064:     for (j=jmin; j<jmax; j++){
1065:       while (r[j] != j){
1066:         k = r[j]; r[j] = r[k]; r[k] = k;
1067:         ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
1068:       }
1069:     }
1070:     PetscFree(r);
1071:   }
1072: 
1073:   /* initialization */
1074:   /* il and jl record the first nonzero element in each row of the accessing 
1075:      window U(0:k, k:mbs-1).
1076:      jl:    list of rows to be added to uneliminated rows 
1077:             i>= k: jl(i) is the first row to be added to row i
1078:             i<  k: jl(i) is the row following row i in some list of rows
1079:             jl(i) = mbs indicates the end of a list                        
1080:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
1081:             row i of U */
1082:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1083:   PetscMalloc(2*mbs*sizeof(int),&il);
1084:   jl   = il + mbs;
1085:   for (i=0; i<mbs; i++) {
1086:     rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1087:   }

1089:   /* for each row k */
1090:   for (k = 0; k<mbs; k++){

1092:     /*initialize k-th row with elements nonzero in row perm(k) of A */
1093:     jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1094: 
1095:     for (j = jmin; j < jmax; j++){
1096:       vj = rip[aj[j]];
1097:       rtmp[vj] = aa[j];
1098:     }

1100:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1101:     dk = rtmp[k];
1102:     i = jl[k]; /* first row to be added to k_th row  */

1104:     while (i < k){
1105:       nexti = jl[i]; /* next row to be added to k_th row */

1107:       /* compute multiplier, update D(k) and U(i,k) */
1108:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1109:       uikdi = - ba[ili]*ba[i];
1110:       dk += uikdi*ba[ili];
1111:       ba[ili] = uikdi; /* -U(i,k) */

1113:       /* add multiple of row i to k-th row ... */
1114:       jmin = ili + 1; jmax = bi[i+1];
1115:       if (jmin < jmax){
1116:         for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1117:         /* ... add i to row list for next nonzero entry */
1118:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1119:         j     = bj[jmin];
1120:         jl[i] = jl[j]; jl[j] = i; /* update jl */
1121:       }
1122:       i = nexti;
1123:     }

1125:     /* check for zero pivot and save diagoanl element */
1126:     if (dk == 0.0){
1127:       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1128:       /*
1129:     } else if (PetscRealPart(dk) < 0.0){
1130:       SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk);  
1131:       */
1132:     }

1134:     /* save nonzero entries in k-th row of U ... */
1135:     ba[k] = 1.0/dk;
1136:     jmin = bi[k]; jmax = bi[k+1];
1137:     if (jmin < jmax) {
1138:       for (j=jmin; j<jmax; j++){
1139:          vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1140:       }
1141:       /* ... add k to row list for first nonzero entry in k-th row */
1142:       il[k] = jmin;
1143:       i     = bj[jmin];
1144:       jl[k] = jl[i]; jl[i] = k;
1145:     }
1146:   }
1147: 
1148:   PetscFree(rtmp);
1149:   PetscFree(il);
1150:   if (a->permute){
1151:     PetscFree(aa);
1152:   }

1154:   ISRestoreIndices(ip,&rip);
1155:   C->factor    = FACTOR_CHOLESKY;
1156:   C->assembled = PETSC_TRUE;
1157:   C->preallocated = PETSC_TRUE;
1158:   PetscLogFlops(b->mbs);
1159:   return(0);
1160: }

1162: /*
1163:   Version for when blocks are 1 by 1 Using natural ordering
1164: */
1167: int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
1168: {
1169:   Mat                C = *B;
1170:   Mat_SeqSBAIJ       *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1171:   int                ierr,i,j,mbs = a->mbs;
1172:   int                *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1173:   int                k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
1174:   MatScalar          *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1175:   PetscReal          damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
1176:   PetscTruth         damp,chshift;
1177:   int                nshift=0;

1180:   /* initialization */
1181:   /* il and jl record the first nonzero element in each row of the accessing 
1182:      window U(0:k, k:mbs-1).
1183:      jl:    list of rows to be added to uneliminated rows 
1184:             i>= k: jl(i) is the first row to be added to row i
1185:             i<  k: jl(i) is the row following row i in some list of rows
1186:             jl(i) = mbs indicates the end of a list                        
1187:      il(i): points to the first nonzero element in U(i,k:mbs-1) 
1188:   */
1189:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1190:   PetscMalloc(2*mbs*sizeof(int),&il);
1191:   jl   = il + mbs;

1193:   shift_amount = 0;
1194:   do {
1195:     damp = PETSC_FALSE;
1196:     chshift = PETSC_FALSE;
1197:     for (i=0; i<mbs; i++) {
1198:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1199:     }

1201:     for (k = 0; k<mbs; k++){ /* row k */
1202:     /*initialize k-th row with elements nonzero in row perm(k) of A */
1203:       nz   = ai[k+1] - ai[k];
1204:       acol = aj + ai[k];
1205:       aval = aa + ai[k];
1206:       bval = ba + bi[k];
1207:       while (nz -- ){
1208:         rtmp[*acol++] = *aval++;
1209:         *bval++       = 0.0; /* for in-place factorization */
1210:       }
1211:       /* damp the diagonal of the matrix */
1212:       if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1213: 
1214:       /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1215:       dk = rtmp[k];
1216:       i  = jl[k]; /* first row to be added to k_th row  */

1218:       while (i < k){
1219:         nexti = jl[i]; /* next row to be added to k_th row */
1220: 
1221:         /* compute multiplier, update D(k) and U(i,k) */
1222:         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1223:         uikdi = - ba[ili]*ba[bi[i]];
1224:         dk   += uikdi*ba[ili];
1225:         ba[ili] = uikdi; /* -U(i,k) */

1227:         /* add multiple of row i to k-th row ... */
1228:         jmin = ili + 1;
1229:         nz   = bi[i+1] - jmin;
1230:         if (nz > 0){
1231:           bcol = bj + jmin;
1232:           bval = ba + jmin;
1233:           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1234:           /* ... add i to row list for next nonzero entry */
1235:           il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1236:           j     = bj[jmin];
1237:           jl[i] = jl[j]; jl[j] = i; /* update jl */
1238:         }
1239:         i = nexti;
1240:       }

1242:       /* check for zero pivot and save diagonal element */
1243:       if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1244:         PetscReal rs = -PetscRealPart(dk);
1245:         jmin      = bi[k]+1;
1246:         nz        = bi[k+1] - jmin;
1247:         if (nz){
1248:           bcol = bj + jmin;
1249:           bval = ba + jmin;
1250:           while (nz--){
1251:             rs += PetscAbsScalar(rtmp[*bcol++]);
1252:           }
1253:         }
1254:         shift_amount = rs;
1255:         chshift  = PETSC_TRUE;
1256:         nshift++;
1257:         break;
1258:       }
1259:       if (PetscRealPart(dk) < zeropivot){
1260:         if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1261:         if (damping > 0.0) {
1262:           if (ndamp) damping *= 2.0;
1263:           damp = PETSC_TRUE;
1264:           ndamp++;
1265:           break;
1266:         } else if (PetscAbsScalar(dk) < zeropivot){
1267:           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1268:         } else {
1269:           PetscLogInfo((PetscObject)A,"Negative pivot %g in row %d of Cholesky factorization\n",PetscRealPart(dk),k);
1270:         }
1271:       }
1272: 
1273:       /* save nonzero entries in k-th row of U ... */
1274:       /* printf("%d, dk: %g, 1/dk: %g\n",k,dk,1/dk); */
1275:       ba[bi[k]] = 1.0/dk;
1276:       jmin      = bi[k]+1;
1277:       nz        = bi[k+1] - jmin;
1278:       if (nz){
1279:         bcol = bj + jmin;
1280:         bval = ba + jmin;
1281:         while (nz--){
1282:           *bval++       = rtmp[*bcol];
1283:           rtmp[*bcol++] = 0.0;
1284:         }
1285:         /* ... add k to row list for first nonzero entry in k-th row */
1286:         il[k] = jmin;
1287:         i     = bj[jmin];
1288:         jl[k] = jl[i]; jl[i] = k;
1289:       }
1290:     } /* end of for (k = 0; k<mbs; k++) */
1291:   } while (damp||chshift);
1292:   PetscFree(rtmp);
1293:   PetscFree(il);
1294: 
1295:   C->factor       = FACTOR_CHOLESKY;
1296:   C->assembled    = PETSC_TRUE;
1297:   C->preallocated = PETSC_TRUE;
1298:   PetscLogFlops(b->mbs);
1299:   if (ndamp) {
1300:     PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %d damping value %g\n",ndamp,damping);
1301:   }
1302:   return(0);
1303: }

1307: int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info)
1308: {
1310:   Mat C;

1313:   MatCholeskyFactorSymbolic(A,perm,info,&C);
1314:   MatCholeskyFactorNumeric(A,&C);
1315:   MatHeaderCopy(A,C);
1316:   return(0);
1317: }