Actual source code: sbaijfact.c

  1: #define PETSCMAT_DLL

 3:  #include ../src/mat/impls/baij/seq/baij.h
 4:  #include ../src/mat/impls/sbaij/seq/sbaij.h
 5:  #include ../src/mat/blockinvert.h
 6:  #include petscis.h

  8: #if !defined(PETSC_USE_COMPLEX)
  9: /* 
 10:   input:
 11:    F -- numeric factor 
 12:   output:
 13:    nneg, nzero, npos: matrix inertia 
 14: */

 18: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos)
 19: {
 20:   Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
 21:   MatScalar    *dd = fact_ptr->a;
 22:   PetscInt     mbs=fact_ptr->mbs,bs=F->rmap->bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i;

 25:   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs);
 26:   nneig_tmp = 0; npos_tmp = 0;
 27:   for (i=0; i<mbs; i++){
 28:     if (PetscRealPart(dd[*fi]) > 0.0){
 29:       npos_tmp++;
 30:     } else if (PetscRealPart(dd[*fi]) < 0.0){
 31:       nneig_tmp++;
 32:     }
 33:     fi++;
 34:   }
 35:   if (nneig) *nneig = nneig_tmp;
 36:   if (npos)  *npos  = npos_tmp;
 37:   if (nzero) *nzero = mbs - nneig_tmp - npos_tmp;

 39:   return(0);
 40: }
 41: #endif /* !defined(PETSC_USE_COMPLEX) */

 43: /* 
 44:   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 45:   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 
 46: */
 49: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo *info)
 50: {
 51:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
 53:   const PetscInt *rip,*ai,*aj;
 54:   PetscInt       i,mbs = a->mbs,*jutmp,bs = A->rmap->bs,bs2=a->bs2;
 55:   PetscInt       m,reallocs = 0,prow;
 56:   PetscInt       *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
 57:   PetscReal      f = info->fill;
 58:   PetscTruth     perm_identity;

 61:   /* check whether perm is the identity mapping */
 62:   ISIdentity(perm,&perm_identity);
 63:   ISGetIndices(perm,&rip);
 64: 
 65:   if (perm_identity){ /* without permutation */
 66:     a->permute = PETSC_FALSE;
 67:     ai = a->i; aj = a->j;
 68:   } else {            /* non-trivial permutation */
 69:     a->permute = PETSC_TRUE;
 70:     MatReorderingSeqSBAIJ(A,perm);
 71:     ai = a->inew; aj = a->jnew;
 72:   }
 73: 
 74:   /* initialization */
 75:   PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);
 76:   umax  = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
 77:   PetscMalloc(umax*sizeof(PetscInt),&ju);
 78:   iu[0] = mbs+1;
 79:   juidx = mbs + 1; /* index for ju */
 80:   /* jl linked list for pivot row -- linked list for col index */
 81:   PetscMalloc2(mbs,PetscInt,&jl,mbs,PetscInt,&q);
 82:   for (i=0; i<mbs; i++){
 83:     jl[i] = mbs;
 84:     q[i] = 0;
 85:   }

 87:   /* for each row k */
 88:   for (k=0; k<mbs; k++){
 89:     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
 90:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
 91:     q[k] = mbs;
 92:     /* initialize nonzero structure of k-th row to row rip[k] of A */
 93:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
 94:     jmax = ai[rip[k]+1];
 95:     for (j=jmin; j<jmax; j++){
 96:       vj = rip[aj[j]]; /* col. value */
 97:       if(vj > k){
 98:         qm = k;
 99:         do {
100:           m  = qm; qm = q[m];
101:         } while(qm < vj);
102:         if (qm == vj) {
103:           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
104:         }
105:         nzk++;
106:         q[m]  = vj;
107:         q[vj] = qm;
108:       } /* if(vj > k) */
109:     } /* for (j=jmin; j<jmax; j++) */

111:     /* modify nonzero structure of k-th row by computing fill-in
112:        for each row i to be merged in */
113:     prow = k;
114:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
115: 
116:     while (prow < k){
117:       /* merge row prow into k-th row */
118:       jmin = iu[prow] + 1; jmax = iu[prow+1];
119:       qm = k;
120:       for (j=jmin; j<jmax; j++){
121:         vj = ju[j];
122:         do {
123:           m = qm; qm = q[m];
124:         } while (qm < vj);
125:         if (qm != vj){
126:          nzk++; q[m] = vj; q[vj] = qm; qm = vj;
127:         }
128:       }
129:       prow = jl[prow]; /* next pivot row */
130:     }
131: 
132:     /* add k to row list for first nonzero element in k-th row */
133:     if (nzk > 0){
134:       i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
135:       jl[k] = jl[i]; jl[i] = k;
136:     }
137:     iu[k+1] = iu[k] + nzk;

139:     /* allocate more space to ju if needed */
140:     if (iu[k+1] > umax) {
141:       /* estimate how much additional space we will need */
142:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
143:       /* just double the memory each time */
144:       maxadd = umax;
145:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
146:       umax += maxadd;

148:       /* allocate a longer ju */
149:       PetscMalloc(umax*sizeof(PetscInt),&jutmp);
150:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));
151:       PetscFree(ju);
152:       ju   = jutmp;
153:       reallocs++; /* count how many times we realloc */
154:     }

156:     /* save nonzero structure of k-th row in ju */
157:     i=k;
158:     while (nzk --) {
159:       i           = q[i];
160:       ju[juidx++] = i;
161:     }
162:   }

164: #if defined(PETSC_USE_INFO)
165:   if (ai[mbs] != 0) {
166:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
167:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
168:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
169:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
170:     PetscInfo(A,"for best performance.\n");
171:   } else {
172:     PetscInfo(A,"Empty matrix.\n");
173:   }
174: #endif

176:   ISRestoreIndices(perm,&rip);
177:   PetscFree2(jl,q);

179:   /* put together the new matrix */
180:   MatSeqSBAIJSetPreallocation_SeqSBAIJ(F,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);

182:   /* PetscLogObjectParent(B,iperm); */
183:   b = (Mat_SeqSBAIJ*)(F)->data;
184:   b->singlemalloc = PETSC_FALSE;
185:   b->free_a       = PETSC_TRUE;
186:   b->free_ij       = PETSC_TRUE;
187:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
188:   b->j    = ju;
189:   b->i    = iu;
190:   b->diag = 0;
191:   b->ilen = 0;
192:   b->imax = 0;
193:   b->row  = perm;
194:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
195:   PetscObjectReference((PetscObject)perm);
196:   b->icol = perm;
197:   PetscObjectReference((PetscObject)perm);
198:   PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
199:   /* In b structure:  Free imax, ilen, old a, old j.  
200:      Allocate idnew, solve_work, new a, new j */
201:   PetscLogObjectMemory(F,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
202:   b->maxnz = b->nz = iu[mbs];
203: 
204:   (F)->info.factor_mallocs    = reallocs;
205:   (F)->info.fill_ratio_given  = f;
206:   if (ai[mbs] != 0) {
207:     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
208:   } else {
209:     (F)->info.fill_ratio_needed = 0.0;
210:   }
211:   MatSeqSBAIJSetNumericFactorization_inplace(F,perm_identity);
212:   return(0);
213: }
214: /*
215:     Symbolic U^T*D*U factorization for SBAIJ format. 
216:     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
217: */
218:  #include petscbt.h
219:  #include ../src/mat/utils/freespace.h
222: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
223: {
224:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
225:   Mat_SeqSBAIJ       *b;
226:   PetscErrorCode     ierr;
227:   PetscTruth         perm_identity,missing;
228:   PetscReal          fill = info->fill;
229:   const PetscInt     *rip,*ai=a->i,*aj=a->j;
230:   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d;
231:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
232:   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
233:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
234:   PetscBT            lnkbt;

237:   if (bs > 1){
238:     MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);
239:     return(0);
240:   }
241:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
242:   MatMissingDiagonal(A,&missing,&d);
243:   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);

245:   /* check whether perm is the identity mapping */
246:   ISIdentity(perm,&perm_identity);
247:   if (!perm_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
248:   a->permute = PETSC_FALSE;
249:   ISGetIndices(perm,&rip);

251:   /* initialization */
252:   PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);
253:   PetscMalloc((mbs+1)*sizeof(PetscInt),&udiag);
254:   ui[0] = 0;

256:   /* jl: linked list for storing indices of the pivot rows 
257:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
258:   PetscMalloc4(mbs,PetscInt*,&ui_ptr,mbs,PetscInt,&il,mbs,PetscInt,&jl,mbs,PetscInt,&cols);
259:   for (i=0; i<mbs; i++){
260:     jl[i] = mbs; il[i] = 0;
261:   }

263:   /* create and initialize a linked list for storing column indices of the active row k */
264:   nlnk = mbs + 1;
265:   PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);

267:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
268:   PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);
269:   current_space = free_space;

271:   for (k=0; k<mbs; k++){  /* for each active row k */
272:     /* initialize lnk by the column indices of row rip[k] of A */
273:     nzk   = 0;
274:     ncols = ai[k+1] - ai[k];
275:     if (!ncols) SETERRQ1(PETSC_ERR_MAT_CH_ZRPVT,"Empty row %D in matrix ",k);
276:     for (j=0; j<ncols; j++){
277:       i = *(aj + ai[k] + j);
278:       cols[j] = i;
279:     }
280:     PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);
281:     nzk += nlnk;

283:     /* update lnk by computing fill-in for each pivot row to be merged in */
284:     prow = jl[k]; /* 1st pivot row */
285: 
286:     while (prow < k){
287:       nextprow = jl[prow];
288:       /* merge prow into k-th row */
289:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
290:       jmax = ui[prow+1];
291:       ncols = jmax-jmin;
292:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
293:       PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);
294:       nzk += nlnk;

296:       /* update il and jl for prow */
297:       if (jmin < jmax){
298:         il[prow] = jmin;
299:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
300:       }
301:       prow = nextprow;
302:     }

304:     /* if free space is not available, make more free space */
305:     if (current_space->local_remaining<nzk) {
306:       i  = mbs - k + 1; /* num of unfactored rows */
307:       i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
308:       PetscFreeSpaceGet(i,&current_space);
309:       reallocs++;
310:     }

312:     /* copy data into free space, then initialize lnk */
313:     PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);

315:     /* add the k-th row into il and jl */
316:     if (nzk > 1){
317:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
318:       jl[k] = jl[i]; jl[i] = k;
319:       il[k] = ui[k] + 1;
320:     }
321:     ui_ptr[k] = current_space->array;
322:     current_space->array           += nzk;
323:     current_space->local_used      += nzk;
324:     current_space->local_remaining -= nzk;

326:     ui[k+1] = ui[k] + nzk;
327:   }

329: #if defined(PETSC_USE_INFO)
330:   if (ai[mbs] != 0) {
331:     PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
332:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
333:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
334:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
335:   } else {
336:     PetscInfo(A,"Empty matrix.\n");
337:   }
338: #endif

340:   ISRestoreIndices(perm,&rip);
341:   PetscFree4(ui_ptr,il,jl,cols);

343:   /* destroy list of free space and other temporary array(s) */
344:   PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);
345:   PetscFreeSpaceContiguous_Cholesky(&free_space,uj,mbs,ui,udiag); /* store matrix factor */
346:   PetscLLDestroy(lnk,lnkbt);

348:   /* put together the new matrix in MATSEQSBAIJ format */
349:   MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
350: 
351:   b = (Mat_SeqSBAIJ*)fact->data;
352:   b->singlemalloc = PETSC_FALSE;
353:   b->free_a       = PETSC_TRUE;
354:   b->free_ij      = PETSC_TRUE;
355:   PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);
356:   b->j    = uj;
357:   b->i    = ui;
358:   b->diag = udiag;
359:   b->free_diag = PETSC_TRUE;
360:   b->ilen = 0;
361:   b->imax = 0;
362:   b->row  = perm;
363:   b->icol = perm;
364:   PetscObjectReference((PetscObject)perm);
365:   PetscObjectReference((PetscObject)perm);
366:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
367:   PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);
368:   PetscLogObjectMemory(fact,ui[mbs]*(sizeof(PetscInt)+sizeof(MatScalar)));
369:   b->maxnz = b->nz = ui[mbs];
370: 
371:   (fact)->info.factor_mallocs    = reallocs;
372:   (fact)->info.fill_ratio_given  = fill;
373:   if (ai[mbs] != 0) {
374:     (fact)->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
375:   } else {
376:     (fact)->info.fill_ratio_needed = 0.0;
377:   }
378:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
379:   return(0);
380: }

384: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
385: {
386:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
387:   Mat_SeqSBAIJ       *b;
388:   PetscErrorCode     ierr;
389:   PetscTruth         perm_identity,missing;
390:   PetscReal          fill = info->fill;
391:   const PetscInt     *rip,*ai,*aj;
392:   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d;
393:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
394:   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr;
395:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
396:   PetscBT            lnkbt;

399:   MatMissingDiagonal(A,&missing,&d);
400:   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);

402:   /*  
403:    This code originally uses Modified Sparse Row (MSR) storage
404:    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
405:    Then it is rewritten so the factor B takes seqsbaij format. However the associated 
406:    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 
407:    thus the original code in MSR format is still used for these cases. 
408:    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 
409:    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
410:   */
411:   if (bs > 1){
412:     MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);
413:     return(0);
414:   }

416:   /* check whether perm is the identity mapping */
417:   ISIdentity(perm,&perm_identity);

419:   if (perm_identity){
420:     a->permute = PETSC_FALSE;
421:     ai = a->i; aj = a->j;
422:   } else {
423:     SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
424:     /* There are bugs for reordeing. Needs further work. 
425:        MatReordering for sbaij cannot be efficient. User should use aij formt! */
426:     a->permute = PETSC_TRUE;
427:     MatReorderingSeqSBAIJ(A,perm);
428:     ai = a->inew; aj = a->jnew;
429:   }
430:   ISGetIndices(perm,&rip);

432:   /* initialization */
433:   PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);
434:   ui[0] = 0;

436:   /* jl: linked list for storing indices of the pivot rows 
437:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
438:   PetscMalloc4(mbs,PetscInt*,&ui_ptr,mbs,PetscInt,&il,mbs,PetscInt,&jl,mbs,PetscInt,&cols);
439:   for (i=0; i<mbs; i++){
440:     jl[i] = mbs; il[i] = 0;
441:   }

443:   /* create and initialize a linked list for storing column indices of the active row k */
444:   nlnk = mbs + 1;
445:   PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);

447:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
448:   PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);
449:   current_space = free_space;

451:   for (k=0; k<mbs; k++){  /* for each active row k */
452:     /* initialize lnk by the column indices of row rip[k] of A */
453:     nzk   = 0;
454:     ncols = ai[rip[k]+1] - ai[rip[k]];
455:     for (j=0; j<ncols; j++){
456:       i = *(aj + ai[rip[k]] + j);
457:       cols[j] = rip[i];
458:     }
459:     PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);
460:     nzk += nlnk;

462:     /* update lnk by computing fill-in for each pivot row to be merged in */
463:     prow = jl[k]; /* 1st pivot row */
464: 
465:     while (prow < k){
466:       nextprow = jl[prow];
467:       /* merge prow into k-th row */
468:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
469:       jmax = ui[prow+1];
470:       ncols = jmax-jmin;
471:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
472:       PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);
473:       nzk += nlnk;

475:       /* update il and jl for prow */
476:       if (jmin < jmax){
477:         il[prow] = jmin;
478:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
479:       }
480:       prow = nextprow;
481:     }

483:     /* if free space is not available, make more free space */
484:     if (current_space->local_remaining<nzk) {
485:       i = mbs - k + 1; /* num of unfactored rows */
486:       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
487:       PetscFreeSpaceGet(i,&current_space);
488:       reallocs++;
489:     }

491:     /* copy data into free space, then initialize lnk */
492:     PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);

494:     /* add the k-th row into il and jl */
495:     if (nzk-1 > 0){
496:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
497:       jl[k] = jl[i]; jl[i] = k;
498:       il[k] = ui[k] + 1;
499:     }
500:     ui_ptr[k] = current_space->array;
501:     current_space->array           += nzk;
502:     current_space->local_used      += nzk;
503:     current_space->local_remaining -= nzk;

505:     ui[k+1] = ui[k] + nzk;
506:   }

508: #if defined(PETSC_USE_INFO)
509:   if (ai[mbs] != 0) {
510:     PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
511:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
512:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
513:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
514:   } else {
515:     PetscInfo(A,"Empty matrix.\n");
516:   }
517: #endif

519:   ISRestoreIndices(perm,&rip);
520:   PetscFree4(ui_ptr,il,jl,cols);

522:   /* destroy list of free space and other temporary array(s) */
523:   PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);
524:   PetscFreeSpaceContiguous(&free_space,uj);
525:   PetscLLDestroy(lnk,lnkbt);

527:   /* put together the new matrix in MATSEQSBAIJ format */
528:   MatSeqSBAIJSetPreallocation_SeqSBAIJ(fact,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
529: 
530:   b = (Mat_SeqSBAIJ*)(fact)->data;
531:   b->singlemalloc = PETSC_FALSE;
532:   b->free_a       = PETSC_TRUE;
533:   b->free_ij      = PETSC_TRUE;
534:   PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);
535:   b->j    = uj;
536:   b->i    = ui;
537:   b->diag = 0;
538:   b->ilen = 0;
539:   b->imax = 0;
540:   b->row  = perm;
541:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
542:   PetscObjectReference((PetscObject)perm);
543:   b->icol = perm;
544:   PetscObjectReference((PetscObject)perm);
545:   PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);
546:   PetscLogObjectMemory(fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
547:   b->maxnz = b->nz = ui[mbs];
548: 
549:   (fact)->info.factor_mallocs    = reallocs;
550:   (fact)->info.fill_ratio_given  = fill;
551:   if (ai[mbs] != 0) {
552:     (fact)->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
553:   } else {
554:     (fact)->info.fill_ratio_needed = 0.0;
555:   }
556:   MatSeqSBAIJSetNumericFactorization_inplace(fact,perm_identity);
557:   return(0);
558: }

562: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo *info)
563: {
564:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
565:   IS             perm = b->row;
567:   const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j;
568:   PetscInt       i,j;
569:   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
570:   PetscInt       bs=A->rmap->bs,bs2 = a->bs2,bslog = 0;
571:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
572:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
573:   MatScalar      *work;
574:   PetscInt       *pivots;

577:   /* initialization */
578:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
579:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
580:   PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);
581:   for (i=0; i<mbs; i++) {
582:     jl[i] = mbs; il[0] = 0;
583:   }
584:   PetscMalloc3(bs2,MatScalar,&dk,bs2,MatScalar,&uik,bs,MatScalar,&work);
585:   PetscMalloc(bs*sizeof(PetscInt),&pivots);
586: 
587:   ISGetIndices(perm,&perm_ptr);
588: 
589:   /* check permutation */
590:   if (!a->permute){
591:     ai = a->i; aj = a->j; aa = a->a;
592:   } else {
593:     ai   = a->inew; aj = a->jnew;
594:     PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
595:     PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
596:     PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);
597:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));

599:     /* flops in while loop */
600:     bslog = 2*bs*bs2;

602:     for (i=0; i<mbs; i++){
603:       jmin = ai[i]; jmax = ai[i+1];
604:       for (j=jmin; j<jmax; j++){
605:         while (a2anew[j] != j){
606:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
607:           for (k1=0; k1<bs2; k1++){
608:             dk[k1]       = aa[k*bs2+k1];
609:             aa[k*bs2+k1] = aa[j*bs2+k1];
610:             aa[j*bs2+k1] = dk[k1];
611:           }
612:         }
613:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
614:         if (i > aj[j]){
615:           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
616:           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
617:           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
618:           for (k=0; k<bs; k++){               /* j-th block of aa <- dk^T */
619:             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
620:           }
621:         }
622:       }
623:     }
624:     PetscFree(a2anew);
625:   }
626: 
627:   /* for each row k */
628:   for (k = 0; k<mbs; k++){

630:     /*initialize k-th row with elements nonzero in row perm(k) of A */
631:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
632: 
633:     ap = aa + jmin*bs2;
634:     for (j = jmin; j < jmax; j++){
635:       vj = perm_ptr[aj[j]];         /* block col. index */
636:       rtmp_ptr = rtmp + vj*bs2;
637:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
638:     }

640:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
641:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
642:     i = jl[k]; /* first row to be added to k_th row  */

644:     while (i < k){
645:       nexti = jl[i]; /* next row to be added to k_th row */

647:       /* compute multiplier */
648:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

650:       /* uik = -inv(Di)*U_bar(i,k) */
651:       diag = ba + i*bs2;
652:       u    = ba + ili*bs2;
653:       PetscMemzero(uik,bs2*sizeof(MatScalar));
654:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
655: 
656:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
657:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
658:       PetscLogFlops(bslog*2.0);
659: 
660:       /* update -U(i,k) */
661:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

663:       /* add multiple of row i to k-th row ... */
664:       jmin = ili + 1; jmax = bi[i+1];
665:       if (jmin < jmax){
666:         for (j=jmin; j<jmax; j++) {
667:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
668:           rtmp_ptr = rtmp + bj[j]*bs2;
669:           u = ba + j*bs2;
670:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
671:         }
672:         PetscLogFlops(bslog*(jmax-jmin));
673: 
674:         /* ... add i to row list for next nonzero entry */
675:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
676:         j     = bj[jmin];
677:         jl[i] = jl[j]; jl[j] = i; /* update jl */
678:       }
679:       i = nexti;
680:     }

682:     /* save nonzero entries in k-th row of U ... */

684:     /* invert diagonal block */
685:     diag = ba+k*bs2;
686:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
687:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
688: 
689:     jmin = bi[k]; jmax = bi[k+1];
690:     if (jmin < jmax) {
691:       for (j=jmin; j<jmax; j++){
692:          vj = bj[j];           /* block col. index of U */
693:          u   = ba + j*bs2;
694:          rtmp_ptr = rtmp + vj*bs2;
695:          for (k1=0; k1<bs2; k1++){
696:            *u++        = *rtmp_ptr;
697:            *rtmp_ptr++ = 0.0;
698:          }
699:       }
700: 
701:       /* ... add k to row list for first nonzero entry in k-th row */
702:       il[k] = jmin;
703:       i     = bj[jmin];
704:       jl[k] = jl[i]; jl[i] = k;
705:     }
706:   }

708:   PetscFree(rtmp);
709:   PetscFree2(il,jl);
710:   PetscFree3(dk,uik,work);
711:   PetscFree(pivots);
712:   if (a->permute){
713:     PetscFree(aa);
714:   }

716:   ISRestoreIndices(perm,&perm_ptr);
717:   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
718:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
719:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
720:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;

722:   C->assembled    = PETSC_TRUE;
723:   C->preallocated = PETSC_TRUE;
724:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
725:   return(0);
726: }

730: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
731: {
732:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
734:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
735:   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
736:   PetscInt       bs=A->rmap->bs,bs2 = a->bs2,bslog;
737:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
738:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
739:   MatScalar      *work;
740:   PetscInt       *pivots;

743:   PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
744:   PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
745:   PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);
746:   for (i=0; i<mbs; i++) {
747:     jl[i] = mbs; il[0] = 0;
748:   }
749:   PetscMalloc3(bs2,MatScalar,&dk,bs2,MatScalar,&uik,bs,MatScalar,&work);
750:   PetscMalloc(bs*sizeof(PetscInt),&pivots);
751: 
752:   ai = a->i; aj = a->j; aa = a->a;

754:   /* flops in while loop */
755:   bslog = 2*bs*bs2;
756: 
757:   /* for each row k */
758:   for (k = 0; k<mbs; k++){

760:     /*initialize k-th row with elements nonzero in row k of A */
761:     jmin = ai[k]; jmax = ai[k+1];
762:     ap = aa + jmin*bs2;
763:     for (j = jmin; j < jmax; j++){
764:       vj = aj[j];         /* block col. index */
765:       rtmp_ptr = rtmp + vj*bs2;
766:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
767:     }

769:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
770:     PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
771:     i = jl[k]; /* first row to be added to k_th row  */

773:     while (i < k){
774:       nexti = jl[i]; /* next row to be added to k_th row */

776:       /* compute multiplier */
777:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

779:       /* uik = -inv(Di)*U_bar(i,k) */
780:       diag = ba + i*bs2;
781:       u    = ba + ili*bs2;
782:       PetscMemzero(uik,bs2*sizeof(MatScalar));
783:       Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
784: 
785:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
786:       Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
787:       PetscLogFlops(bslog*2.0);
788: 
789:       /* update -U(i,k) */
790:       PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));

792:       /* add multiple of row i to k-th row ... */
793:       jmin = ili + 1; jmax = bi[i+1];
794:       if (jmin < jmax){
795:         for (j=jmin; j<jmax; j++) {
796:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
797:           rtmp_ptr = rtmp + bj[j]*bs2;
798:           u = ba + j*bs2;
799:           Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
800:         }
801:         PetscLogFlops(bslog*(jmax-jmin));
802: 
803:         /* ... add i to row list for next nonzero entry */
804:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
805:         j     = bj[jmin];
806:         jl[i] = jl[j]; jl[j] = i; /* update jl */
807:       }
808:       i = nexti;
809:     }

811:     /* save nonzero entries in k-th row of U ... */

813:     /* invert diagonal block */
814:     diag = ba+k*bs2;
815:     PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
816:     Kernel_A_gets_inverse_A(bs,diag,pivots,work);
817: 
818:     jmin = bi[k]; jmax = bi[k+1];
819:     if (jmin < jmax) {
820:       for (j=jmin; j<jmax; j++){
821:          vj = bj[j];           /* block col. index of U */
822:          u   = ba + j*bs2;
823:          rtmp_ptr = rtmp + vj*bs2;
824:          for (k1=0; k1<bs2; k1++){
825:            *u++        = *rtmp_ptr;
826:            *rtmp_ptr++ = 0.0;
827:          }
828:       }
829: 
830:       /* ... add k to row list for first nonzero entry in k-th row */
831:       il[k] = jmin;
832:       i     = bj[jmin];
833:       jl[k] = jl[i]; jl[i] = k;
834:     }
835:   }

837:   PetscFree(rtmp);
838:   PetscFree2(il,jl);
839:   PetscFree3(dk,uik,work);
840:   PetscFree(pivots);

842:   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
843:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
844:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
845:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
846:   C->assembled = PETSC_TRUE;
847:   C->preallocated = PETSC_TRUE;
848:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
849:   return(0);
850: }

852: /*
853:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
854:     Version for blocks 2 by 2.
855: */
858: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo *info)
859: {
860:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
861:   IS             perm = b->row;
863:   const PetscInt *ai,*aj,*perm_ptr;
864:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
865:   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
866:   MatScalar      *ba = b->a,*aa,*ap;
867:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr,dk[4],uik[4];
868:   PetscReal      shift = info->shiftamount;

871:   /* initialization */
872:   /* il and jl record the first nonzero element in each row of the accessing 
873:      window U(0:k, k:mbs-1).
874:      jl:    list of rows to be added to uneliminated rows 
875:             i>= k: jl(i) is the first row to be added to row i
876:             i<  k: jl(i) is the row following row i in some list of rows
877:             jl(i) = mbs indicates the end of a list                        
878:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
879:             row i of U */
880:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
881:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
882:   PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);
883:   for (i=0; i<mbs; i++) {
884:     jl[i] = mbs; il[0] = 0;
885:   }
886:   ISGetIndices(perm,&perm_ptr);

888:   /* check permutation */
889:   if (!a->permute){
890:     ai = a->i; aj = a->j; aa = a->a;
891:   } else {
892:     ai   = a->inew; aj = a->jnew;
893:     PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
894:     PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
895:     PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);
896:     PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));

898:     for (i=0; i<mbs; i++){
899:       jmin = ai[i]; jmax = ai[i+1];
900:       for (j=jmin; j<jmax; j++){
901:         while (a2anew[j] != j){
902:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
903:           for (k1=0; k1<4; k1++){
904:             dk[k1]       = aa[k*4+k1];
905:             aa[k*4+k1] = aa[j*4+k1];
906:             aa[j*4+k1] = dk[k1];
907:           }
908:         }
909:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
910:         if (i > aj[j]){
911:           /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
912:           ap = aa + j*4;     /* ptr to the beginning of the block */
913:           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
914:           ap[1] = ap[2];
915:           ap[2] = dk[1];
916:         }
917:       }
918:     }
919:     PetscFree(a2anew);
920:   }

922:   /* for each row k */
923:   for (k = 0; k<mbs; k++){

925:     /*initialize k-th row with elements nonzero in row perm(k) of A */
926:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
927:     ap = aa + jmin*4;
928:     for (j = jmin; j < jmax; j++){
929:       vj = perm_ptr[aj[j]];         /* block col. index */
930:       rtmp_ptr = rtmp + vj*4;
931:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
932:     }

934:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
935:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
936:     i = jl[k]; /* first row to be added to k_th row  */

938:     while (i < k){
939:       nexti = jl[i]; /* next row to be added to k_th row */

941:       /* compute multiplier */
942:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

944:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
945:       diag = ba + i*4;
946:       u    = ba + ili*4;
947:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
948:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
949:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
950:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
951: 
952:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
953:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
954:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
955:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
956:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

958:       PetscLogFlops(16.0*2.0);

960:       /* update -U(i,k): ba[ili] = uik */
961:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

963:       /* add multiple of row i to k-th row ... */
964:       jmin = ili + 1; jmax = bi[i+1];
965:       if (jmin < jmax){
966:         for (j=jmin; j<jmax; j++) {
967:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
968:           rtmp_ptr = rtmp + bj[j]*4;
969:           u = ba + j*4;
970:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
971:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
972:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
973:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
974:         }
975:         PetscLogFlops(16.0*(jmax-jmin));
976: 
977:         /* ... add i to row list for next nonzero entry */
978:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
979:         j     = bj[jmin];
980:         jl[i] = jl[j]; jl[j] = i; /* update jl */
981:       }
982:       i = nexti;
983:     }

985:     /* save nonzero entries in k-th row of U ... */

987:     /* invert diagonal block */
988:     diag = ba+k*4;
989:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
990:     Kernel_A_gets_inverse_A_2(diag,shift);
991: 
992:     jmin = bi[k]; jmax = bi[k+1];
993:     if (jmin < jmax) {
994:       for (j=jmin; j<jmax; j++){
995:          vj = bj[j];           /* block col. index of U */
996:          u   = ba + j*4;
997:          rtmp_ptr = rtmp + vj*4;
998:          for (k1=0; k1<4; k1++){
999:            *u++        = *rtmp_ptr;
1000:            *rtmp_ptr++ = 0.0;
1001:          }
1002:       }
1003: 
1004:       /* ... add k to row list for first nonzero entry in k-th row */
1005:       il[k] = jmin;
1006:       i     = bj[jmin];
1007:       jl[k] = jl[i]; jl[i] = k;
1008:     }
1009:   }

1011:   PetscFree(rtmp);
1012:   PetscFree2(il,jl);
1013:   if (a->permute) {
1014:     PetscFree(aa);
1015:   }
1016:   ISRestoreIndices(perm,&perm_ptr);
1017:   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1018:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1019:   C->assembled = PETSC_TRUE;
1020:   C->preallocated = PETSC_TRUE;
1021:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1022:   return(0);
1023: }

1025: /*
1026:       Version for when blocks are 2 by 2 Using natural ordering
1027: */
1030: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
1031: {
1032:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1034:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1035:   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1036:   MatScalar      *ba = b->a,*aa,*ap,dk[8],uik[8];
1037:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
1038:   PetscReal      shift = info->shiftamount;

1041:   /* initialization */
1042:   /* il and jl record the first nonzero element in each row of the accessing 
1043:      window U(0:k, k:mbs-1).
1044:      jl:    list of rows to be added to uneliminated rows 
1045:             i>= k: jl(i) is the first row to be added to row i
1046:             i<  k: jl(i) is the row following row i in some list of rows
1047:             jl(i) = mbs indicates the end of a list                        
1048:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
1049:             row i of U */
1050:   PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
1051:   PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
1052:   PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);
1053:   for (i=0; i<mbs; i++) {
1054:     jl[i] = mbs; il[0] = 0;
1055:   }
1056:   ai = a->i; aj = a->j; aa = a->a;

1058:   /* for each row k */
1059:   for (k = 0; k<mbs; k++){

1061:     /*initialize k-th row with elements nonzero in row k of A */
1062:     jmin = ai[k]; jmax = ai[k+1];
1063:     ap = aa + jmin*4;
1064:     for (j = jmin; j < jmax; j++){
1065:       vj = aj[j];         /* block col. index */
1066:       rtmp_ptr = rtmp + vj*4;
1067:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1068:     }
1069: 
1070:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1071:     PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
1072:     i = jl[k]; /* first row to be added to k_th row  */

1074:     while (i < k){
1075:       nexti = jl[i]; /* next row to be added to k_th row */

1077:       /* compute multiplier */
1078:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

1080:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1081:       diag = ba + i*4;
1082:       u    = ba + ili*4;
1083:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1084:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1085:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1086:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
1087: 
1088:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1089:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
1090:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
1091:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
1092:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

1094:       PetscLogFlops(16.0*2.0);

1096:       /* update -U(i,k): ba[ili] = uik */
1097:       PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));

1099:       /* add multiple of row i to k-th row ... */
1100:       jmin = ili + 1; jmax = bi[i+1];
1101:       if (jmin < jmax){
1102:         for (j=jmin; j<jmax; j++) {
1103:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1104:           rtmp_ptr = rtmp + bj[j]*4;
1105:           u = ba + j*4;
1106:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1107:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1108:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1109:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1110:         }
1111:         PetscLogFlops(16.0*(jmax-jmin));

1113:         /* ... add i to row list for next nonzero entry */
1114:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1115:         j     = bj[jmin];
1116:         jl[i] = jl[j]; jl[j] = i; /* update jl */
1117:       }
1118:       i = nexti;
1119:     }

1121:     /* save nonzero entries in k-th row of U ... */

1123:     /* invert diagonal block */
1124:     diag = ba+k*4;
1125:     PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1126:     Kernel_A_gets_inverse_A_2(diag,shift);
1127: 
1128:     jmin = bi[k]; jmax = bi[k+1];
1129:     if (jmin < jmax) {
1130:       for (j=jmin; j<jmax; j++){
1131:          vj = bj[j];           /* block col. index of U */
1132:          u   = ba + j*4;
1133:          rtmp_ptr = rtmp + vj*4;
1134:          for (k1=0; k1<4; k1++){
1135:            *u++        = *rtmp_ptr;
1136:            *rtmp_ptr++ = 0.0;
1137:          }
1138:       }
1139: 
1140:       /* ... add k to row list for first nonzero entry in k-th row */
1141:       il[k] = jmin;
1142:       i     = bj[jmin];
1143:       jl[k] = jl[i]; jl[i] = k;
1144:     }
1145:   }

1147:   PetscFree(rtmp);
1148:   PetscFree2(il,jl);

1150:   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1151:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1152:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1153:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1154:   C->assembled = PETSC_TRUE;
1155:   C->preallocated = PETSC_TRUE;
1156:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1157:   return(0);
1158: }

1160: /*
1161:     Numeric U^T*D*U factorization for SBAIJ format. 
1162:     Version for blocks are 1 by 1.
1163: */
1166: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info)
1167: {
1168:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1169:   IS             ip=b->row;
1171:   const PetscInt *ai,*aj,*rip;
1172:   PetscInt       *a2anew,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol;
1173:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1174:   MatScalar      *rtmp,*ba=b->a,*bval,*aa,dk,uikdi;
1175:   PetscReal      rs,shift;
1176:   ChShift_Ctx    sctx;
1177:   PetscInt       newshift;

1180:   /* initialization */
1181:   shift   = info->shiftamount;

1183:   ISGetIndices(ip,&rip);
1184:   if (!a->permute){
1185:     ai = a->i; aj = a->j; aa = a->a;
1186:   } else {
1187:     ai = a->inew; aj = a->jnew;
1188:     nz = ai[mbs];
1189:     PetscMalloc(nz*sizeof(MatScalar),&aa);
1190:     a2anew = a->a2anew;
1191:     bval   = a->a;
1192:     for (j=0; j<nz; j++){
1193:       aa[a2anew[j]] = *(bval++);
1194:     }
1195:   }
1196: 
1197:   /* initialization */
1198:   /* il and jl record the first nonzero element in each row of the accessing 
1199:      window U(0:k, k:mbs-1).
1200:      jl:    list of rows to be added to uneliminated rows 
1201:             i>= k: jl(i) is the first row to be added to row i
1202:             i<  k: jl(i) is the row following row i in some list of rows
1203:             jl(i) = mbs indicates the end of a list                        
1204:      il(i): points to the first nonzero element in columns k,...,mbs-1 of 
1205:             row i of U */
1206:   PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);

1208:   sctx.shift_amount = 0;
1209:   sctx.nshift       = 0;
1210:   do {
1211:     sctx.chshift = PETSC_FALSE;
1212:     for (i=0; i<mbs; i++) {
1213:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1214:     }
1215: 
1216:     for (k = 0; k<mbs; k++){
1217:       /*initialize k-th row by the perm[k]-th row of A */
1218:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1219:       bval = ba + bi[k];
1220:       for (j = jmin; j < jmax; j++){
1221:         col = rip[aj[j]];
1222:         rtmp[col] = aa[j];
1223:         *bval++  = 0.0; /* for in-place factorization */
1224:       }

1226:       /* shift the diagonal of the matrix */
1227:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1229:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1230:       dk = rtmp[k];
1231:       i = jl[k]; /* first row to be added to k_th row  */

1233:       while (i < k){
1234:         nexti = jl[i]; /* next row to be added to k_th row */

1236:         /* compute multiplier, update diag(k) and U(i,k) */
1237:         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1238:         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1239:         dk += uikdi*ba[ili];
1240:         ba[ili] = uikdi; /* -U(i,k) */

1242:         /* add multiple of row i to k-th row */
1243:         jmin = ili + 1; jmax = bi[i+1];
1244:         if (jmin < jmax){
1245:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1246:           PetscLogFlops(2.0*(jmax-jmin));

1248:           /* update il and jl for row i */
1249:           il[i] = jmin;
1250:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1251:         }
1252:         i = nexti;
1253:       }

1255:       /* shift the diagonals when zero pivot is detected */
1256:       /* compute rs=sum of abs(off-diagonal) */
1257:       rs   = 0.0;
1258:       jmin = bi[k]+1;
1259:       nz   = bi[k+1] - jmin;
1260:       if (nz){
1261:         bcol = bj + jmin;
1262:         while (nz--){
1263:           rs += PetscAbsScalar(rtmp[*bcol]);
1264:           bcol++;
1265:         }
1266:       }

1268:       sctx.rs = rs;
1269:       sctx.pv = dk;
1270:       MatCholeskyCheckShift_inline(info,sctx,k,newshift);
1271:       if (newshift == 1) break;    /* sctx.shift_amount is updated */
1272: 
1273:       /* copy data into U(k,:) */
1274:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1275:       jmin = bi[k]+1; jmax = bi[k+1];
1276:       if (jmin < jmax) {
1277:         for (j=jmin; j<jmax; j++){
1278:           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1279:         }
1280:         /* add the k-th row into il and jl */
1281:         il[k] = jmin;
1282:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1283:       }
1284:     }
1285:   } while (sctx.chshift);
1286:   PetscFree3(rtmp,il,jl);
1287:   if (a->permute){PetscFree(aa);}

1289:   ISRestoreIndices(ip,&rip);
1290:   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1291:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1292:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1293:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1294:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1295:   C->assembled    = PETSC_TRUE;
1296:   C->preallocated = PETSC_TRUE;
1297:   PetscLogFlops(C->rmap->N);
1298:   if (sctx.nshift){
1299:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1300:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1301:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1302:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1303:     }
1304:   }
1305:   return(0);
1306: }

1308: /*
1309:   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1310:   Modified from MatCholeskyFactorNumeric_SeqAIJ() 
1311: */
1314: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info)
1315: {
1316:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1317:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)B->data;
1319:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
1320:   PetscInt       *ai=a->i,*aj=a->j,*ajtmp;
1321:   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
1322:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1323:   FactorShiftCtx sctx;
1324:   PetscReal      rs;
1325:   MatScalar      d,*v;

1328:   PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);

1330:   /* MatPivotSetUp(): initialize shift context sctx */
1331:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

1333:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1334:     sctx.shift_top = info->zeropivot;
1335:     PetscMemzero(rtmp,mbs*sizeof(MatScalar));
1336:     for (i=0; i<mbs; i++) {
1337:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1338:       d  = (aa)[a->diag[i]];
1339:       rtmp[i] += - PetscRealPart(d); /* diagonal entry */
1340:       ajtmp = aj + ai[i] + 1;        /* exclude diagonal */
1341:       v     = aa + ai[i] + 1;
1342:       nz    = ai[i+1] - ai[i] - 1 ;
1343:       for (j=0; j<nz; j++){
1344:         rtmp[i] += PetscAbsScalar(v[j]);
1345:         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1346:       }
1347:       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1348:     }
1349:     sctx.shift_top   *= 1.1;
1350:     sctx.nshift_max   = 5;
1351:     sctx.shift_lo     = 0.;
1352:     sctx.shift_hi     = 1.;
1353:   }

1355:   /* allocate working arrays
1356:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1357:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 
1358:   */
1359:   do {
1360:     sctx.useshift = PETSC_FALSE;

1362:     for (i=0; i<mbs; i++)  c2r[i] = mbs;
1363:     il[0] = 0;
1364: 
1365:     for (k = 0; k<mbs; k++){
1366:       /* zero rtmp */
1367:       nz = bi[k+1] - bi[k];
1368:       bjtmp = bj + bi[k];
1369:       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
1370: 
1371:       /* load in initial unfactored row */
1372:       bval = ba + bi[k];
1373:       jmin = ai[k]; jmax = ai[k+1];
1374:       for (j = jmin; j < jmax; j++){
1375:         col = aj[j];
1376:         rtmp[col] = aa[j];
1377:         *bval++   = 0.0; /* for in-place factorization */
1378:       }
1379:       /* shift the diagonal of the matrix: ZeropivotApply() */
1380:       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
1381: 
1382:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1383:       dk = rtmp[k];
1384:       i  = c2r[k]; /* first row to be added to k_th row  */

1386:       while (i < k){
1387:         nexti = c2r[i]; /* next row to be added to k_th row */
1388: 
1389:         /* compute multiplier, update diag(k) and U(i,k) */
1390:         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1391:         uikdi = - ba[ili]*ba[bdiag[i]];  /* diagonal(k) */
1392:         dk   += uikdi*ba[ili]; /* update diag[k] */
1393:         ba[ili] = uikdi; /* -U(i,k) */

1395:         /* add multiple of row i to k-th row */
1396:         jmin = ili + 1; jmax = bi[i+1];
1397:         if (jmin < jmax){
1398:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1399:           /* update il and c2r for row i */
1400:           il[i] = jmin;
1401:           j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
1402:         }
1403:         i = nexti;
1404:       }

1406:       /* copy data into U(k,:) */
1407:       rs   = 0.0;
1408:       jmin = bi[k]; jmax = bi[k+1]-1;
1409:       if (jmin < jmax) {
1410:         for (j=jmin; j<jmax; j++){
1411:           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
1412:         }
1413:         /* add the k-th row into il and c2r */
1414:         il[k] = jmin;
1415:         i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
1416:       }

1418:       /* MatPivotCheck() */
1419:       sctx.rs  = rs;
1420:       sctx.pv  = dk;
1421:       if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO){
1422:         MatPivotCheck_nz(info,sctx,k);
1423:       } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE){
1424:         MatPivotCheck_pd(info,sctx,k);
1425:       } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
1426:         MatPivotCheck_inblocks(info,sctx,k);
1427:       } else {
1428:         MatPivotCheck_none(info,sctx,k);
1429:       }
1430:       dk = sctx.pv;
1431: 
1432:       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
1433:     }
1434:   } while (sctx.useshift);
1435: 
1436:   PetscFree3(rtmp,il,c2r);
1437: 
1438:   B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1439:   B->ops->solves          = MatSolves_SeqSBAIJ_1;
1440:   B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1441:   B->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1442:   B->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;

1444:   B->assembled    = PETSC_TRUE;
1445:   B->preallocated = PETSC_TRUE;
1446:   PetscLogFlops(B->rmap->n);

1448:   /* MatPivotView() */
1449:   if (sctx.nshift){
1450:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1451:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
1452:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1453:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1454:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
1455:       PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
1456:     }
1457:   }
1458:   return(0);
1459: }

1463: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info)
1464: {
1465:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1467:   PetscInt       i,j,mbs = a->mbs;
1468:   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1469:   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1470:   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1471:   PetscReal      rs;
1472:   ChShift_Ctx    sctx;
1473:   PetscInt       newshift;

1476:   /* initialization */
1477:   /* il and jl record the first nonzero element in each row of the accessing 
1478:      window U(0:k, k:mbs-1).
1479:      jl:    list of rows to be added to uneliminated rows 
1480:             i>= k: jl(i) is the first row to be added to row i
1481:             i<  k: jl(i) is the row following row i in some list of rows
1482:             jl(i) = mbs indicates the end of a list                        
1483:      il(i): points to the first nonzero element in U(i,k:mbs-1) 
1484:   */
1485:   PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1486:   PetscMalloc2(mbs,PetscInt,&il,mbs,PetscInt,&jl);

1488:   sctx.shift_amount = 0;
1489:   sctx.nshift       = 0;
1490:   do {
1491:     sctx.chshift = PETSC_FALSE;
1492:     for (i=0; i<mbs; i++) {
1493:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1494:     }

1496:     for (k = 0; k<mbs; k++){
1497:       /*initialize k-th row with elements nonzero in row perm(k) of A */
1498:       nz   = ai[k+1] - ai[k];
1499:       acol = aj + ai[k];
1500:       aval = aa + ai[k];
1501:       bval = ba + bi[k];
1502:       while (nz -- ){
1503:         rtmp[*acol++] = *aval++;
1504:         *bval++       = 0.0; /* for in-place factorization */
1505:       }

1507:       /* shift the diagonal of the matrix */
1508:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1509: 
1510:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1511:       dk = rtmp[k];
1512:       i  = jl[k]; /* first row to be added to k_th row  */

1514:       while (i < k){
1515:         nexti = jl[i]; /* next row to be added to k_th row */
1516:         /* compute multiplier, update D(k) and U(i,k) */
1517:         ili   = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1518:         uikdi = - ba[ili]*ba[bi[i]];
1519:         dk   += uikdi*ba[ili];
1520:         ba[ili] = uikdi; /* -U(i,k) */

1522:         /* add multiple of row i to k-th row ... */
1523:         jmin = ili + 1;
1524:         nz   = bi[i+1] - jmin;
1525:         if (nz > 0){
1526:           bcol = bj + jmin;
1527:           bval = ba + jmin;
1528:           PetscLogFlops(2.0*nz);
1529:           while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1530: 
1531:           /* update il and jl for i-th row */
1532:           il[i] = jmin;
1533:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1534:         }
1535:         i = nexti;
1536:       }

1538:       /* shift the diagonals when zero pivot is detected */
1539:       /* compute rs=sum of abs(off-diagonal) */
1540:       rs   = 0.0;
1541:       jmin = bi[k]+1;
1542:       nz   = bi[k+1] - jmin;
1543:       if (nz){
1544:         bcol = bj + jmin;
1545:         while (nz--){
1546:           rs += PetscAbsScalar(rtmp[*bcol]);
1547:           bcol++;
1548:         }
1549:       }

1551:       sctx.rs = rs;
1552:       sctx.pv = dk;
1553:       MatCholeskyCheckShift_inline(info,sctx,k,newshift);
1554:       if (newshift == 1) break;    /* sctx.shift_amount is updated */
1555: 
1556:       /* copy data into U(k,:) */
1557:       ba[bi[k]] = 1.0/dk;
1558:       jmin      = bi[k]+1;
1559:       nz        = bi[k+1] - jmin;
1560:       if (nz){
1561:         bcol = bj + jmin;
1562:         bval = ba + jmin;
1563:         while (nz--){
1564:           *bval++       = rtmp[*bcol];
1565:           rtmp[*bcol++] = 0.0;
1566:         }
1567:         /* add k-th row into il and jl */
1568:         il[k] = jmin;
1569:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1570:       }
1571:     } /* end of for (k = 0; k<mbs; k++) */
1572:   } while (sctx.chshift);
1573:   PetscFree(rtmp);
1574:   PetscFree2(il,jl);
1575: 
1576:   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1577:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1578:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1579:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1580:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;

1582:   C->assembled    = PETSC_TRUE;
1583:   C->preallocated = PETSC_TRUE;
1584:   PetscLogFlops(C->rmap->N);
1585:   if (sctx.nshift){
1586:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1587:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1588:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1589:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1590:     }
1591:   }
1592:   return(0);
1593: }

1597: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo *info)
1598: {
1600:   Mat            C;

1603:   MatGetFactor(A,"petsc",MAT_FACTOR_CHOLESKY,&C);
1604:   MatCholeskyFactorSymbolic(C,A,perm,info);
1605:   MatCholeskyFactorNumeric(C,A,info);
1606:   A->ops->solve            = C->ops->solve;
1607:   A->ops->solvetranspose   = C->ops->solvetranspose;
1608:   MatHeaderCopy(A,C);
1609:   return(0);
1610: }