Actual source code: aijfact.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/impls/aij/seq/aij.h
 4:  #include src/inline/dot.h
 5:  #include src/inline/spops.h
 6:  #include petscbt.h
 7:  #include src/mat/utils/freespace.h

 11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
 12: {

 15:   SETERRQ(PETSC_ERR_SUP,"Code not written");
 16: #if !defined(PETSC_USE_DEBUG)
 17:   return(0);
 18: #endif
 19: }


 22: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 23: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
 24: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
 25: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
 26: #endif

 30:   /* ------------------------------------------------------------

 32:           This interface was contribed by Tony Caola

 34:      This routine is an interface to the pivoting drop-tolerance 
 35:      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 
 36:      SPARSEKIT2.

 38:      The SPARSEKIT2 routines used here are covered by the GNU 
 39:      copyright; see the file gnu in this directory.

 41:      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
 42:      help in getting this routine ironed out.

 44:      The major drawback to this routine is that if info->fill is 
 45:      not large enough it fails rather than allocating more space;
 46:      this can be fixed by hacking/improving the f2c version of 
 47:      Yousef Saad's code.

 49:      ------------------------------------------------------------
 50: */
 51: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
 52: {
 53: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 55:   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
 56:   You can obtain the drop tolerance routines by installing PETSc from\n\
 57:   www.mcs.anl.gov/petsc\n");
 58: #else
 59:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
 60:   IS             iscolf,isicol,isirow;
 61:   PetscTruth     reorder;
 62:   PetscErrorCode ierr,sierr;
 63:   PetscInt       *c,*r,*ic,i,n = A->rmap.n;
 64:   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
 65:   PetscInt       *ordcol,*iwk,*iperm,*jw;
 66:   PetscInt       jmax,lfill,job,*o_i,*o_j;
 67:   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
 68:   PetscReal      af;


 72:   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
 73:   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
 74:   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
 75:   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
 76:   lfill   = (PetscInt)(info->dtcount/2.0);
 77:   jmax    = (PetscInt)(info->fill*a->nz);


 80:   /* ------------------------------------------------------------
 81:      If reorder=.TRUE., then the original matrix has to be 
 82:      reordered to reflect the user selected ordering scheme, and
 83:      then de-reordered so it is in it's original format.  
 84:      Because Saad's dperm() is NOT in place, we have to copy 
 85:      the original matrix and allocate more storage. . . 
 86:      ------------------------------------------------------------
 87:   */

 89:   /* set reorder to true if either isrow or iscol is not identity */
 90:   ISIdentity(isrow,&reorder);
 91:   if (reorder) {ISIdentity(iscol,&reorder);}
 92:   reorder = PetscNot(reorder);

 94: 
 95:   /* storage for ilu factor */
 96:   PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
 97:   PetscMalloc(jmax*sizeof(PetscInt),&new_j);
 98:   PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
 99:   PetscMalloc(n*sizeof(PetscInt),&ordcol);

101:   /* ------------------------------------------------------------
102:      Make sure that everything is Fortran formatted (1-Based)
103:      ------------------------------------------------------------
104:   */
105:   for (i=old_i[0];i<old_i[n];i++) {
106:     old_j[i]++;
107:   }
108:   for(i=0;i<n+1;i++) {
109:     old_i[i]++;
110:   };
111: 

113:   if (reorder) {
114:     ISGetIndices(iscol,&c);
115:     ISGetIndices(isrow,&r);
116:     for(i=0;i<n;i++) {
117:       r[i]  = r[i]+1;
118:       c[i]  = c[i]+1;
119:     }
120:     PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
121:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
122:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
123:     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124:     for (i=0;i<n;i++) {
125:       r[i]  = r[i]-1;
126:       c[i]  = c[i]-1;
127:     }
128:     ISRestoreIndices(iscol,&c);
129:     ISRestoreIndices(isrow,&r);
130:     o_a = old_a2;
131:     o_j = old_j2;
132:     o_i = old_i2;
133:   } else {
134:     o_a = old_a;
135:     o_j = old_j;
136:     o_i = old_i;
137:   }

139:   /* ------------------------------------------------------------
140:      Call Saad's ilutp() routine to generate the factorization
141:      ------------------------------------------------------------
142:   */

144:   PetscMalloc(2*n*sizeof(PetscInt),&iperm);
145:   PetscMalloc(2*n*sizeof(PetscInt),&jw);
146:   PetscMalloc(n*sizeof(PetscScalar),&w);

148:   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149:   if (sierr) {
150:     switch (sierr) {
151:       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152:       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153:       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154:       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155:       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156:       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157:     }
158:   }

160:   PetscFree(w);
161:   PetscFree(jw);

163:   /* ------------------------------------------------------------
164:      Saad's routine gives the result in Modified Sparse Row (msr)
165:      Convert to Compressed Sparse Row format (csr) 
166:      ------------------------------------------------------------
167:   */

169:   PetscMalloc(n*sizeof(PetscScalar),&wk);
170:   PetscMalloc((n+1)*sizeof(PetscInt),&iwk);

172:   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);

174:   PetscFree(iwk);
175:   PetscFree(wk);

177:   if (reorder) {
178:     PetscFree(old_a2);
179:     PetscFree(old_j2);
180:     PetscFree(old_i2);
181:   } else {
182:     /* fix permutation of old_j that the factorization introduced */
183:     for (i=old_i[0]; i<old_i[n]; i++) {
184:       old_j[i-1] = iperm[old_j[i-1]-1];
185:     }
186:   }

188:   /* get rid of the shift to indices starting at 1 */
189:   for (i=0; i<n+1; i++) {
190:     old_i[i]--;
191:   }
192:   for (i=old_i[0];i<old_i[n];i++) {
193:     old_j[i]--;
194:   }
195: 
196:   /* Make the factored matrix 0-based */
197:   for (i=0; i<n+1; i++) {
198:     new_i[i]--;
199:   }
200:   for (i=new_i[0];i<new_i[n];i++) {
201:     new_j[i]--;
202:   }

204:   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205:   /*-- permute the right-hand-side and solution vectors           --*/
206:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
207:   ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
208:   ISGetIndices(isicol,&ic);
209:   for(i=0; i<n; i++) {
210:     ordcol[i] = ic[iperm[i]-1];
211:   };
212:   ISRestoreIndices(isicol,&ic);
213:   ISDestroy(isicol);

215:   PetscFree(iperm);

217:   ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
218:   PetscFree(ordcol);

220:   /*----- put together the new matrix -----*/

222:   MatCreate(((PetscObject)A)->comm,fact);
223:   MatSetSizes(*fact,n,n,n,n);
224:   MatSetType(*fact,((PetscObject)A)->type_name);
225:   MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
226:   (*fact)->factor    = FACTOR_LU;
227:   (*fact)->assembled = PETSC_TRUE;

229:   b = (Mat_SeqAIJ*)(*fact)->data;
230:   b->free_a        = PETSC_TRUE;
231:   b->free_ij       = PETSC_TRUE;
232:   b->singlemalloc  = PETSC_FALSE;
233:   b->a             = new_a;
234:   b->j             = new_j;
235:   b->i             = new_i;
236:   b->ilen          = 0;
237:   b->imax          = 0;
238:   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239:   b->row           = isirow;
240:   b->col           = iscolf;
241:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
242:   b->maxnz = b->nz = new_i[n];
243:   MatMarkDiagonal_SeqAIJ(*fact);
244:   (*fact)->info.factor_mallocs = 0;

246:   af = ((double)b->nz)/((double)a->nz) + .001;
247:   PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);
248:   PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
249:   PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
250:   PetscInfo(A,"for best performance.\n");

252:   MatILUDTFactor_Inode(A,isrow,iscol,info,fact);

254:   return(0);
255: #endif
256: }

260: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
261: {
262:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
263:   IS                 isicol;
264:   PetscErrorCode     ierr;
265:   PetscInt           *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
266:   PetscInt           *bi,*bj,*ajtmp;
267:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
268:   PetscReal          f;
269:   PetscInt           nlnk,*lnk,k,**bi_ptr;
270:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
271:   PetscBT            lnkbt;

274:   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
275:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
276:   ISGetIndices(isrow,&r);
277:   ISGetIndices(isicol,&ic);

279:   /* get new row pointers */
280:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
281:   bi[0] = 0;

283:   /* bdiag is location of diagonal in factor */
284:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
285:   bdiag[0] = 0;

287:   /* linked list for storing column indices of the active row */
288:   nlnk = n + 1;
289:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

291:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

293:   /* initial FreeSpace size is f*(ai[n]+1) */
294:   f = info->fill;
295:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
296:   current_space = free_space;

298:   for (i=0; i<n; i++) {
299:     /* copy previous fill into linked list */
300:     nzi = 0;
301:     nnz = ai[r[i]+1] - ai[r[i]];
302:     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
303:     ajtmp = aj + ai[r[i]];
304:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
305:     nzi += nlnk;

307:     /* add pivot rows into linked list */
308:     row = lnk[n];
309:     while (row < i) {
310:       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
311:       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
312:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
313:       nzi += nlnk;
314:       row  = lnk[row];
315:     }
316:     bi[i+1] = bi[i] + nzi;
317:     im[i]   = nzi;

319:     /* mark bdiag */
320:     nzbd = 0;
321:     nnz  = nzi;
322:     k    = lnk[n];
323:     while (nnz-- && k < i){
324:       nzbd++;
325:       k = lnk[k];
326:     }
327:     bdiag[i] = bi[i] + nzbd;

329:     /* if free space is not available, make more free space */
330:     if (current_space->local_remaining<nzi) {
331:       nnz = (n - i)*nzi; /* estimated and max additional space needed */
332:       PetscFreeSpaceGet(nnz,&current_space);
333:       reallocs++;
334:     }

336:     /* copy data into free space, then initialize lnk */
337:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
338:     bi_ptr[i] = current_space->array;
339:     current_space->array           += nzi;
340:     current_space->local_used      += nzi;
341:     current_space->local_remaining -= nzi;
342:   }
343: #if defined(PETSC_USE_INFO)
344:   if (ai[n] != 0) {
345:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
346:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
347:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
348:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
349:     PetscInfo(A,"for best performance.\n");
350:   } else {
351:     PetscInfo(A,"Empty matrix\n");
352:   }
353: #endif

355:   ISRestoreIndices(isrow,&r);
356:   ISRestoreIndices(isicol,&ic);

358:   /* destroy list of free space and other temporary array(s) */
359:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
360:   PetscFreeSpaceContiguous(&free_space,bj);
361:   PetscLLDestroy(lnk,lnkbt);
362:   PetscFree2(bi_ptr,im);

364:   /* put together the new matrix */
365:   MatCreate(((PetscObject)A)->comm,B);
366:   MatSetSizes(*B,n,n,n,n);
367:   MatSetType(*B,((PetscObject)A)->type_name);
368:   MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
369:   PetscLogObjectParent(*B,isicol);
370:   b    = (Mat_SeqAIJ*)(*B)->data;
371:   b->free_a       = PETSC_TRUE;
372:   b->free_ij      = PETSC_TRUE;
373:   b->singlemalloc = PETSC_FALSE;
374:   PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
375:   b->j          = bj;
376:   b->i          = bi;
377:   b->diag       = bdiag;
378:   b->ilen       = 0;
379:   b->imax       = 0;
380:   b->row        = isrow;
381:   b->col        = iscol;
382:   PetscObjectReference((PetscObject)isrow);
383:   PetscObjectReference((PetscObject)iscol);
384:   b->icol       = isicol;
385:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);

387:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
388:   PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
389:   b->maxnz = b->nz = bi[n] ;

391:   (*B)->factor                 =  FACTOR_LU;
392:   (*B)->info.factor_mallocs    = reallocs;
393:   (*B)->info.fill_ratio_given  = f;

395:   if (ai[n] != 0) {
396:     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
397:   } else {
398:     (*B)->info.fill_ratio_needed = 0.0;
399:   }
400:   MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
401:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
402:   return(0);
403: }

405: /*
406:     Trouble in factorization, should we dump the original matrix?
407: */
410: PetscErrorCode MatFactorDumpMatrix(Mat A)
411: {
413:   PetscTruth     flg;

416:   PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);
417:   if (flg) {
418:     PetscViewer viewer;
419:     char        filename[PETSC_MAX_PATH_LEN];

421:     PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
422:     PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);
423:     MatView(A,viewer);
424:     PetscViewerDestroy(viewer);
425:   }
426:   return(0);
427: }

429: /* ----------------------------------------------------------- */
432: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
433: {
434:   Mat            C=*B;
435:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
436:   IS             isrow = b->row,isicol = b->icol;
438:   PetscInt       *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
439:   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
440:   PetscInt       *diag_offset = b->diag,diag,*pj;
441:   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
442:   PetscScalar    d;
443:   PetscReal      rs;
444:   LUShift_Ctx    sctx;
445:   PetscInt       newshift,*ddiag;

448:   ISGetIndices(isrow,&r);
449:   ISGetIndices(isicol,&ic);
450:   PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
451:   PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
452:   rtmps = rtmp; ics = ic;

454:   sctx.shift_top  = 0;
455:   sctx.nshift_max = 0;
456:   sctx.shift_lo   = 0;
457:   sctx.shift_hi   = 0;

459:   /* if both shift schemes are chosen by user, only use info->shiftpd */
460:   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
461:   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
462:     PetscInt *aai = a->i;
463:     ddiag          = a->diag;
464:     sctx.shift_top = 0;
465:     for (i=0; i<n; i++) {
466:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
467:       d  = (a->a)[ddiag[i]];
468:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
469:       v  = a->a+aai[i];
470:       nz = aai[i+1] - aai[i];
471:       for (j=0; j<nz; j++)
472:         rs += PetscAbsScalar(v[j]);
473:       if (rs>sctx.shift_top) sctx.shift_top = rs;
474:     }
475:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
476:     sctx.shift_top    *= 1.1;
477:     sctx.nshift_max   = 5;
478:     sctx.shift_lo     = 0.;
479:     sctx.shift_hi     = 1.;
480:   }

482:   sctx.shift_amount = 0;
483:   sctx.nshift       = 0;
484:   do {
485:     sctx.lushift = PETSC_FALSE;
486:     for (i=0; i<n; i++){
487:       nz    = bi[i+1] - bi[i];
488:       bjtmp = bj + bi[i];
489:       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;

491:       /* load in initial (unfactored row) */
492:       nz    = a->i[r[i]+1] - a->i[r[i]];
493:       ajtmp = a->j + a->i[r[i]];
494:       v     = a->a + a->i[r[i]];
495:       for (j=0; j<nz; j++) {
496:         rtmp[ics[ajtmp[j]]] = v[j];
497:       }
498:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

500:       row = *bjtmp++;
501:       while  (row < i) {
502:         pc = rtmp + row;
503:         if (*pc != 0.0) {
504:           pv         = b->a + diag_offset[row];
505:           pj         = b->j + diag_offset[row] + 1;
506:           multiplier = *pc / *pv++;
507:           *pc        = multiplier;
508:           nz         = bi[row+1] - diag_offset[row] - 1;
509:           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
510:           PetscLogFlops(2*nz);
511:         }
512:         row = *bjtmp++;
513:       }
514:       /* finished row so stick it into b->a */
515:       pv   = b->a + bi[i] ;
516:       pj   = b->j + bi[i] ;
517:       nz   = bi[i+1] - bi[i];
518:       diag = diag_offset[i] - bi[i];
519:       rs   = 0.0;
520:       for (j=0; j<nz; j++) {
521:         pv[j] = rtmps[pj[j]];
522:         if (j != diag) rs += PetscAbsScalar(pv[j]);
523:       }

525:       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
526:       sctx.rs  = rs;
527:       sctx.pv  = pv[diag];
528:       MatLUCheckShift_inline(info,sctx,i,newshift);
529:       if (newshift == 1) break;
530:     }

532:     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
533:       /*
534:        * if no shift in this attempt & shifting & started shifting & can refine,
535:        * then try lower shift
536:        */
537:       sctx.shift_hi        = info->shift_fraction;
538:       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
539:       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
540:       sctx.lushift         = PETSC_TRUE;
541:       sctx.nshift++;
542:     }
543:   } while (sctx.lushift);

545:   /* invert diagonal entries for simplier triangular solves */
546:   for (i=0; i<n; i++) {
547:     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
548:   }

550:   PetscFree(rtmp);
551:   ISRestoreIndices(isicol,&ic);
552:   ISRestoreIndices(isrow,&r);
553:   C->factor = FACTOR_LU;
554:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
555:   C->assembled = PETSC_TRUE;
556:   PetscLogFlops(C->cmap.n);
557:   if (sctx.nshift){
558:     if (info->shiftnz) {
559:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
560:     } else if (info->shiftpd) {
561:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
562:     }
563:   }
564:   return(0);
565: }

567: /* 
568:    This routine implements inplace ILU(0) with row or/and column permutations. 
569:    Input: 
570:      A - original matrix
571:    Output;
572:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 
573:          a->j (col index) is permuted by the inverse of colperm, then sorted
574:          a->a reordered accordingly with a->j
575:          a->diag (ptr to diagonal elements) is updated.
576: */
579: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B)
580: {
581:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
582:   IS             isrow = a->row,isicol = a->icol;
584:   PetscInt       *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j;
585:   PetscInt       *ajtmp,nz,row,*ics;
586:   PetscInt       *diag = a->diag,nbdiag,*pj;
587:   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,d;
588:   PetscReal      rs;
589:   LUShift_Ctx    sctx;
590:   PetscInt       newshift;

593:   if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
594:   ISGetIndices(isrow,&r);
595:   ISGetIndices(isicol,&ic);
596:   PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
597:   PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
598:   ics = ic;

600:   sctx.shift_top  = 0;
601:   sctx.nshift_max = 0;
602:   sctx.shift_lo   = 0;
603:   sctx.shift_hi   = 0;

605:   /* if both shift schemes are chosen by user, only use info->shiftpd */
606:   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
607:   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
608:     sctx.shift_top = 0;
609:     for (i=0; i<n; i++) {
610:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
611:       d  = (a->a)[diag[i]];
612:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
613:       v  = a->a+ai[i];
614:       nz = ai[i+1] - ai[i];
615:       for (j=0; j<nz; j++)
616:         rs += PetscAbsScalar(v[j]);
617:       if (rs>sctx.shift_top) sctx.shift_top = rs;
618:     }
619:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
620:     sctx.shift_top    *= 1.1;
621:     sctx.nshift_max   = 5;
622:     sctx.shift_lo     = 0.;
623:     sctx.shift_hi     = 1.;
624:   }

626:   sctx.shift_amount = 0;
627:   sctx.nshift       = 0;
628:   do {
629:     sctx.lushift = PETSC_FALSE;
630:     for (i=0; i<n; i++){
631:       /* load in initial unfactored row */
632:       nz    = ai[r[i]+1] - ai[r[i]];
633:       ajtmp = aj + ai[r[i]];
634:       v     = a->a + ai[r[i]];
635:       /* sort permuted ajtmp and values v accordingly */
636:       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
637:       PetscSortIntWithScalarArray(nz,ajtmp,v);

639:       diag[r[i]] = ai[r[i]];
640:       for (j=0; j<nz; j++) {
641:         rtmp[ajtmp[j]] = v[j];
642:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
643:       }
644:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

646:       row = *ajtmp++;
647:       while  (row < i) {
648:         pc = rtmp + row;
649:         if (*pc != 0.0) {
650:           pv         = a->a + diag[r[row]];
651:           pj         = aj + diag[r[row]] + 1;

653:           multiplier = *pc / *pv++;
654:           *pc        = multiplier;
655:           nz         = ai[r[row]+1] - diag[r[row]] - 1;
656:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
657:           PetscLogFlops(2*nz);
658:         }
659:         row = *ajtmp++;
660:       }
661:       /* finished row so overwrite it onto a->a */
662:       pv   = a->a + ai[r[i]] ;
663:       pj   = aj + ai[r[i]] ;
664:       nz   = ai[r[i]+1] - ai[r[i]];
665:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
666: 
667:       rs   = 0.0;
668:       for (j=0; j<nz; j++) {
669:         pv[j] = rtmp[pj[j]];
670:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
671:       }

673:       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
674:       sctx.rs  = rs;
675:       sctx.pv  = pv[nbdiag];
676:       MatLUCheckShift_inline(info,sctx,i,newshift);
677:       if (newshift == 1) break;
678:     }

680:     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
681:       /*
682:        * if no shift in this attempt & shifting & started shifting & can refine,
683:        * then try lower shift
684:        */
685:       sctx.shift_hi        = info->shift_fraction;
686:       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
687:       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
688:       sctx.lushift         = PETSC_TRUE;
689:       sctx.nshift++;
690:     }
691:   } while (sctx.lushift);

693:   /* invert diagonal entries for simplier triangular solves */
694:   for (i=0; i<n; i++) {
695:     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
696:   }

698:   PetscFree(rtmp);
699:   ISRestoreIndices(isicol,&ic);
700:   ISRestoreIndices(isrow,&r);
701:   A->factor     = FACTOR_LU;
702:   A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
703:   A->assembled = PETSC_TRUE;
704:   PetscLogFlops(A->cmap.n);
705:   if (sctx.nshift){
706:     if (info->shiftnz) {
707:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
708:     } else if (info->shiftpd) {
709:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
710:     }
711:   }
712:   return(0);
713: }

717: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
718: {
720:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
721:   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
722:   return(0);
723: }


726: /* ----------------------------------------------------------- */
729: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
730: {
732:   Mat            C;

735:   MatLUFactorSymbolic(A,row,col,info,&C);
736:   MatLUFactorNumeric(A,info,&C);
737:   MatHeaderCopy(A,C);
738:   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
739:   return(0);
740: }
741: /* ----------------------------------------------------------- */
744: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
745: {
746:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
747:   IS             iscol = a->col,isrow = a->row;
749:   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
750:   PetscInt       nz,*rout,*cout;
751:   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

754:   if (!n) return(0);

756:   VecGetArray(bb,&b);
757:   VecGetArray(xx,&x);
758:   tmp  = a->solve_work;

760:   ISGetIndices(isrow,&rout); r = rout;
761:   ISGetIndices(iscol,&cout); c = cout + (n-1);

763:   /* forward solve the lower triangular */
764:   tmp[0] = b[*r++];
765:   tmps   = tmp;
766:   for (i=1; i<n; i++) {
767:     v   = aa + ai[i] ;
768:     vi  = aj + ai[i] ;
769:     nz  = a->diag[i] - ai[i];
770:     sum = b[*r++];
771:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
772:     tmp[i] = sum;
773:   }

775:   /* backward solve the upper triangular */
776:   for (i=n-1; i>=0; i--){
777:     v   = aa + a->diag[i] + 1;
778:     vi  = aj + a->diag[i] + 1;
779:     nz  = ai[i+1] - a->diag[i] - 1;
780:     sum = tmp[i];
781:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
782:     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
783:   }

785:   ISRestoreIndices(isrow,&rout);
786:   ISRestoreIndices(iscol,&cout);
787:   VecRestoreArray(bb,&b);
788:   VecRestoreArray(xx,&x);
789:   PetscLogFlops(2*a->nz - A->cmap.n);
790:   return(0);
791: }

795: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
796: {
797:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
798:   IS             iscol = a->col,isrow = a->row;
800:   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
801:   PetscInt       nz,*rout,*cout,neq;
802:   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

805:   if (!n) return(0);

807:   MatGetArray(B,&b);
808:   MatGetArray(X,&x);
809: 
810:   tmp  = a->solve_work;
811:   ISGetIndices(isrow,&rout); r = rout;
812:   ISGetIndices(iscol,&cout); c = cout;

814:   for (neq=0; neq<n; neq++){
815:     /* forward solve the lower triangular */
816:     tmp[0] = b[r[0]];
817:     tmps   = tmp;
818:     for (i=1; i<n; i++) {
819:       v   = aa + ai[i] ;
820:       vi  = aj + ai[i] ;
821:       nz  = a->diag[i] - ai[i];
822:       sum = b[r[i]];
823:       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
824:       tmp[i] = sum;
825:     }
826:     /* backward solve the upper triangular */
827:     for (i=n-1; i>=0; i--){
828:       v   = aa + a->diag[i] + 1;
829:       vi  = aj + a->diag[i] + 1;
830:       nz  = ai[i+1] - a->diag[i] - 1;
831:       sum = tmp[i];
832:       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
833:       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
834:     }

836:     b += n;
837:     x += n;
838:   }
839:   ISRestoreIndices(isrow,&rout);
840:   ISRestoreIndices(iscol,&cout);
841:   MatRestoreArray(B,&b);
842:   MatRestoreArray(X,&x);
843:   PetscLogFlops(n*(2*a->nz - n));
844:   return(0);
845: }

849: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
850: {
851:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
852:   IS             iscol = a->col,isrow = a->row;
854:   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
855:   PetscInt       nz,*rout,*cout,row;
856:   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

859:   if (!n) return(0);

861:   VecGetArray(bb,&b);
862:   VecGetArray(xx,&x);
863:   tmp  = a->solve_work;

865:   ISGetIndices(isrow,&rout); r = rout;
866:   ISGetIndices(iscol,&cout); c = cout + (n-1);

868:   /* forward solve the lower triangular */
869:   tmp[0] = b[*r++];
870:   tmps   = tmp;
871:   for (row=1; row<n; row++) {
872:     i   = rout[row]; /* permuted row */
873:     v   = aa + ai[i] ;
874:     vi  = aj + ai[i] ;
875:     nz  = a->diag[i] - ai[i];
876:     sum = b[*r++];
877:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
878:     tmp[row] = sum;
879:   }

881:   /* backward solve the upper triangular */
882:   for (row=n-1; row>=0; row--){
883:     i   = rout[row]; /* permuted row */
884:     v   = aa + a->diag[i] + 1;
885:     vi  = aj + a->diag[i] + 1;
886:     nz  = ai[i+1] - a->diag[i] - 1;
887:     sum = tmp[row];
888:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
889:     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
890:   }

892:   ISRestoreIndices(isrow,&rout);
893:   ISRestoreIndices(iscol,&cout);
894:   VecRestoreArray(bb,&b);
895:   VecRestoreArray(xx,&x);
896:   PetscLogFlops(2*a->nz - A->cmap.n);
897:   return(0);
898: }

900: /* ----------------------------------------------------------- */
903: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
904: {
905:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
907:   PetscInt       n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
908:   PetscScalar    *x,*b,*aa = a->a;
909: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
910:   PetscInt       adiag_i,i,*vi,nz,ai_i;
911:   PetscScalar    *v,sum;
912: #endif

915:   if (!n) return(0);

917:   VecGetArray(bb,&b);
918:   VecGetArray(xx,&x);

920: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
921:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
922: #else
923:   /* forward solve the lower triangular */
924:   x[0] = b[0];
925:   for (i=1; i<n; i++) {
926:     ai_i = ai[i];
927:     v    = aa + ai_i;
928:     vi   = aj + ai_i;
929:     nz   = adiag[i] - ai_i;
930:     sum  = b[i];
931:     while (nz--) sum -= *v++ * x[*vi++];
932:     x[i] = sum;
933:   }

935:   /* backward solve the upper triangular */
936:   for (i=n-1; i>=0; i--){
937:     adiag_i = adiag[i];
938:     v       = aa + adiag_i + 1;
939:     vi      = aj + adiag_i + 1;
940:     nz      = ai[i+1] - adiag_i - 1;
941:     sum     = x[i];
942:     while (nz--) sum -= *v++ * x[*vi++];
943:     x[i]    = sum*aa[adiag_i];
944:   }
945: #endif
946:   PetscLogFlops(2*a->nz - A->cmap.n);
947:   VecRestoreArray(bb,&b);
948:   VecRestoreArray(xx,&x);
949:   return(0);
950: }

954: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
955: {
956:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
957:   IS             iscol = a->col,isrow = a->row;
959:   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
960:   PetscInt       nz,*rout,*cout;
961:   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;

964:   if (yy != xx) {VecCopy(yy,xx);}

966:   VecGetArray(bb,&b);
967:   VecGetArray(xx,&x);
968:   tmp  = a->solve_work;

970:   ISGetIndices(isrow,&rout); r = rout;
971:   ISGetIndices(iscol,&cout); c = cout + (n-1);

973:   /* forward solve the lower triangular */
974:   tmp[0] = b[*r++];
975:   for (i=1; i<n; i++) {
976:     v   = aa + ai[i] ;
977:     vi  = aj + ai[i] ;
978:     nz  = a->diag[i] - ai[i];
979:     sum = b[*r++];
980:     while (nz--) sum -= *v++ * tmp[*vi++ ];
981:     tmp[i] = sum;
982:   }

984:   /* backward solve the upper triangular */
985:   for (i=n-1; i>=0; i--){
986:     v   = aa + a->diag[i] + 1;
987:     vi  = aj + a->diag[i] + 1;
988:     nz  = ai[i+1] - a->diag[i] - 1;
989:     sum = tmp[i];
990:     while (nz--) sum -= *v++ * tmp[*vi++ ];
991:     tmp[i] = sum*aa[a->diag[i]];
992:     x[*c--] += tmp[i];
993:   }

995:   ISRestoreIndices(isrow,&rout);
996:   ISRestoreIndices(iscol,&cout);
997:   VecRestoreArray(bb,&b);
998:   VecRestoreArray(xx,&x);
999:   PetscLogFlops(2*a->nz);

1001:   return(0);
1002: }
1003: /* -------------------------------------------------------------------*/
1006: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1007: {
1008:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1009:   IS             iscol = a->col,isrow = a->row;
1011:   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1012:   PetscInt       nz,*rout,*cout,*diag = a->diag;
1013:   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;

1016:   VecGetArray(bb,&b);
1017:   VecGetArray(xx,&x);
1018:   tmp  = a->solve_work;

1020:   ISGetIndices(isrow,&rout); r = rout;
1021:   ISGetIndices(iscol,&cout); c = cout;

1023:   /* copy the b into temp work space according to permutation */
1024:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1026:   /* forward solve the U^T */
1027:   for (i=0; i<n; i++) {
1028:     v   = aa + diag[i] ;
1029:     vi  = aj + diag[i] + 1;
1030:     nz  = ai[i+1] - diag[i] - 1;
1031:     s1  = tmp[i];
1032:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1033:     while (nz--) {
1034:       tmp[*vi++ ] -= (*v++)*s1;
1035:     }
1036:     tmp[i] = s1;
1037:   }

1039:   /* backward solve the L^T */
1040:   for (i=n-1; i>=0; i--){
1041:     v   = aa + diag[i] - 1 ;
1042:     vi  = aj + diag[i] - 1 ;
1043:     nz  = diag[i] - ai[i];
1044:     s1  = tmp[i];
1045:     while (nz--) {
1046:       tmp[*vi-- ] -= (*v--)*s1;
1047:     }
1048:   }

1050:   /* copy tmp into x according to permutation */
1051:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

1053:   ISRestoreIndices(isrow,&rout);
1054:   ISRestoreIndices(iscol,&cout);
1055:   VecRestoreArray(bb,&b);
1056:   VecRestoreArray(xx,&x);

1058:   PetscLogFlops(2*a->nz-A->cmap.n);
1059:   return(0);
1060: }

1064: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1065: {
1066:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1067:   IS             iscol = a->col,isrow = a->row;
1069:   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1070:   PetscInt       nz,*rout,*cout,*diag = a->diag;
1071:   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;

1074:   if (zz != xx) {VecCopy(zz,xx);}

1076:   VecGetArray(bb,&b);
1077:   VecGetArray(xx,&x);
1078:   tmp = a->solve_work;

1080:   ISGetIndices(isrow,&rout); r = rout;
1081:   ISGetIndices(iscol,&cout); c = cout;

1083:   /* copy the b into temp work space according to permutation */
1084:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1086:   /* forward solve the U^T */
1087:   for (i=0; i<n; i++) {
1088:     v   = aa + diag[i] ;
1089:     vi  = aj + diag[i] + 1;
1090:     nz  = ai[i+1] - diag[i] - 1;
1091:     tmp[i] *= *v++;
1092:     while (nz--) {
1093:       tmp[*vi++ ] -= (*v++)*tmp[i];
1094:     }
1095:   }

1097:   /* backward solve the L^T */
1098:   for (i=n-1; i>=0; i--){
1099:     v   = aa + diag[i] - 1 ;
1100:     vi  = aj + diag[i] - 1 ;
1101:     nz  = diag[i] - ai[i];
1102:     while (nz--) {
1103:       tmp[*vi-- ] -= (*v--)*tmp[i];
1104:     }
1105:   }

1107:   /* copy tmp into x according to permutation */
1108:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

1110:   ISRestoreIndices(isrow,&rout);
1111:   ISRestoreIndices(iscol,&cout);
1112:   VecRestoreArray(bb,&b);
1113:   VecRestoreArray(xx,&x);

1115:   PetscLogFlops(2*a->nz);
1116:   return(0);
1117: }
1118: /* ----------------------------------------------------------------*/

1122: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
1123: {
1124:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1125:   IS                 isicol;
1126:   PetscErrorCode     ierr;
1127:   PetscInt           *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d;
1128:   PetscInt           *bi,*cols,nnz,*cols_lvl;
1129:   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1130:   PetscInt           i,levels,diagonal_fill;
1131:   PetscTruth         col_identity,row_identity;
1132:   PetscReal          f;
1133:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1134:   PetscBT            lnkbt;
1135:   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1136:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1137:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1138:   PetscTruth         missing;

1141:   f             = info->fill;
1142:   levels        = (PetscInt)info->levels;
1143:   diagonal_fill = (PetscInt)info->diagonal_fill;
1144:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

1146:   /* special case that simply copies fill pattern */
1147:   ISIdentity(isrow,&row_identity);
1148:   ISIdentity(iscol,&col_identity);
1149:   if (!levels && row_identity && col_identity) {
1150:     MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
1151:     (*fact)->factor                 = FACTOR_LU;
1152:     (*fact)->info.factor_mallocs    = 0;
1153:     (*fact)->info.fill_ratio_given  = info->fill;
1154:     (*fact)->info.fill_ratio_needed = 1.0;
1155:     b               = (Mat_SeqAIJ*)(*fact)->data;
1156:     MatMissingDiagonal(A,&missing,&d);
1157:     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1158:     b->row              = isrow;
1159:     b->col              = iscol;
1160:     b->icol             = isicol;
1161:     PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);
1162:     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
1163:     PetscObjectReference((PetscObject)isrow);
1164:     PetscObjectReference((PetscObject)iscol);
1165:     return(0);
1166:   }

1168:   ISGetIndices(isrow,&r);
1169:   ISGetIndices(isicol,&ic);

1171:   /* get new row pointers */
1172:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1173:   bi[0] = 0;
1174:   /* bdiag is location of diagonal in factor */
1175:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1176:   bdiag[0]  = 0;

1178:   PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
1179:   bjlvl_ptr = (PetscInt**)(bj_ptr + n);

1181:   /* create a linked list for storing column indices of the active row */
1182:   nlnk = n + 1;
1183:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

1185:   /* initial FreeSpace size is f*(ai[n]+1) */
1186:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1187:   current_space = free_space;
1188:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1189:   current_space_lvl = free_space_lvl;
1190: 
1191:   for (i=0; i<n; i++) {
1192:     nzi = 0;
1193:     /* copy current row into linked list */
1194:     nnz  = ai[r[i]+1] - ai[r[i]];
1195:     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1196:     cols = aj + ai[r[i]];
1197:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1198:     PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1199:     nzi += nlnk;

1201:     /* make sure diagonal entry is included */
1202:     if (diagonal_fill && lnk[i] == -1) {
1203:       fm = n;
1204:       while (lnk[fm] < i) fm = lnk[fm];
1205:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1206:       lnk[fm]    = i;
1207:       lnk_lvl[i] = 0;
1208:       nzi++; dcount++;
1209:     }

1211:     /* add pivot rows into the active row */
1212:     nzbd = 0;
1213:     prow = lnk[n];
1214:     while (prow < i) {
1215:       nnz      = bdiag[prow];
1216:       cols     = bj_ptr[prow] + nnz + 1;
1217:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1218:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1219:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1220:       nzi += nlnk;
1221:       prow = lnk[prow];
1222:       nzbd++;
1223:     }
1224:     bdiag[i] = nzbd;
1225:     bi[i+1]  = bi[i] + nzi;

1227:     /* if free space is not available, make more free space */
1228:     if (current_space->local_remaining<nzi) {
1229:       nnz = nzi*(n - i); /* estimated and max additional space needed */
1230:       PetscFreeSpaceGet(nnz,&current_space);
1231:       PetscFreeSpaceGet(nnz,&current_space_lvl);
1232:       reallocs++;
1233:     }

1235:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1236:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1237:     bj_ptr[i]    = current_space->array;
1238:     bjlvl_ptr[i] = current_space_lvl->array;

1240:     /* make sure the active row i has diagonal entry */
1241:     if (*(bj_ptr[i]+bdiag[i]) != i) {
1242:       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1243:     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1244:     }

1246:     current_space->array           += nzi;
1247:     current_space->local_used      += nzi;
1248:     current_space->local_remaining -= nzi;
1249:     current_space_lvl->array           += nzi;
1250:     current_space_lvl->local_used      += nzi;
1251:     current_space_lvl->local_remaining -= nzi;
1252:   }

1254:   ISRestoreIndices(isrow,&r);
1255:   ISRestoreIndices(isicol,&ic);

1257:   /* destroy list of free space and other temporary arrays */
1258:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1259:   PetscFreeSpaceContiguous(&free_space,bj);
1260:   PetscIncompleteLLDestroy(lnk,lnkbt);
1261:   PetscFreeSpaceDestroy(free_space_lvl);
1262:   PetscFree(bj_ptr);

1264: #if defined(PETSC_USE_INFO)
1265:   {
1266:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1267:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1268:     PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1269:     PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1270:     PetscInfo(A,"for best performance.\n");
1271:     if (diagonal_fill) {
1272:       PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1273:     }
1274:   }
1275: #endif

1277:   /* put together the new matrix */
1278:   MatCreate(((PetscObject)A)->comm,fact);
1279:   MatSetSizes(*fact,n,n,n,n);
1280:   MatSetType(*fact,((PetscObject)A)->type_name);
1281:   MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1282:   PetscLogObjectParent(*fact,isicol);
1283:   b = (Mat_SeqAIJ*)(*fact)->data;
1284:   b->free_a       = PETSC_TRUE;
1285:   b->free_ij      = PETSC_TRUE;
1286:   b->singlemalloc = PETSC_FALSE;
1287:   len = (bi[n] )*sizeof(PetscScalar);
1288:   PetscMalloc(len+1,&b->a);
1289:   b->j          = bj;
1290:   b->i          = bi;
1291:   for (i=0; i<n; i++) bdiag[i] += bi[i];
1292:   b->diag       = bdiag;
1293:   b->ilen       = 0;
1294:   b->imax       = 0;
1295:   b->row        = isrow;
1296:   b->col        = iscol;
1297:   PetscObjectReference((PetscObject)isrow);
1298:   PetscObjectReference((PetscObject)iscol);
1299:   b->icol       = isicol;
1300:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1301:   /* In b structure:  Free imax, ilen, old a, old j.  
1302:      Allocate bdiag, solve_work, new a, new j */
1303:   PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1304:   b->maxnz             = b->nz = bi[n] ;
1305:   (*fact)->factor = FACTOR_LU;
1306:   (*fact)->info.factor_mallocs    = reallocs;
1307:   (*fact)->info.fill_ratio_given  = f;
1308:   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);

1310:   MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1311:   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */

1313:   return(0);
1314: }

1316:  #include src/mat/impls/sbaij/seq/sbaij.h
1319: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1320: {
1321:   Mat            C = *B;
1322:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1323:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1324:   IS             ip=b->row,iip = b->icol;
1326:   PetscInt       *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1327:   PetscInt       *ai=a->i,*aj=a->j;
1328:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1329:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1330:   PetscReal      zeropivot,rs,shiftnz;
1331:   PetscReal      shiftpd;
1332:   ChShift_Ctx    sctx;
1333:   PetscInt       newshift;


1337:   shiftnz   = info->shiftnz;
1338:   shiftpd   = info->shiftpd;
1339:   zeropivot = info->zeropivot;

1341:   ISGetIndices(ip,&rip);
1342:   ISGetIndices(iip,&riip);
1343: 
1344:   /* initialization */
1345:   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1346:   PetscMalloc(nz,&il);
1347:   jl   = il + mbs;
1348:   rtmp = (MatScalar*)(jl + mbs);

1350:   sctx.shift_amount = 0;
1351:   sctx.nshift       = 0;
1352:   do {
1353:     sctx.chshift = PETSC_FALSE;
1354:     for (i=0; i<mbs; i++) {
1355:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1356:     }
1357: 
1358:     for (k = 0; k<mbs; k++){
1359:       bval = ba + bi[k];
1360:       /* initialize k-th row by the perm[k]-th row of A */
1361:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1362:       for (j = jmin; j < jmax; j++){
1363:         col = riip[aj[j]];
1364:         if (col >= k){ /* only take upper triangular entry */
1365:           rtmp[col] = aa[j];
1366:           *bval++  = 0.0; /* for in-place factorization */
1367:         }
1368:       }
1369:       /* shift the diagonal of the matrix */
1370:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1372:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1373:       dk = rtmp[k];
1374:       i = jl[k]; /* first row to be added to k_th row  */

1376:       while (i < k){
1377:         nexti = jl[i]; /* next row to be added to k_th row */

1379:         /* compute multiplier, update diag(k) and U(i,k) */
1380:         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1381:         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1382:         dk += uikdi*ba[ili];
1383:         ba[ili] = uikdi; /* -U(i,k) */

1385:         /* add multiple of row i to k-th row */
1386:         jmin = ili + 1; jmax = bi[i+1];
1387:         if (jmin < jmax){
1388:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1389:           /* update il and jl for row i */
1390:           il[i] = jmin;
1391:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1392:         }
1393:         i = nexti;
1394:       }

1396:       /* shift the diagonals when zero pivot is detected */
1397:       /* compute rs=sum of abs(off-diagonal) */
1398:       rs   = 0.0;
1399:       jmin = bi[k]+1;
1400:       nz   = bi[k+1] - jmin;
1401:       bcol = bj + jmin;
1402:       while (nz--){
1403:         rs += PetscAbsScalar(rtmp[*bcol]);
1404:         bcol++;
1405:       }

1407:       sctx.rs = rs;
1408:       sctx.pv = dk;
1409:       MatCholeskyCheckShift_inline(info,sctx,k,newshift);

1411:       if (newshift == 1) {
1412:         if (!sctx.shift_amount) {
1413:           sctx.shift_amount = 1e-5;
1414:         }
1415:         break;
1416:       }
1417: 
1418:       /* copy data into U(k,:) */
1419:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1420:       jmin = bi[k]+1; jmax = bi[k+1];
1421:       if (jmin < jmax) {
1422:         for (j=jmin; j<jmax; j++){
1423:           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1424:         }
1425:         /* add the k-th row into il and jl */
1426:         il[k] = jmin;
1427:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1428:       }
1429:     }
1430:   } while (sctx.chshift);
1431:   PetscFree(il);

1433:   ISRestoreIndices(ip,&rip);
1434:   ISRestoreIndices(iip,&riip);
1435:   C->factor       = FACTOR_CHOLESKY;
1436:   C->assembled    = PETSC_TRUE;
1437:   C->preallocated = PETSC_TRUE;
1438:   PetscLogFlops(C->rmap.n);
1439:   if (sctx.nshift){
1440:     if (shiftnz) {
1441:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1442:     } else if (shiftpd) {
1443:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1444:     }
1445:   }
1446:   return(0);
1447: }

1451: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1452: {
1453:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1454:   Mat_SeqSBAIJ       *b;
1455:   Mat                B;
1456:   PetscErrorCode     ierr;
1457:   PetscTruth         perm_identity,missing;
1458:   PetscInt           reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1459:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1460:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1461:   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1462:   PetscReal          fill=info->fill,levels=info->levels;
1463:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1464:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1465:   PetscBT            lnkbt;
1466:   IS                 iperm;
1467: 
1469:   MatMissingDiagonal(A,&missing,&d);
1470:   if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1471:   ISIdentity(perm,&perm_identity);
1472:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);

1474:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1475:   ui[0] = 0;

1477:   /* ICC(0) without matrix ordering: simply copies fill pattern */
1478:   if (!levels && perm_identity) {

1480:     for (i=0; i<am; i++) {
1481:       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1482:     }
1483:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1484:     cols = uj;
1485:     for (i=0; i<am; i++) {
1486:       aj    = a->j + a->diag[i];
1487:       ncols = ui[i+1] - ui[i];
1488:       for (j=0; j<ncols; j++) *cols++ = *aj++;
1489:     }
1490:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1491:     ISGetIndices(iperm,&riip);
1492:     ISGetIndices(perm,&rip);

1494:     /* initialization */
1495:     PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);

1497:     /* jl: linked list for storing indices of the pivot rows 
1498:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1499:     PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1500:     il         = jl + am;
1501:     uj_ptr     = (PetscInt**)(il + am);
1502:     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1503:     for (i=0; i<am; i++){
1504:       jl[i] = am; il[i] = 0;
1505:     }

1507:     /* create and initialize a linked list for storing column indices of the active row k */
1508:     nlnk = am + 1;
1509:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

1511:     /* initial FreeSpace size is fill*(ai[am]+1) */
1512:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1513:     current_space = free_space;
1514:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1515:     current_space_lvl = free_space_lvl;

1517:     for (k=0; k<am; k++){  /* for each active row k */
1518:       /* initialize lnk by the column indices of row rip[k] of A */
1519:       nzk   = 0;
1520:       ncols = ai[rip[k]+1] - ai[rip[k]];
1521:       if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1522:       ncols_upper = 0;
1523:       for (j=0; j<ncols; j++){
1524:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1525:         if (riip[i] >= k){ /* only take upper triangular entry */
1526:           ajtmp[ncols_upper] = i;
1527:           ncols_upper++;
1528:         }
1529:       }
1530:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
1531:       nzk += nlnk;

1533:       /* update lnk by computing fill-in for each pivot row to be merged in */
1534:       prow = jl[k]; /* 1st pivot row */
1535: 
1536:       while (prow < k){
1537:         nextprow = jl[prow];
1538: 
1539:         /* merge prow into k-th row */
1540:         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1541:         jmax = ui[prow+1];
1542:         ncols = jmax-jmin;
1543:         i     = jmin - ui[prow];
1544:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1545:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1546:         j     = *(uj - 1);
1547:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1548:         nzk += nlnk;

1550:         /* update il and jl for prow */
1551:         if (jmin < jmax){
1552:           il[prow] = jmin;
1553:           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1554:         }
1555:         prow = nextprow;
1556:       }

1558:       /* if free space is not available, make more free space */
1559:       if (current_space->local_remaining<nzk) {
1560:         i = am - k + 1; /* num of unfactored rows */
1561:         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1562:         PetscFreeSpaceGet(i,&current_space);
1563:         PetscFreeSpaceGet(i,&current_space_lvl);
1564:         reallocs++;
1565:       }

1567:       /* copy data into free_space and free_space_lvl, then initialize lnk */
1568:       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1569:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

1571:       /* add the k-th row into il and jl */
1572:       if (nzk > 1){
1573:         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1574:         jl[k] = jl[i]; jl[i] = k;
1575:         il[k] = ui[k] + 1;
1576:       }
1577:       uj_ptr[k]     = current_space->array;
1578:       uj_lvl_ptr[k] = current_space_lvl->array;

1580:       current_space->array           += nzk;
1581:       current_space->local_used      += nzk;
1582:       current_space->local_remaining -= nzk;

1584:       current_space_lvl->array           += nzk;
1585:       current_space_lvl->local_used      += nzk;
1586:       current_space_lvl->local_remaining -= nzk;

1588:       ui[k+1] = ui[k] + nzk;
1589:     }

1591: #if defined(PETSC_USE_INFO)
1592:     if (ai[am] != 0) {
1593:       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1594:       PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1595:       PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1596:       PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1597:     } else {
1598:       PetscInfo(A,"Empty matrix.\n");
1599:     }
1600: #endif

1602:     ISRestoreIndices(perm,&rip);
1603:     ISRestoreIndices(iperm,&riip);
1604:     PetscFree(jl);
1605:     PetscFree(ajtmp);

1607:     /* destroy list of free space and other temporary array(s) */
1608:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1609:     PetscFreeSpaceContiguous(&free_space,uj);
1610:     PetscIncompleteLLDestroy(lnk,lnkbt);
1611:     PetscFreeSpaceDestroy(free_space_lvl);

1613:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

1615:   /* put together the new matrix in MATSEQSBAIJ format */
1616:   MatCreate(PETSC_COMM_SELF,fact);
1617:   MatSetSizes(*fact,am,am,am,am);
1618:   B = *fact;
1619:   MatSetType(B,MATSEQSBAIJ);
1620:   MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);

1622:   b    = (Mat_SeqSBAIJ*)B->data;
1623:   b->singlemalloc = PETSC_FALSE;
1624:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1625:   b->j    = uj;
1626:   b->i    = ui;
1627:   b->diag = 0;
1628:   b->ilen = 0;
1629:   b->imax = 0;
1630:   b->row  = perm;
1631:   b->col  = perm;
1632:   PetscObjectReference((PetscObject)perm);
1633:   PetscObjectReference((PetscObject)perm);
1634:   b->icol = iperm;
1635:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1636:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1637:   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1638:   b->maxnz   = b->nz = ui[am];
1639:   b->free_a  = PETSC_TRUE;
1640:   b->free_ij = PETSC_TRUE;
1641: 
1642:   B->factor                 = FACTOR_CHOLESKY;
1643:   B->info.factor_mallocs    = reallocs;
1644:   B->info.fill_ratio_given  = fill;
1645:   if (ai[am] != 0) {
1646:     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1647:   } else {
1648:     B->info.fill_ratio_needed = 0.0;
1649:   }
1650:   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1651:   if (perm_identity){
1652:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1653:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1654:   }
1655:   return(0);
1656: }

1660: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1661: {
1662:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1663:   Mat_SeqSBAIJ       *b;
1664:   Mat                B;
1665:   PetscErrorCode     ierr;
1666:   PetscTruth         perm_identity;
1667:   PetscReal          fill = info->fill;
1668:   PetscInt           *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1669:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1670:   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1671:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1672:   PetscBT            lnkbt;
1673:   IS                 iperm;

1676:   /* check whether perm is the identity mapping */
1677:   ISIdentity(perm,&perm_identity);
1678:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1679:   ISGetIndices(iperm,&riip);
1680:   ISGetIndices(perm,&rip);

1682:   /* initialization */
1683:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1684:   ui[0] = 0;

1686:   /* jl: linked list for storing indices of the pivot rows 
1687:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1688:   PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1689:   il     = jl + am;
1690:   cols   = il + am;
1691:   ui_ptr = (PetscInt**)(cols + am);
1692:   for (i=0; i<am; i++){
1693:     jl[i] = am; il[i] = 0;
1694:   }

1696:   /* create and initialize a linked list for storing column indices of the active row k */
1697:   nlnk = am + 1;
1698:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

1700:   /* initial FreeSpace size is fill*(ai[am]+1) */
1701:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1702:   current_space = free_space;

1704:   for (k=0; k<am; k++){  /* for each active row k */
1705:     /* initialize lnk by the column indices of row rip[k] of A */
1706:     nzk   = 0;
1707:     ncols = ai[rip[k]+1] - ai[rip[k]];
1708:     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1709:     ncols_upper = 0;
1710:     for (j=0; j<ncols; j++){
1711:       i = riip[*(aj + ai[rip[k]] + j)];
1712:       if (i >= k){ /* only take upper triangular entry */
1713:         cols[ncols_upper] = i;
1714:         ncols_upper++;
1715:       }
1716:     }
1717:     PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1718:     nzk += nlnk;

1720:     /* update lnk by computing fill-in for each pivot row to be merged in */
1721:     prow = jl[k]; /* 1st pivot row */
1722: 
1723:     while (prow < k){
1724:       nextprow = jl[prow];
1725:       /* merge prow into k-th row */
1726:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1727:       jmax = ui[prow+1];
1728:       ncols = jmax-jmin;
1729:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1730:       PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1731:       nzk += nlnk;

1733:       /* update il and jl for prow */
1734:       if (jmin < jmax){
1735:         il[prow] = jmin;
1736:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1737:       }
1738:       prow = nextprow;
1739:     }

1741:     /* if free space is not available, make more free space */
1742:     if (current_space->local_remaining<nzk) {
1743:       i = am - k + 1; /* num of unfactored rows */
1744:       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1745:       PetscFreeSpaceGet(i,&current_space);
1746:       reallocs++;
1747:     }

1749:     /* copy data into free space, then initialize lnk */
1750:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

1752:     /* add the k-th row into il and jl */
1753:     if (nzk-1 > 0){
1754:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1755:       jl[k] = jl[i]; jl[i] = k;
1756:       il[k] = ui[k] + 1;
1757:     }
1758:     ui_ptr[k] = current_space->array;
1759:     current_space->array           += nzk;
1760:     current_space->local_used      += nzk;
1761:     current_space->local_remaining -= nzk;

1763:     ui[k+1] = ui[k] + nzk;
1764:   }

1766: #if defined(PETSC_USE_INFO)
1767:   if (ai[am] != 0) {
1768:     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1769:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1770:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1771:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1772:   } else {
1773:      PetscInfo(A,"Empty matrix.\n");
1774:   }
1775: #endif

1777:   ISRestoreIndices(perm,&rip);
1778:   ISRestoreIndices(iperm,&riip);
1779:   PetscFree(jl);

1781:   /* destroy list of free space and other temporary array(s) */
1782:   PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1783:   PetscFreeSpaceContiguous(&free_space,uj);
1784:   PetscLLDestroy(lnk,lnkbt);

1786:   /* put together the new matrix in MATSEQSBAIJ format */
1787:   MatCreate(PETSC_COMM_SELF,fact);
1788:   MatSetSizes(*fact,am,am,am,am);
1789:   B    = *fact;
1790:   MatSetType(B,MATSEQSBAIJ);
1791:   MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);

1793:   b = (Mat_SeqSBAIJ*)B->data;
1794:   b->singlemalloc = PETSC_FALSE;
1795:   b->free_a       = PETSC_TRUE;
1796:   b->free_ij      = PETSC_TRUE;
1797:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1798:   b->j    = uj;
1799:   b->i    = ui;
1800:   b->diag = 0;
1801:   b->ilen = 0;
1802:   b->imax = 0;
1803:   b->row  = perm;
1804:   b->col  = perm;
1805:   PetscObjectReference((PetscObject)perm);
1806:   PetscObjectReference((PetscObject)perm);
1807:   b->icol = iperm;
1808:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1809:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1810:   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1811:   b->maxnz = b->nz = ui[am];
1812: 
1813:   B->factor                 = FACTOR_CHOLESKY;
1814:   B->info.factor_mallocs    = reallocs;
1815:   B->info.fill_ratio_given  = fill;
1816:   if (ai[am] != 0) {
1817:     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1818:   } else {
1819:     B->info.fill_ratio_needed = 0.0;
1820:   }
1821:   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1822:   if (perm_identity){
1823:     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1824:     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1825:     (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1826:     (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1827:   } else {
1828:     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1;
1829:     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1830:     (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1831:     (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1832:   }
1833:   return(0);
1834: }