Actual source code: aijfact.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/impls/aij/seq/aij.h
 4:  #include src/inline/dot.h
 5:  #include src/inline/spops.h
 6:  #include petscbt.h
 7:  #include src/mat/utils/freespace.h

 11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
 12: {

 15:   SETERRQ(PETSC_ERR_SUP,"Code not written");
 16: #if !defined(PETSC_USE_DEBUG)
 17:   return(0);
 18: #endif
 19: }


 22: EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);

 24: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 25: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
 26: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
 27: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
 28: #endif

 32:   /* ------------------------------------------------------------

 34:           This interface was contribed by Tony Caola

 36:      This routine is an interface to the pivoting drop-tolerance 
 37:      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 
 38:      SPARSEKIT2.

 40:      The SPARSEKIT2 routines used here are covered by the GNU 
 41:      copyright; see the file gnu in this directory.

 43:      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
 44:      help in getting this routine ironed out.

 46:      The major drawback to this routine is that if info->fill is 
 47:      not large enough it fails rather than allocating more space;
 48:      this can be fixed by hacking/improving the f2c version of 
 49:      Yousef Saad's code.

 51:      ------------------------------------------------------------
 52: */
 53: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
 54: {
 55: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 57:   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
 58:   You can obtain the drop tolerance routines by installing PETSc from\n\
 59:   www.mcs.anl.gov/petsc\n");
 60: #else
 61:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
 62:   IS             iscolf,isicol,isirow;
 63:   PetscTruth     reorder;
 64:   PetscErrorCode ierr,sierr;
 65:   PetscInt       *c,*r,*ic,i,n = A->m;
 66:   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
 67:   PetscInt       *ordcol,*iwk,*iperm,*jw;
 68:   PetscInt       jmax,lfill,job,*o_i,*o_j;
 69:   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
 70:   PetscReal      af;


 74:   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
 75:   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
 76:   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
 77:   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
 78:   lfill   = (PetscInt)(info->dtcount/2.0);
 79:   jmax    = (PetscInt)(info->fill*a->nz);


 82:   /* ------------------------------------------------------------
 83:      If reorder=.TRUE., then the original matrix has to be 
 84:      reordered to reflect the user selected ordering scheme, and
 85:      then de-reordered so it is in it's original format.  
 86:      Because Saad's dperm() is NOT in place, we have to copy 
 87:      the original matrix and allocate more storage. . . 
 88:      ------------------------------------------------------------
 89:   */

 91:   /* set reorder to true if either isrow or iscol is not identity */
 92:   ISIdentity(isrow,&reorder);
 93:   if (reorder) {ISIdentity(iscol,&reorder);}
 94:   reorder = PetscNot(reorder);

 96: 
 97:   /* storage for ilu factor */
 98:   PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
 99:   PetscMalloc(jmax*sizeof(PetscInt),&new_j);
100:   PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
101:   PetscMalloc(n*sizeof(PetscInt),&ordcol);

103:   /* ------------------------------------------------------------
104:      Make sure that everything is Fortran formatted (1-Based)
105:      ------------------------------------------------------------
106:   */
107:   for (i=old_i[0];i<old_i[n];i++) {
108:     old_j[i]++;
109:   }
110:   for(i=0;i<n+1;i++) {
111:     old_i[i]++;
112:   };
113: 

115:   if (reorder) {
116:     ISGetIndices(iscol,&c);
117:     ISGetIndices(isrow,&r);
118:     for(i=0;i<n;i++) {
119:       r[i]  = r[i]+1;
120:       c[i]  = c[i]+1;
121:     }
122:     PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
123:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
124:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
125:     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126:     for (i=0;i<n;i++) {
127:       r[i]  = r[i]-1;
128:       c[i]  = c[i]-1;
129:     }
130:     ISRestoreIndices(iscol,&c);
131:     ISRestoreIndices(isrow,&r);
132:     o_a = old_a2;
133:     o_j = old_j2;
134:     o_i = old_i2;
135:   } else {
136:     o_a = old_a;
137:     o_j = old_j;
138:     o_i = old_i;
139:   }

141:   /* ------------------------------------------------------------
142:      Call Saad's ilutp() routine to generate the factorization
143:      ------------------------------------------------------------
144:   */

146:   PetscMalloc(2*n*sizeof(PetscInt),&iperm);
147:   PetscMalloc(2*n*sizeof(PetscInt),&jw);
148:   PetscMalloc(n*sizeof(PetscScalar),&w);

150:   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151:   if (sierr) {
152:     switch (sierr) {
153:       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
154:       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
155:       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156:       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157:       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158:       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159:     }
160:   }

162:   PetscFree(w);
163:   PetscFree(jw);

165:   /* ------------------------------------------------------------
166:      Saad's routine gives the result in Modified Sparse Row (msr)
167:      Convert to Compressed Sparse Row format (csr) 
168:      ------------------------------------------------------------
169:   */

171:   PetscMalloc(n*sizeof(PetscScalar),&wk);
172:   PetscMalloc((n+1)*sizeof(PetscInt),&iwk);

174:   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);

176:   PetscFree(iwk);
177:   PetscFree(wk);

179:   if (reorder) {
180:     PetscFree(old_a2);
181:     PetscFree(old_j2);
182:     PetscFree(old_i2);
183:   } else {
184:     /* fix permutation of old_j that the factorization introduced */
185:     for (i=old_i[0]; i<old_i[n]; i++) {
186:       old_j[i-1] = iperm[old_j[i-1]-1];
187:     }
188:   }

190:   /* get rid of the shift to indices starting at 1 */
191:   for (i=0; i<n+1; i++) {
192:     old_i[i]--;
193:   }
194:   for (i=old_i[0];i<old_i[n];i++) {
195:     old_j[i]--;
196:   }
197: 
198:   /* Make the factored matrix 0-based */
199:   for (i=0; i<n+1; i++) {
200:     new_i[i]--;
201:   }
202:   for (i=new_i[0];i<new_i[n];i++) {
203:     new_j[i]--;
204:   }

206:   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207:   /*-- permute the right-hand-side and solution vectors           --*/
208:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209:   ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
210:   ISGetIndices(isicol,&ic);
211:   for(i=0; i<n; i++) {
212:     ordcol[i] = ic[iperm[i]-1];
213:   };
214:   ISRestoreIndices(isicol,&ic);
215:   ISDestroy(isicol);

217:   PetscFree(iperm);

219:   ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
220:   PetscFree(ordcol);

222:   /*----- put together the new matrix -----*/

224:   MatCreate(A->comm,fact);
225:   MatSetSizes(*fact,n,n,n,n);
226:   MatSetType(*fact,A->type_name);
227:   MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
228:   (*fact)->factor    = FACTOR_LU;
229:   (*fact)->assembled = PETSC_TRUE;

231:   b = (Mat_SeqAIJ*)(*fact)->data;
232:   b->freedata      = PETSC_TRUE;
233:   b->sorted        = PETSC_FALSE;
234:   b->singlemalloc  = PETSC_FALSE;
235:   b->a             = new_a;
236:   b->j             = new_j;
237:   b->i             = new_i;
238:   b->ilen          = 0;
239:   b->imax          = 0;
240:   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241:   b->row           = isirow;
242:   b->col           = iscolf;
243:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
244:   b->maxnz = b->nz = new_i[n];
245:   MatMarkDiagonal_SeqAIJ(*fact);
246:   (*fact)->info.factor_mallocs = 0;

248:   MatMarkDiagonal_SeqAIJ(A);

250:   af = ((double)b->nz)/((double)a->nz) + .001;
251:   PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));
252:   PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));
253:   PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));
254:   PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));

256:   MatILUDTFactor_Inode(A,isrow,iscol,info,fact);

258:   return(0);
259: #endif
260: }

264: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265: {
266:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
267:   IS             isicol;
269:   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
270:   PetscInt       *bi,*bj,*ajtmp;
271:   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272:   PetscReal      f;
273:   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
274:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
275:   PetscBT        lnkbt;

278:   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
280:   ISGetIndices(isrow,&r);
281:   ISGetIndices(isicol,&ic);

283:   /* get new row pointers */
284:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
285:   bi[0] = 0;

287:   /* bdiag is location of diagonal in factor */
288:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
289:   bdiag[0] = 0;

291:   /* linked list for storing column indices of the active row */
292:   nlnk = n + 1;
293:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

295:   PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);
296:   im     = cols + n;
297:   bi_ptr = (PetscInt**)(im + n);

299:   /* initial FreeSpace size is f*(ai[n]+1) */
300:   f = info->fill;
301:   GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);
302:   current_space = free_space;

304:   for (i=0; i<n; i++) {
305:     /* copy previous fill into linked list */
306:     nzi = 0;
307:     nnz = ai[r[i]+1] - ai[r[i]];
308:     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
309:     ajtmp = aj + ai[r[i]];
310:     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
311:     PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);
312:     nzi += nlnk;

314:     /* add pivot rows into linked list */
315:     row = lnk[n];
316:     while (row < i) {
317:       nzbd    = bdiag[row] - bi[row] + 1;
318:       ajtmp   = bi_ptr[row] + nzbd;
319:       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i ? */
320:       PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
321:       nzi     += nlnk;
322:       row = lnk[row];
323:     }

325:     bi[i+1] = bi[i] + nzi;
326:     im[i]   = nzi;

328:     /* mark bdiag */
329:     nzbd = 0;
330:     nnz  = nzi;
331:     k    = lnk[n];
332:     while (nnz-- && k < i){
333:       nzbd++;
334:       k = lnk[k];
335:     }
336:     bdiag[i] = bi[i] + nzbd;

338:     /* if free space is not available, make more free space */
339:     if (current_space->local_remaining<nzi) {
340:       nnz = (n - i)*nzi; /* estimated and max additional space needed */
341:       GetMoreSpace(nnz,&current_space);
342:       reallocs++;
343:     }

345:     /* copy data into free space, then initialize lnk */
346:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
347:     bi_ptr[i] = current_space->array;
348:     current_space->array           += nzi;
349:     current_space->local_used      += nzi;
350:     current_space->local_remaining -= nzi;
351:   }
352: #if defined(PETSC_USE_DEBUG)
353:   if (ai[n] != 0) {
354:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
355:     PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));
356:     PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));
357:     PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));
358:     PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));
359:   } else {
360:     PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));
361:   }
362: #endif

364:   ISRestoreIndices(isrow,&r);
365:   ISRestoreIndices(isicol,&ic);

367:   /* destroy list of free space and other temporary array(s) */
368:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
369:   MakeSpaceContiguous(&free_space,bj);
370:   PetscLLDestroy(lnk,lnkbt);
371:   PetscFree(cols);

373:   /* put together the new matrix */
374:   MatCreate(A->comm,B);
375:   MatSetSizes(*B,n,n,n,n);
376:   MatSetType(*B,A->type_name);
377:   MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
378:   PetscLogObjectParent(*B,isicol);
379:   b    = (Mat_SeqAIJ*)(*B)->data;
380:   b->freedata     = PETSC_TRUE;
381:   b->singlemalloc = PETSC_FALSE;
382:   PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
383:   b->j          = bj;
384:   b->i          = bi;
385:   b->diag       = bdiag;
386:   b->ilen       = 0;
387:   b->imax       = 0;
388:   b->row        = isrow;
389:   b->col        = iscol;
390:   PetscObjectReference((PetscObject)isrow);
391:   PetscObjectReference((PetscObject)iscol);
392:   b->icol       = isicol;
393:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);

395:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
396:   PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
397:   b->maxnz = b->nz = bi[n] ;

399:   (*B)->factor                 =  FACTOR_LU;
400:   (*B)->info.factor_mallocs    = reallocs;
401:   (*B)->info.fill_ratio_given  = f;

403:   if (ai[n] != 0) {
404:     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
405:   } else {
406:     (*B)->info.fill_ratio_needed = 0.0;
407:   }
408:   MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
409:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
410:   return(0);
411: }

413: /* ----------------------------------------------------------- */
416: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
417: {
418:   Mat            C=*B;
419:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
420:   IS             isrow = b->row,isicol = b->icol;
422:   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
423:   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
424:   PetscInt       *diag_offset = b->diag,diag,*pj;
425:   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
426:   PetscScalar    d;
427:   PetscReal      rs;
428:   LUShift_Ctx    sctx;
429:   PetscInt       newshift;

432:   ISGetIndices(isrow,&r);
433:   ISGetIndices(isicol,&ic);
434:   PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
435:   PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
436:   rtmps = rtmp; ics = ic;

438:   if (!a->diag) {
439:     MatMarkDiagonal_SeqAIJ(A);
440:   }
441:   /* if both shift schemes are chosen by user, only use info->shiftpd */
442:   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
443:   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
444:     PetscInt *aai = a->i,*ddiag = a->diag;
445:     sctx.shift_top = 0;
446:     for (i=0; i<n; i++) {
447:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
448:       d  = (a->a)[ddiag[i]];
449:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
450:       v  = a->a+aai[i];
451:       nz = aai[i+1] - aai[i];
452:       for (j=0; j<nz; j++)
453:         rs += PetscAbsScalar(v[j]);
454:       if (rs>sctx.shift_top) sctx.shift_top = rs;
455:     }
456:     if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
457:     sctx.shift_top    *= 1.1;
458:     sctx.nshift_max   = 5;
459:     sctx.shift_lo     = 0.;
460:     sctx.shift_hi     = 1.;
461:   }

463:   sctx.shift_amount = 0;
464:   sctx.nshift       = 0;
465:   do {
466:     sctx.lushift = PETSC_FALSE;
467:     for (i=0; i<n; i++){
468:       nz    = bi[i+1] - bi[i];
469:       bjtmp = bj + bi[i];
470:       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;

472:       /* load in initial (unfactored row) */
473:       nz    = a->i[r[i]+1] - a->i[r[i]];
474:       ajtmp = a->j + a->i[r[i]];
475:       v     = a->a + a->i[r[i]];
476:       for (j=0; j<nz; j++) {
477:         rtmp[ics[ajtmp[j]]] = v[j];
478:       }
479:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

481:       row = *bjtmp++;
482:       while  (row < i) {
483:         pc = rtmp + row;
484:         if (*pc != 0.0) {
485:           pv         = b->a + diag_offset[row];
486:           pj         = b->j + diag_offset[row] + 1;
487:           multiplier = *pc / *pv++;
488:           *pc        = multiplier;
489:           nz         = bi[row+1] - diag_offset[row] - 1;
490:           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
491:           PetscLogFlops(2*nz);
492:         }
493:         row = *bjtmp++;
494:       }
495:       /* finished row so stick it into b->a */
496:       pv   = b->a + bi[i] ;
497:       pj   = b->j + bi[i] ;
498:       nz   = bi[i+1] - bi[i];
499:       diag = diag_offset[i] - bi[i];
500:       rs   = 0.0;
501:       for (j=0; j<nz; j++) {
502:         pv[j] = rtmps[pj[j]];
503:         if (j != diag) rs += PetscAbsScalar(pv[j]);
504:       }

506:       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
507:       sctx.rs  = rs;
508:       sctx.pv  = pv[diag];
509:       MatLUCheckShift_inline(info,sctx,newshift);
510:       if (newshift == 1){
511:         break;    /* sctx.shift_amount is updated */
512:       } else if (newshift == -1){
513:         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
514:       }
515:     }

517:     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
518:       /*
519:        * if no shift in this attempt & shifting & started shifting & can refine,
520:        * then try lower shift
521:        */
522:       sctx.shift_hi        = info->shift_fraction;
523:       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
524:       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
525:       sctx.lushift         = PETSC_TRUE;
526:       sctx.nshift++;
527:     }
528:   } while (sctx.lushift);

530:   /* invert diagonal entries for simplier triangular solves */
531:   for (i=0; i<n; i++) {
532:     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
533:   }

535:   PetscFree(rtmp);
536:   ISRestoreIndices(isicol,&ic);
537:   ISRestoreIndices(isrow,&r);
538:   C->factor = FACTOR_LU;
539:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
540:   C->assembled = PETSC_TRUE;
541:   PetscLogFlops(C->n);
542:   if (sctx.nshift){
543:     if (info->shiftnz) {
544:       PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
545:     } else if (info->shiftpd) {
546:       PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));
547:     }
548:   }
549:   return(0);
550: }

554: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
555: {
557:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
558:   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
559:   return(0);
560: }


563: /* ----------------------------------------------------------- */
566: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
567: {
569:   Mat            C;

572:   MatLUFactorSymbolic(A,row,col,info,&C);
573:   MatLUFactorNumeric(A,info,&C);
574:   MatHeaderCopy(A,C);
575:   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
576:   return(0);
577: }
578: /* ----------------------------------------------------------- */
581: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
582: {
583:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
584:   IS             iscol = a->col,isrow = a->row;
586:   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
587:   PetscInt       nz,*rout,*cout;
588:   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

591:   if (!n) return(0);

593:   VecGetArray(bb,&b);
594:   VecGetArray(xx,&x);
595:   tmp  = a->solve_work;

597:   ISGetIndices(isrow,&rout); r = rout;
598:   ISGetIndices(iscol,&cout); c = cout + (n-1);

600:   /* forward solve the lower triangular */
601:   tmp[0] = b[*r++];
602:   tmps   = tmp;
603:   for (i=1; i<n; i++) {
604:     v   = aa + ai[i] ;
605:     vi  = aj + ai[i] ;
606:     nz  = a->diag[i] - ai[i];
607:     sum = b[*r++];
608:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
609:     tmp[i] = sum;
610:   }

612:   /* backward solve the upper triangular */
613:   for (i=n-1; i>=0; i--){
614:     v   = aa + a->diag[i] + 1;
615:     vi  = aj + a->diag[i] + 1;
616:     nz  = ai[i+1] - a->diag[i] - 1;
617:     sum = tmp[i];
618:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
619:     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
620:   }

622:   ISRestoreIndices(isrow,&rout);
623:   ISRestoreIndices(iscol,&cout);
624:   VecRestoreArray(bb,&b);
625:   VecRestoreArray(xx,&x);
626:   PetscLogFlops(2*a->nz - A->n);
627:   return(0);
628: }

630: /* ----------------------------------------------------------- */
633: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
634: {
635:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
637:   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
638:   PetscScalar    *x,*b,*aa = a->a;
639: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
640:   PetscInt       adiag_i,i,*vi,nz,ai_i;
641:   PetscScalar    *v,sum;
642: #endif

645:   if (!n) return(0);

647:   VecGetArray(bb,&b);
648:   VecGetArray(xx,&x);

650: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
651:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
652: #else
653:   /* forward solve the lower triangular */
654:   x[0] = b[0];
655:   for (i=1; i<n; i++) {
656:     ai_i = ai[i];
657:     v    = aa + ai_i;
658:     vi   = aj + ai_i;
659:     nz   = adiag[i] - ai_i;
660:     sum  = b[i];
661:     while (nz--) sum -= *v++ * x[*vi++];
662:     x[i] = sum;
663:   }

665:   /* backward solve the upper triangular */
666:   for (i=n-1; i>=0; i--){
667:     adiag_i = adiag[i];
668:     v       = aa + adiag_i + 1;
669:     vi      = aj + adiag_i + 1;
670:     nz      = ai[i+1] - adiag_i - 1;
671:     sum     = x[i];
672:     while (nz--) sum -= *v++ * x[*vi++];
673:     x[i]    = sum*aa[adiag_i];
674:   }
675: #endif
676:   PetscLogFlops(2*a->nz - A->n);
677:   VecRestoreArray(bb,&b);
678:   VecRestoreArray(xx,&x);
679:   return(0);
680: }

684: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
685: {
686:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
687:   IS             iscol = a->col,isrow = a->row;
689:   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
690:   PetscInt       nz,*rout,*cout;
691:   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;

694:   if (yy != xx) {VecCopy(yy,xx);}

696:   VecGetArray(bb,&b);
697:   VecGetArray(xx,&x);
698:   tmp  = a->solve_work;

700:   ISGetIndices(isrow,&rout); r = rout;
701:   ISGetIndices(iscol,&cout); c = cout + (n-1);

703:   /* forward solve the lower triangular */
704:   tmp[0] = b[*r++];
705:   for (i=1; i<n; i++) {
706:     v   = aa + ai[i] ;
707:     vi  = aj + ai[i] ;
708:     nz  = a->diag[i] - ai[i];
709:     sum = b[*r++];
710:     while (nz--) sum -= *v++ * tmp[*vi++ ];
711:     tmp[i] = sum;
712:   }

714:   /* backward solve the upper triangular */
715:   for (i=n-1; i>=0; i--){
716:     v   = aa + a->diag[i] + 1;
717:     vi  = aj + a->diag[i] + 1;
718:     nz  = ai[i+1] - a->diag[i] - 1;
719:     sum = tmp[i];
720:     while (nz--) sum -= *v++ * tmp[*vi++ ];
721:     tmp[i] = sum*aa[a->diag[i]];
722:     x[*c--] += tmp[i];
723:   }

725:   ISRestoreIndices(isrow,&rout);
726:   ISRestoreIndices(iscol,&cout);
727:   VecRestoreArray(bb,&b);
728:   VecRestoreArray(xx,&x);
729:   PetscLogFlops(2*a->nz);

731:   return(0);
732: }
733: /* -------------------------------------------------------------------*/
736: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
737: {
738:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
739:   IS             iscol = a->col,isrow = a->row;
741:   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
742:   PetscInt       nz,*rout,*cout,*diag = a->diag;
743:   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;

746:   VecGetArray(bb,&b);
747:   VecGetArray(xx,&x);
748:   tmp  = a->solve_work;

750:   ISGetIndices(isrow,&rout); r = rout;
751:   ISGetIndices(iscol,&cout); c = cout;

753:   /* copy the b into temp work space according to permutation */
754:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

756:   /* forward solve the U^T */
757:   for (i=0; i<n; i++) {
758:     v   = aa + diag[i] ;
759:     vi  = aj + diag[i] + 1;
760:     nz  = ai[i+1] - diag[i] - 1;
761:     s1  = tmp[i];
762:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
763:     while (nz--) {
764:       tmp[*vi++ ] -= (*v++)*s1;
765:     }
766:     tmp[i] = s1;
767:   }

769:   /* backward solve the L^T */
770:   for (i=n-1; i>=0; i--){
771:     v   = aa + diag[i] - 1 ;
772:     vi  = aj + diag[i] - 1 ;
773:     nz  = diag[i] - ai[i];
774:     s1  = tmp[i];
775:     while (nz--) {
776:       tmp[*vi-- ] -= (*v--)*s1;
777:     }
778:   }

780:   /* copy tmp into x according to permutation */
781:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

783:   ISRestoreIndices(isrow,&rout);
784:   ISRestoreIndices(iscol,&cout);
785:   VecRestoreArray(bb,&b);
786:   VecRestoreArray(xx,&x);

788:   PetscLogFlops(2*a->nz-A->n);
789:   return(0);
790: }

794: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
795: {
796:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
797:   IS             iscol = a->col,isrow = a->row;
799:   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
800:   PetscInt       nz,*rout,*cout,*diag = a->diag;
801:   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;

804:   if (zz != xx) {VecCopy(zz,xx);}

806:   VecGetArray(bb,&b);
807:   VecGetArray(xx,&x);
808:   tmp = a->solve_work;

810:   ISGetIndices(isrow,&rout); r = rout;
811:   ISGetIndices(iscol,&cout); c = cout;

813:   /* copy the b into temp work space according to permutation */
814:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

816:   /* forward solve the U^T */
817:   for (i=0; i<n; i++) {
818:     v   = aa + diag[i] ;
819:     vi  = aj + diag[i] + 1;
820:     nz  = ai[i+1] - diag[i] - 1;
821:     tmp[i] *= *v++;
822:     while (nz--) {
823:       tmp[*vi++ ] -= (*v++)*tmp[i];
824:     }
825:   }

827:   /* backward solve the L^T */
828:   for (i=n-1; i>=0; i--){
829:     v   = aa + diag[i] - 1 ;
830:     vi  = aj + diag[i] - 1 ;
831:     nz  = diag[i] - ai[i];
832:     while (nz--) {
833:       tmp[*vi-- ] -= (*v--)*tmp[i];
834:     }
835:   }

837:   /* copy tmp into x according to permutation */
838:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

840:   ISRestoreIndices(isrow,&rout);
841:   ISRestoreIndices(iscol,&cout);
842:   VecRestoreArray(bb,&b);
843:   VecRestoreArray(xx,&x);

845:   PetscLogFlops(2*a->nz);
846:   return(0);
847: }
848: /* ----------------------------------------------------------------*/
849: EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);

853: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
854: {
855:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
856:   IS             isicol;
858:   PetscInt       *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
859:   PetscInt       *bi,*cols,nnz,*cols_lvl;
860:   PetscInt       *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
861:   PetscInt       i,levels,diagonal_fill;
862:   PetscTruth     col_identity,row_identity;
863:   PetscReal      f;
864:   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
865:   PetscBT        lnkbt;
866:   PetscInt       nzi,*bj,**bj_ptr,**bjlvl_ptr;
867:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
868:   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
869: 
871:   f             = info->fill;
872:   levels        = (PetscInt)info->levels;
873:   diagonal_fill = (PetscInt)info->diagonal_fill;
874:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

876:   /* special case that simply copies fill pattern */
877:   ISIdentity(isrow,&row_identity);
878:   ISIdentity(iscol,&col_identity);
879:   if (!levels && row_identity && col_identity) {
880:     MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
881:     (*fact)->factor = FACTOR_LU;
882:     b               = (Mat_SeqAIJ*)(*fact)->data;
883:     if (!b->diag) {
884:       MatMarkDiagonal_SeqAIJ(*fact);
885:     }
886:     MatMissingDiagonal_SeqAIJ(*fact);
887:     b->row              = isrow;
888:     b->col              = iscol;
889:     b->icol             = isicol;
890:     PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
891:     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
892:     PetscObjectReference((PetscObject)isrow);
893:     PetscObjectReference((PetscObject)iscol);
894:     return(0);
895:   }

897:   ISGetIndices(isrow,&r);
898:   ISGetIndices(isicol,&ic);

900:   /* get new row pointers */
901:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
902:   bi[0] = 0;
903:   /* bdiag is location of diagonal in factor */
904:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
905:   bdiag[0]  = 0;

907:   PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
908:   bjlvl_ptr = (PetscInt**)(bj_ptr + n);

910:   /* create a linked list for storing column indices of the active row */
911:   nlnk = n + 1;
912:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

914:   /* initial FreeSpace size is f*(ai[n]+1) */
915:   GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);
916:   current_space = free_space;
917:   GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
918:   current_space_lvl = free_space_lvl;
919: 
920:   for (i=0; i<n; i++) {
921:     nzi = 0;
922:     /* copy current row into linked list */
923:     nnz  = ai[r[i]+1] - ai[r[i]];
924:     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
925:     cols = aj + ai[r[i]];
926:     lnk[i] = -1; /* marker to indicate if diagonal exists */
927:     PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
928:     nzi += nlnk;

930:     /* make sure diagonal entry is included */
931:     if (diagonal_fill && lnk[i] == -1) {
932:       fm = n;
933:       while (lnk[fm] < i) fm = lnk[fm];
934:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
935:       lnk[fm]    = i;
936:       lnk_lvl[i] = 0;
937:       nzi++; dcount++;
938:     }

940:     /* add pivot rows into the active row */
941:     nzbd = 0;
942:     prow = lnk[n];
943:     while (prow < i) {
944:       nnz      = bdiag[prow];
945:       cols     = bj_ptr[prow] + nnz + 1;
946:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
947:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
948:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
949:       nzi += nlnk;
950:       prow = lnk[prow];
951:       nzbd++;
952:     }
953:     bdiag[i] = nzbd;
954:     bi[i+1]  = bi[i] + nzi;

956:     /* if free space is not available, make more free space */
957:     if (current_space->local_remaining<nzi) {
958:       nnz = nzi*(n - i); /* estimated and max additional space needed */
959:       GetMoreSpace(nnz,&current_space);
960:       GetMoreSpace(nnz,&current_space_lvl);
961:       reallocs++;
962:     }

964:     /* copy data into free_space and free_space_lvl, then initialize lnk */
965:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
966:     bj_ptr[i]    = current_space->array;
967:     bjlvl_ptr[i] = current_space_lvl->array;

969:     /* make sure the active row i has diagonal entry */
970:     if (*(bj_ptr[i]+bdiag[i]) != i) {
971:       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
972:     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
973:     }

975:     current_space->array           += nzi;
976:     current_space->local_used      += nzi;
977:     current_space->local_remaining -= nzi;
978:     current_space_lvl->array           += nzi;
979:     current_space_lvl->local_used      += nzi;
980:     current_space_lvl->local_remaining -= nzi;
981:   }

983:   ISRestoreIndices(isrow,&r);
984:   ISRestoreIndices(isicol,&ic);

986:   /* destroy list of free space and other temporary arrays */
987:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
988:   MakeSpaceContiguous(&free_space,bj);
989:   PetscIncompleteLLDestroy(lnk,lnkbt);
990:   DestroySpace(free_space_lvl);
991:   PetscFree(bj_ptr);

993: #if defined(PETSC_USE_DEBUG)
994:   {
995:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
996:     PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));
997:     PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));
998:     PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));
999:     PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));
1000:     if (diagonal_fill) {
1001:       PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));
1002:     }
1003:   }
1004: #endif

1006:   /* put together the new matrix */
1007:   MatCreate(A->comm,fact);
1008:   MatSetSizes(*fact,n,n,n,n);
1009:   MatSetType(*fact,A->type_name);
1010:   MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1011:   PetscLogObjectParent(*fact,isicol);
1012:   b = (Mat_SeqAIJ*)(*fact)->data;
1013:   b->freedata     = PETSC_TRUE;
1014:   b->singlemalloc = PETSC_FALSE;
1015:   len = (bi[n] )*sizeof(PetscScalar);
1016:   PetscMalloc(len+1,&b->a);
1017:   b->j          = bj;
1018:   b->i          = bi;
1019:   for (i=0; i<n; i++) bdiag[i] += bi[i];
1020:   b->diag       = bdiag;
1021:   b->ilen       = 0;
1022:   b->imax       = 0;
1023:   b->row        = isrow;
1024:   b->col        = iscol;
1025:   PetscObjectReference((PetscObject)isrow);
1026:   PetscObjectReference((PetscObject)iscol);
1027:   b->icol       = isicol;
1028:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1029:   /* In b structure:  Free imax, ilen, old a, old j.  
1030:      Allocate bdiag, solve_work, new a, new j */
1031:   PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1032:   b->maxnz             = b->nz = bi[n] ;
1033:   (*fact)->factor = FACTOR_LU;
1034:   (*fact)->info.factor_mallocs    = reallocs;
1035:   (*fact)->info.fill_ratio_given  = f;
1036:   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);

1038:   MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1039:   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */

1041:   return(0);
1042: }

1044:  #include src/mat/impls/sbaij/seq/sbaij.h
1047: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1048: {
1049:   Mat            C = *B;
1050:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1051:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1052:   IS             ip=b->row;
1054:   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1055:   PetscInt       *ai=a->i,*aj=a->j;
1056:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1057:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1058:   PetscReal      zeropivot,rs,shiftnz;
1059:   PetscTruth     shiftpd;
1060:   ChShift_Ctx    sctx;
1061:   PetscInt       newshift;

1064:   shiftnz   = info->shiftnz;
1065:   shiftpd   = info->shiftpd;
1066:   zeropivot = info->zeropivot;

1068:   ISGetIndices(ip,&rip);
1069: 
1070:   /* initialization */
1071:   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1072:   PetscMalloc(nz,&il);
1073:   jl   = il + mbs;
1074:   rtmp = (MatScalar*)(jl + mbs);

1076:   sctx.shift_amount = 0;
1077:   sctx.nshift       = 0;
1078:   do {
1079:     sctx.chshift = PETSC_FALSE;
1080:     for (i=0; i<mbs; i++) {
1081:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1082:     }
1083: 
1084:     for (k = 0; k<mbs; k++){
1085:       bval = ba + bi[k];
1086:       /* initialize k-th row by the perm[k]-th row of A */
1087:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1088:       for (j = jmin; j < jmax; j++){
1089:         col = rip[aj[j]];
1090:         if (col >= k){ /* only take upper triangular entry */
1091:           rtmp[col] = aa[j];
1092:           *bval++  = 0.0; /* for in-place factorization */
1093:         }
1094:       }
1095:       /* shift the diagonal of the matrix */
1096:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1098:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1099:       dk = rtmp[k];
1100:       i = jl[k]; /* first row to be added to k_th row  */

1102:       while (i < k){
1103:         nexti = jl[i]; /* next row to be added to k_th row */

1105:         /* compute multiplier, update diag(k) and U(i,k) */
1106:         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1107:         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1108:         dk += uikdi*ba[ili];
1109:         ba[ili] = uikdi; /* -U(i,k) */

1111:         /* add multiple of row i to k-th row */
1112:         jmin = ili + 1; jmax = bi[i+1];
1113:         if (jmin < jmax){
1114:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1115:           /* update il and jl for row i */
1116:           il[i] = jmin;
1117:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1118:         }
1119:         i = nexti;
1120:       }

1122:       /* shift the diagonals when zero pivot is detected */
1123:       /* compute rs=sum of abs(off-diagonal) */
1124:       rs   = 0.0;
1125:       jmin = bi[k]+1;
1126:       nz   = bi[k+1] - jmin;
1127:       if (nz){
1128:         bcol = bj + jmin;
1129:         while (nz--){
1130:           rs += PetscAbsScalar(rtmp[*bcol]);
1131:           bcol++;
1132:         }
1133:       }

1135:       sctx.rs = rs;
1136:       sctx.pv = dk;
1137:       MatCholeskyCheckShift_inline(info,sctx,newshift);
1138:       if (newshift == 1){
1139:         break;    /* sctx.shift_amount is updated */
1140:       } else if (newshift == -1){
1141:         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1142:       }
1143: 
1144:       /* copy data into U(k,:) */
1145:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1146:       jmin = bi[k]+1; jmax = bi[k+1];
1147:       if (jmin < jmax) {
1148:         for (j=jmin; j<jmax; j++){
1149:           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1150:         }
1151:         /* add the k-th row into il and jl */
1152:         il[k] = jmin;
1153:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1154:       }
1155:     }
1156:   } while (sctx.chshift);
1157:   PetscFree(il);

1159:   ISRestoreIndices(ip,&rip);
1160:   C->factor       = FACTOR_CHOLESKY;
1161:   C->assembled    = PETSC_TRUE;
1162:   C->preallocated = PETSC_TRUE;
1163:   PetscLogFlops(C->m);
1164:   if (sctx.nshift){
1165:     if (shiftnz) {
1166:       PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
1167:     } else if (shiftpd) {
1168:       PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
1169:     }
1170:   }
1171:   return(0);
1172: }

1176: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1177: {
1178:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1179:   Mat_SeqSBAIJ   *b;
1180:   Mat            B;
1182:   PetscTruth     perm_identity;
1183:   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1184:   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1185:   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1186:   PetscInt       ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1187:   PetscReal      fill=info->fill,levels=info->levels;
1188:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1189:   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1190:   PetscBT        lnkbt;
1191: 
1193:   ISIdentity(perm,&perm_identity);
1194:   ISGetIndices(perm,&rip);

1196:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1197:   ui[0] = 0;

1199:   /* special case that simply copies fill pattern */
1200:   if (!levels && perm_identity) {
1201:     MatMarkDiagonal_SeqAIJ(A);
1202:     for (i=0; i<am; i++) {
1203:       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1204:     }
1205:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1206:     cols = uj;
1207:     for (i=0; i<am; i++) {
1208:       aj    = a->j + a->diag[i];
1209:       ncols = ui[i+1] - ui[i];
1210:       for (j=0; j<ncols; j++) *cols++ = *aj++;
1211:     }
1212:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1213:     /* initialization */
1214:     PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);

1216:     /* jl: linked list for storing indices of the pivot rows 
1217:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1218:     PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1219:     il         = jl + am;
1220:     uj_ptr     = (PetscInt**)(il + am);
1221:     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1222:     for (i=0; i<am; i++){
1223:       jl[i] = am; il[i] = 0;
1224:     }

1226:     /* create and initialize a linked list for storing column indices of the active row k */
1227:     nlnk = am + 1;
1228:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

1230:     /* initial FreeSpace size is fill*(ai[am]+1) */
1231:     GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);
1232:     current_space = free_space;
1233:     GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1234:     current_space_lvl = free_space_lvl;

1236:     for (k=0; k<am; k++){  /* for each active row k */
1237:       /* initialize lnk by the column indices of row rip[k] of A */
1238:       nzk   = 0;
1239:       ncols = ai[rip[k]+1] - ai[rip[k]];
1240:       ncols_upper = 0;
1241:       for (j=0; j<ncols; j++){
1242:         i = *(aj + ai[rip[k]] + j);
1243:         if (rip[i] >= k){ /* only take upper triangular entry */
1244:           ajtmp[ncols_upper] = i;
1245:           ncols_upper++;
1246:         }
1247:       }
1248:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);
1249:       nzk += nlnk;

1251:       /* update lnk by computing fill-in for each pivot row to be merged in */
1252:       prow = jl[k]; /* 1st pivot row */
1253: 
1254:       while (prow < k){
1255:         nextprow = jl[prow];
1256: 
1257:         /* merge prow into k-th row */
1258:         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1259:         jmax = ui[prow+1];
1260:         ncols = jmax-jmin;
1261:         i     = jmin - ui[prow];
1262:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1263:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1264:         j     = *(uj - 1);
1265:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1266:         nzk += nlnk;

1268:         /* update il and jl for prow */
1269:         if (jmin < jmax){
1270:           il[prow] = jmin;
1271:           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1272:         }
1273:         prow = nextprow;
1274:       }

1276:       /* if free space is not available, make more free space */
1277:       if (current_space->local_remaining<nzk) {
1278:         i = am - k + 1; /* num of unfactored rows */
1279:         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1280:         GetMoreSpace(i,&current_space);
1281:         GetMoreSpace(i,&current_space_lvl);
1282:         reallocs++;
1283:       }

1285:       /* copy data into free_space and free_space_lvl, then initialize lnk */
1286:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

1288:       /* add the k-th row into il and jl */
1289:       if (nzk > 1){
1290:         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1291:         jl[k] = jl[i]; jl[i] = k;
1292:         il[k] = ui[k] + 1;
1293:       }
1294:       uj_ptr[k]     = current_space->array;
1295:       uj_lvl_ptr[k] = current_space_lvl->array;

1297:       current_space->array           += nzk;
1298:       current_space->local_used      += nzk;
1299:       current_space->local_remaining -= nzk;

1301:       current_space_lvl->array           += nzk;
1302:       current_space_lvl->local_used      += nzk;
1303:       current_space_lvl->local_remaining -= nzk;

1305:       ui[k+1] = ui[k] + nzk;
1306:     }

1308: #if defined(PETSC_USE_DEBUG)
1309:     if (ai[am] != 0) {
1310:       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1311:       PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));
1312:       PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));
1313:       PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));
1314:     } else {
1315:       PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));
1316:     }
1317: #endif

1319:     ISRestoreIndices(perm,&rip);
1320:     PetscFree(jl);
1321:     PetscFree(ajtmp);

1323:     /* destroy list of free space and other temporary array(s) */
1324:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1325:     MakeSpaceContiguous(&free_space,uj);
1326:     PetscIncompleteLLDestroy(lnk,lnkbt);
1327:     DestroySpace(free_space_lvl);

1329:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

1331:   /* put together the new matrix in MATSEQSBAIJ format */
1332:   MatCreate(PETSC_COMM_SELF,fact);
1333:   MatSetSizes(*fact,am,am,am,am);
1334:   B = *fact;
1335:   MatSetType(B,MATSEQSBAIJ);
1336:   MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);

1338:   b    = (Mat_SeqSBAIJ*)B->data;
1339:   b->singlemalloc = PETSC_FALSE;
1340:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1341:   b->j    = uj;
1342:   b->i    = ui;
1343:   b->diag = 0;
1344:   b->ilen = 0;
1345:   b->imax = 0;
1346:   b->row  = perm;
1347:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1348:   PetscObjectReference((PetscObject)perm);
1349:   b->icol = perm;
1350:   PetscObjectReference((PetscObject)perm);
1351:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1352:   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1353:   b->maxnz = b->nz = ui[am];
1354: 
1355:   B->factor                 = FACTOR_CHOLESKY;
1356:   B->info.factor_mallocs    = reallocs;
1357:   B->info.fill_ratio_given  = fill;
1358:   if (ai[am] != 0) {
1359:     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1360:   } else {
1361:     B->info.fill_ratio_needed = 0.0;
1362:   }
1363:   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1364:   if (perm_identity){
1365:     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1366:     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1367:   }
1368:   return(0);
1369: }

1373: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1374: {
1375:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1376:   Mat_SeqSBAIJ   *b;
1377:   Mat            B;
1379:   PetscTruth     perm_identity;
1380:   PetscReal      fill = info->fill;
1381:   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1382:   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1383:   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1384:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1385:   PetscBT        lnkbt;
1386:   IS             iperm;

1389:   /* check whether perm is the identity mapping */
1390:   ISIdentity(perm,&perm_identity);
1391:   ISGetIndices(perm,&rip);

1393:   if (!perm_identity){
1394:     /* check if perm is symmetric! */
1395:     ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1396:     ISGetIndices(iperm,&riip);
1397:     for (i=0; i<am; i++) {
1398:       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1399:     }
1400:     ISRestoreIndices(iperm,&riip);
1401:     ISDestroy(iperm);
1402:   }

1404:   /* initialization */
1405:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1406:   ui[0] = 0;

1408:   /* jl: linked list for storing indices of the pivot rows 
1409:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1410:   PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1411:   il     = jl + am;
1412:   cols   = il + am;
1413:   ui_ptr = (PetscInt**)(cols + am);
1414:   for (i=0; i<am; i++){
1415:     jl[i] = am; il[i] = 0;
1416:   }

1418:   /* create and initialize a linked list for storing column indices of the active row k */
1419:   nlnk = am + 1;
1420:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

1422:   /* initial FreeSpace size is fill*(ai[am]+1) */
1423:   GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);
1424:   current_space = free_space;

1426:   for (k=0; k<am; k++){  /* for each active row k */
1427:     /* initialize lnk by the column indices of row rip[k] of A */
1428:     nzk   = 0;
1429:     ncols = ai[rip[k]+1] - ai[rip[k]];
1430:     ncols_upper = 0;
1431:     for (j=0; j<ncols; j++){
1432:       i = rip[*(aj + ai[rip[k]] + j)];
1433:       if (i >= k){ /* only take upper triangular entry */
1434:         cols[ncols_upper] = i;
1435:         ncols_upper++;
1436:       }
1437:     }
1438:     PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1439:     nzk += nlnk;

1441:     /* update lnk by computing fill-in for each pivot row to be merged in */
1442:     prow = jl[k]; /* 1st pivot row */
1443: 
1444:     while (prow < k){
1445:       nextprow = jl[prow];
1446:       /* merge prow into k-th row */
1447:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1448:       jmax = ui[prow+1];
1449:       ncols = jmax-jmin;
1450:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1451:       PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1452:       nzk += nlnk;

1454:       /* update il and jl for prow */
1455:       if (jmin < jmax){
1456:         il[prow] = jmin;
1457:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1458:       }
1459:       prow = nextprow;
1460:     }

1462:     /* if free space is not available, make more free space */
1463:     if (current_space->local_remaining<nzk) {
1464:       i = am - k + 1; /* num of unfactored rows */
1465:       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1466:       GetMoreSpace(i,&current_space);
1467:       reallocs++;
1468:     }

1470:     /* copy data into free space, then initialize lnk */
1471:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

1473:     /* add the k-th row into il and jl */
1474:     if (nzk-1 > 0){
1475:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1476:       jl[k] = jl[i]; jl[i] = k;
1477:       il[k] = ui[k] + 1;
1478:     }
1479:     ui_ptr[k] = current_space->array;
1480:     current_space->array           += nzk;
1481:     current_space->local_used      += nzk;
1482:     current_space->local_remaining -= nzk;

1484:     ui[k+1] = ui[k] + nzk;
1485:   }

1487: #if defined(PETSC_USE_DEBUG)
1488:   if (ai[am] != 0) {
1489:     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1490:     PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));
1491:     PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));
1492:     PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));
1493:   } else {
1494:      PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));
1495:   }
1496: #endif

1498:   ISRestoreIndices(perm,&rip);
1499:   PetscFree(jl);

1501:   /* destroy list of free space and other temporary array(s) */
1502:   PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1503:   MakeSpaceContiguous(&free_space,uj);
1504:   PetscLLDestroy(lnk,lnkbt);

1506:   /* put together the new matrix in MATSEQSBAIJ format */
1507:   MatCreate(PETSC_COMM_SELF,fact);
1508:   MatSetSizes(*fact,am,am,am,am);
1509:   B    = *fact;
1510:   MatSetType(B,MATSEQSBAIJ);
1511:   MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);

1513:   b = (Mat_SeqSBAIJ*)B->data;
1514:   b->singlemalloc = PETSC_FALSE;
1515:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1516:   b->j    = uj;
1517:   b->i    = ui;
1518:   b->diag = 0;
1519:   b->ilen = 0;
1520:   b->imax = 0;
1521:   b->row  = perm;
1522:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1523:   PetscObjectReference((PetscObject)perm);
1524:   b->icol = perm;
1525:   PetscObjectReference((PetscObject)perm);
1526:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1527:   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1528:   b->maxnz = b->nz = ui[am];
1529: 
1530:   B->factor                 = FACTOR_CHOLESKY;
1531:   B->info.factor_mallocs    = reallocs;
1532:   B->info.fill_ratio_given  = fill;
1533:   if (ai[am] != 0) {
1534:     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1535:   } else {
1536:     B->info.fill_ratio_needed = 0.0;
1537:   }
1538:   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1539:   if (perm_identity){
1540:     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1541:     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1542:   }
1543:   return(0);
1544: }