Actual source code: aijfact.c

  1: /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/

 3:  #include src/mat/impls/aij/seq/aij.h
 4:  #include src/vec/vecimpl.h
 5:  #include src/inline/dot.h

  7: #undef __FUNCT__  
  9: int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
 10: {

 13:   SETERRQ(PETSC_ERR_SUP,"Code not written");
 14: #if !defined(PETSC_USE_DEBUG)
 15:   return(0);
 16: #endif
 17: }


 20: EXTERN int MatMarkDiagonal_SeqAIJ(Mat);
 21: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);

 23: EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*);
 24: EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*);
 25: EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*);

 27: #undef __FUNCT__  
 29:   /* ------------------------------------------------------------

 31:           This interface was contribed by Tony Caola

 33:      This routine is an interface to the pivoting drop-tolerance 
 34:      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 
 35:      SPARSEKIT2.

 37:      The SPARSEKIT2 routines used here are covered by the GNU 
 38:      copyright; see the file gnu in this directory.

 40:      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
 41:      help in getting this routine ironed out.

 43:      The major drawback to this routine is that if info->fill is 
 44:      not large enough it fails rather than allocating more space;
 45:      this can be fixed by hacking/improving the f2c version of 
 46:      Yousef Saad's code.

 48:      ishift = 0, for indices start at 1
 49:      ishift = 1, for indices starting at 0
 50:      ------------------------------------------------------------
 51: */
 52: int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
 53: {
 54: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 56:   SETERRQ(1,"This distribution does not include GNU Copyright coden
 57:   You can obtain the drop tolerance routines by installing PETSc fromn
 58:   www.mcs.anl.gov/petscn");
 59: #else
 60:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b;
 61:   IS           iscolf,isicol,isirow;
 62:   PetscTruth   reorder;
 63:   int          *c,*r,*ic,ierr,i,n = A->m;
 64:   int          *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
 65:   int          *ordcol,*iwk,*iperm,*jw;
 66:   int          ishift = !a->indexshift;
 67:   int          jmax,lfill,job,*o_i,*o_j;
 68:   PetscScalar  *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
 69:   PetscReal    permtol,af;


 73:   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
 74:   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
 75:   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
 76:   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
 77:   lfill   = (int)(info->dtcount/2.0);
 78:   jmax    = (int)(info->fill*a->nz);
 79:   permtol = info->dtcol;


 82:   /* ------------------------------------------------------------
 83:      If reorder=.TRUE., then the original matrix has to be 
 84:      reordered to reflect the user selected ordering scheme, and
 85:      then de-reordered so it is in it's original format.  
 86:      Because Saad's dperm() is NOT in place, we have to copy 
 87:      the original matrix and allocate more storage. . . 
 88:      ------------------------------------------------------------
 89:   */

 91:   /* set reorder to true if either isrow or iscol is not identity */
 92:   ISIdentity(isrow,&reorder);
 93:   if (reorder) {ISIdentity(iscol,&reorder);}
 94:   reorder = PetscNot(reorder);

 96: 
 97:   /* storage for ilu factor */
 98:   PetscMalloc((n+1)*sizeof(int),&new_i);
 99:   PetscMalloc(jmax*sizeof(int),&new_j);
100:   PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
101:   PetscMalloc(n*sizeof(int),&ordcol);

103:   /* ------------------------------------------------------------
104:      Make sure that everything is Fortran formatted (1-Based)
105:      ------------------------------------------------------------
106:   */
107:   if (ishift) {
108:     for (i=old_i[0];i<old_i[n];i++) {
109:       old_j[i]++;
110:     }
111:     for(i=0;i<n+1;i++) {
112:       old_i[i]++;
113:     };
114:   };

116:   if (reorder) {
117:     ISGetIndices(iscol,&c);
118:     ISGetIndices(isrow,&r);
119:     for(i=0;i<n;i++) {
120:       r[i]  = r[i]+1;
121:       c[i]  = c[i]+1;
122:     }
123:     PetscMalloc((n+1)*sizeof(int),&old_i2);
124:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);
125:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
126:     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
127:     for (i=0;i<n;i++) {
128:       r[i]  = r[i]-1;
129:       c[i]  = c[i]-1;
130:     }
131:     ISRestoreIndices(iscol,&c);
132:     ISRestoreIndices(isrow,&r);
133:     o_a = old_a2;
134:     o_j = old_j2;
135:     o_i = old_i2;
136:   } else {
137:     o_a = old_a;
138:     o_j = old_j;
139:     o_i = old_i;
140:   }

142:   /* ------------------------------------------------------------
143:      Call Saad's ilutp() routine to generate the factorization
144:      ------------------------------------------------------------
145:   */

147:   PetscMalloc(2*n*sizeof(int),&iperm);
148:   PetscMalloc(2*n*sizeof(int),&jw);
149:   PetscMalloc(n*sizeof(PetscScalar),&w);

151:   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
152:   if (ierr) {
153:     switch (ierr) {
154:       case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
155:       case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
156:       case -5: SETERRQ(1,"ilutp(), zero row encountered");
157:       case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
158:       case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
159:       default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr);
160:     }
161:   }

163:   PetscFree(w);
164:   PetscFree(jw);

166:   /* ------------------------------------------------------------
167:      Saad's routine gives the result in Modified Sparse Row (msr)
168:      Convert to Compressed Sparse Row format (csr) 
169:      ------------------------------------------------------------
170:   */

172:   PetscMalloc(n*sizeof(PetscScalar),&wk);
173:   PetscMalloc((n+1)*sizeof(int),&iwk);

175:   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);

177:   PetscFree(iwk);
178:   PetscFree(wk);

180:   if (reorder) {
181:     PetscFree(old_a2);
182:     PetscFree(old_j2);
183:     PetscFree(old_i2);
184:   } else {
185:     /* fix permutation of old_j that the factorization introduced */
186:     for (i=old_i[0]; i<old_i[n]; i++) {
187:       old_j[i-1] = iperm[old_j[i-1]-1];
188:     }
189:   }

191:   /* get rid of the shift to indices starting at 1 */
192:   if (ishift) {
193:     for (i=0; i<n+1; i++) {
194:       old_i[i]--;
195:     }
196:     for (i=old_i[0];i<old_i[n];i++) {
197:       old_j[i]--;
198:     }
199:   }

201:   /* Make the factored matrix 0-based */
202:   if (ishift) {
203:     for (i=0; i<n+1; i++) {
204:       new_i[i]--;
205:     }
206:     for (i=new_i[0];i<new_i[n];i++) {
207:       new_j[i]--;
208:     }
209:   }

211:   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
212:   /*-- permute the right-hand-side and solution vectors           --*/
213:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
214:   ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
215:   ISGetIndices(isicol,&ic);
216:   for(i=0; i<n; i++) {
217:     ordcol[i] = ic[iperm[i]-1];
218:   };
219:   ISRestoreIndices(isicol,&ic);
220:   ISDestroy(isicol);

222:   PetscFree(iperm);

224:   ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
225:   PetscFree(ordcol);

227:   /*----- put together the new matrix -----*/

229:   MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
230:   (*fact)->factor    = FACTOR_LU;
231:   (*fact)->assembled = PETSC_TRUE;

233:   b = (Mat_SeqAIJ*)(*fact)->data;
234:   PetscFree(b->imax);
235:   b->sorted        = PETSC_FALSE;
236:   b->singlemalloc  = PETSC_FALSE;
237:   /* the next line frees the default space generated by the MatCreate() */
238:   ierr             = PetscFree(b->a);
239:   ierr             = PetscFree(b->ilen);
240:   b->a             = new_a;
241:   b->j             = new_j;
242:   b->i             = new_i;
243:   b->ilen          = 0;
244:   b->imax          = 0;
245:   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
246:   b->row           = isirow;
247:   b->col           = iscolf;
248:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
249:   b->maxnz = b->nz = new_i[n];
250:   b->indexshift    = a->indexshift;
251:   MatMarkDiagonal_SeqAIJ(*fact);
252:   (*fact)->info.factor_mallocs = 0;

254:   MatMarkDiagonal_SeqAIJ(A);

256:   /* check out for identical nodes. If found, use inode functions */
257:   Mat_AIJ_CheckInode(*fact,PETSC_FALSE);

259:   af = ((double)b->nz)/((double)a->nz) + .001;
260:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %gn",info->fill,af);
261:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use n",af);
262:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);n",af);
263:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.n");

265:   return(0);
266: #endif
267: }

269: /*
270:     Factorization code for AIJ format. 
271: */
272: #undef __FUNCT__  
274: int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B)
275: {
276:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
277:   IS         isicol;
278:   int        *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j;
279:   int        *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift;
280:   int        *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
281:   PetscReal  f;

284:   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
285:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
286:   ISGetIndices(isrow,&r);
287:   ISGetIndices(isicol,&ic);

289:   /* get new row pointers */
290:   PetscMalloc((n+1)*sizeof(int),&ainew);
291:   ainew[0] = -shift;
292:   /* don't know how many column pointers are needed so estimate */
293:   f = info->fill;
294:   jmax  = (int)(f*ai[n]+(!shift));
295:   PetscMalloc((jmax)*sizeof(int),&ajnew);
296:   /* fill is a linked list of nonzeros in active row */
297:   PetscMalloc((2*n+1)*sizeof(int),&fill);
298:   im   = fill + n + 1;
299:   /* idnew is location of diagonal in factor */
300:   PetscMalloc((n+1)*sizeof(int),&idnew);
301:   idnew[0] = -shift;

303:   for (i=0; i<n; i++) {
304:     /* first copy previous fill into linked list */
305:     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
306:     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
307:     ajtmp   = aj + ai[r[i]] + shift;
308:     fill[n] = n;
309:     while (nz--) {
310:       fm  = n;
311:       idx = ic[*ajtmp++ + shift];
312:       do {
313:         m  = fm;
314:         fm = fill[m];
315:       } while (fm < idx);
316:       fill[m]   = idx;
317:       fill[idx] = fm;
318:     }
319:     row = fill[n];
320:     while (row < i) {
321:       ajtmp = ajnew + idnew[row] + (!shift);
322:       nzbd  = 1 + idnew[row] - ainew[row];
323:       nz    = im[row] - nzbd;
324:       fm    = row;
325:       while (nz-- > 0) {
326:         idx = *ajtmp++ + shift;
327:         nzbd++;
328:         if (idx == i) im[row] = nzbd;
329:         do {
330:           m  = fm;
331:           fm = fill[m];
332:         } while (fm < idx);
333:         if (fm != idx) {
334:           fill[m]   = idx;
335:           fill[idx] = fm;
336:           fm        = idx;
337:           nnz++;
338:         }
339:       }
340:       row = fill[row];
341:     }
342:     /* copy new filled row into permanent storage */
343:     ainew[i+1] = ainew[i] + nnz;
344:     if (ainew[i+1] > jmax) {

346:       /* estimate how much additional space we will need */
347:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
348:       /* just double the memory each time */
349:       int maxadd = jmax;
350:       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
351:       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
352:       jmax += maxadd;

354:       /* allocate a longer ajnew */
355:       PetscMalloc(jmax*sizeof(int),&ajtmp);
356:       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
357:       ierr  = PetscFree(ajnew);
358:       ajnew = ajtmp;
359:       realloc++; /* count how many times we realloc */
360:     }
361:     ajtmp = ajnew + ainew[i] + shift;
362:     fm    = fill[n];
363:     nzi   = 0;
364:     im[i] = nnz;
365:     while (nnz--) {
366:       if (fm < i) nzi++;
367:       *ajtmp++ = fm - shift;
368:       fm       = fill[fm];
369:     }
370:     idnew[i] = ainew[i] + nzi;
371:   }
372:   if (ai[n] != 0) {
373:     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
374:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
375:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use n",af);
376:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);n",af);
377:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.n");
378:   } else {
379:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrixn");
380:   }

382:   ISRestoreIndices(isrow,&r);
383:   ISRestoreIndices(isicol,&ic);

385:   PetscFree(fill);

387:   /* put together the new matrix */
388:   MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);
389:   PetscLogObjectParent(*B,isicol);
390:   b = (Mat_SeqAIJ*)(*B)->data;
391:   PetscFree(b->imax);
392:   b->singlemalloc = PETSC_FALSE;
393:   /* the next line frees the default space generated by the Create() */
394:   PetscFree(b->a);
395:   PetscFree(b->ilen);
396:   ierr          = PetscMalloc((ainew[n]+shift+1)*sizeof(PetscScalar),&b->a);
397:   b->j          = ajnew;
398:   b->i          = ainew;
399:   b->diag       = idnew;
400:   b->ilen       = 0;
401:   b->imax       = 0;
402:   b->row        = isrow;
403:   b->col        = iscol;
404:   b->lu_damping   = info->damping;
405:   b->lu_zeropivot = info->zeropivot;
406:   ierr          = PetscObjectReference((PetscObject)isrow);
407:   ierr          = PetscObjectReference((PetscObject)iscol);
408:   b->icol       = isicol;
409:   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
410:   /* In b structure:  Free imax, ilen, old a, old j.  
411:      Allocate idnew, solve_work, new a, new j */
412:   PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(PetscScalar)));
413:   b->maxnz = b->nz = ainew[n] + shift;

415:   (*B)->factor                 =  FACTOR_LU;
416:   (*B)->info.factor_mallocs    = realloc;
417:   (*B)->info.fill_ratio_given  = f;
418:   Mat_AIJ_CheckInode(*B,PETSC_FALSE);
419:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */

421:   if (ai[n] != 0) {
422:     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
423:   } else {
424:     (*B)->info.fill_ratio_needed = 0.0;
425:   }
426:   return(0);
427: }
428: /* ----------------------------------------------------------- */
429: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);

431: #undef __FUNCT__  
433: int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
434: {
435:   Mat          C = *B;
436:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
437:   IS           isrow = b->row,isicol = b->icol;
438:   int          *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j;
439:   int          *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift;
440:   int          *diag_offset = b->diag,diag;
441:   int          *pj,ndamp = 0;
442:   PetscScalar  *rtmp,*v,*pc,multiplier,*pv,*rtmps;
443:   PetscReal    damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs;
444:   PetscTruth   damp;

447:   ierr  = ISGetIndices(isrow,&r);
448:   ierr  = ISGetIndices(isicol,&ic);
449:   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
450:   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
451:   rtmps = rtmp + shift; ics = ic + shift;

453:   do {
454:     damp = PETSC_FALSE;
455:     for (i=0; i<n; i++) {
456:       nz    = ai[i+1] - ai[i];
457:       ajtmp = aj + ai[i] + shift;
458:       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;

460:       /* load in initial (unfactored row) */
461:       nz       = a->i[r[i]+1] - a->i[r[i]];
462:       ajtmpold = a->j + a->i[r[i]] + shift;
463:       v        = a->a + a->i[r[i]] + shift;
464:       for (j=0; j<nz; j++) {
465:         rtmp[ics[ajtmpold[j]]] = v[j];
466:         if (ndamp && ajtmpold[j] == r[i]) { /* damp the diagonal of the matrix */
467:           rtmp[ics[ajtmpold[j]]] += damping;
468:         }
469:       }

471:       row = *ajtmp++ + shift;
472:       while  (row < i) {
473:         pc = rtmp + row;
474:         if (*pc != 0.0) {
475:           pv         = b->a + diag_offset[row] + shift;
476:           pj         = b->j + diag_offset[row] + (!shift);
477:           multiplier = *pc / *pv++;
478:           *pc        = multiplier;
479:           nz         = ai[row+1] - diag_offset[row] - 1;
480:           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
481:           PetscLogFlops(2*nz);
482:         }
483:         row = *ajtmp++ + shift;
484:       }
485:       /* finished row so stick it into b->a */
486:       pv   = b->a + ai[i] + shift;
487:       pj   = b->j + ai[i] + shift;
488:       nz   = ai[i+1] - ai[i];
489:       diag = diag_offset[i] - ai[i];
490:       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
491:       rs   = 0.0;
492:       for (j=0; j<nz; j++) {
493:         pv[j] = rtmps[pj[j]];
494:         if (j != diag) rs += PetscAbsScalar(pv[j]);
495:       }
496:       if (PetscAbsScalar(pv[diag]) < zeropivot*rs) {
497:         if (damping) {
498:           if (ndamp) damping *= 2.0;
499:           damp = PETSC_TRUE;
500:           ndamp++;
501:           break;
502:         } else {
503:           SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
504:         }
505:       }
506:     }
507:   } while (damp);

509:   /* invert diagonal entries for simplier triangular solves */
510:   for (i=0; i<n; i++) {
511:     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
512:   }

514:   PetscFree(rtmp);
515:   ISRestoreIndices(isicol,&ic);
516:   ISRestoreIndices(isrow,&r);
517:   C->factor = FACTOR_LU;
518:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
519:   C->assembled = PETSC_TRUE;
520:   PetscLogFlops(C->n);
521:   if (ndamp) {
522:     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %gn",ndamp,damping);
523:   }
524:   return(0);
525: }
526: /* ----------------------------------------------------------- */
527: #undef __FUNCT__  
529: int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatLUInfo *info)
530: {
532:   Mat C;

535:   MatLUFactorSymbolic(A,row,col,info,&C);
536:   MatLUFactorNumeric(A,&C);
537:   MatHeaderCopy(A,C);
538:   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
539:   return(0);
540: }
541: /* ----------------------------------------------------------- */
542: #undef __FUNCT__  
544: int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
545: {
546:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
547:   IS           iscol = a->col,isrow = a->row;
548:   int          *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
549:   int          nz,shift = a->indexshift,*rout,*cout;
550:   PetscScalar  *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

553:   if (!n) return(0);

555:   VecGetArray(bb,&b);
556:   VecGetArray(xx,&x);
557:   tmp  = a->solve_work;

559:   ISGetIndices(isrow,&rout); r = rout;
560:   ISGetIndices(iscol,&cout); c = cout + (n-1);

562:   /* forward solve the lower triangular */
563:   tmp[0] = b[*r++];
564:   tmps   = tmp + shift;
565:   for (i=1; i<n; i++) {
566:     v   = aa + ai[i] + shift;
567:     vi  = aj + ai[i] + shift;
568:     nz  = a->diag[i] - ai[i];
569:     sum = b[*r++];
570:     while (nz--) sum -= *v++ * tmps[*vi++];
571:     tmp[i] = sum;
572:   }

574:   /* backward solve the upper triangular */
575:   for (i=n-1; i>=0; i--){
576:     v   = aa + a->diag[i] + (!shift);
577:     vi  = aj + a->diag[i] + (!shift);
578:     nz  = ai[i+1] - a->diag[i] - 1;
579:     sum = tmp[i];
580:     while (nz--) sum -= *v++ * tmps[*vi++];
581:     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
582:   }

584:   ISRestoreIndices(isrow,&rout);
585:   ISRestoreIndices(iscol,&cout);
586:   VecRestoreArray(bb,&b);
587:   VecRestoreArray(xx,&x);
588:   PetscLogFlops(2*a->nz - A->n);
589:   return(0);
590: }

592: /* ----------------------------------------------------------- */
593: #undef __FUNCT__  
595: int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
596: {
597:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
598:   int          n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr;
599:   PetscScalar  *x,*b,*aa = a->a;
600: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
601:   int          adiag_i,i,*vi,nz,ai_i;
602:   PetscScalar  *v,sum;
603: #endif

606:   if (!n) return(0);
607:   if (a->indexshift) {
608:      MatSolve_SeqAIJ(A,bb,xx);
609:      return(0);
610:   }

612:   VecGetArray(bb,&b);
613:   VecGetArray(xx,&x);

615: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
616:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
617: #else
618:   /* forward solve the lower triangular */
619:   x[0] = b[0];
620:   for (i=1; i<n; i++) {
621:     ai_i = ai[i];
622:     v    = aa + ai_i;
623:     vi   = aj + ai_i;
624:     nz   = adiag[i] - ai_i;
625:     sum  = b[i];
626:     while (nz--) sum -= *v++ * x[*vi++];
627:     x[i] = sum;
628:   }

630:   /* backward solve the upper triangular */
631:   for (i=n-1; i>=0; i--){
632:     adiag_i = adiag[i];
633:     v       = aa + adiag_i + 1;
634:     vi      = aj + adiag_i + 1;
635:     nz      = ai[i+1] - adiag_i - 1;
636:     sum     = x[i];
637:     while (nz--) sum -= *v++ * x[*vi++];
638:     x[i]    = sum*aa[adiag_i];
639:   }
640: #endif
641:   PetscLogFlops(2*a->nz - A->n);
642:   VecRestoreArray(bb,&b);
643:   VecRestoreArray(xx,&x);
644:   return(0);
645: }

647: #undef __FUNCT__  
649: int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
650: {
651:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
652:   IS           iscol = a->col,isrow = a->row;
653:   int          *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
654:   int          nz,shift = a->indexshift,*rout,*cout;
655:   PetscScalar  *x,*b,*tmp,*aa = a->a,sum,*v;

658:   if (yy != xx) {VecCopy(yy,xx);}

660:   VecGetArray(bb,&b);
661:   VecGetArray(xx,&x);
662:   tmp  = a->solve_work;

664:   ISGetIndices(isrow,&rout); r = rout;
665:   ISGetIndices(iscol,&cout); c = cout + (n-1);

667:   /* forward solve the lower triangular */
668:   tmp[0] = b[*r++];
669:   for (i=1; i<n; i++) {
670:     v   = aa + ai[i] + shift;
671:     vi  = aj + ai[i] + shift;
672:     nz  = a->diag[i] - ai[i];
673:     sum = b[*r++];
674:     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
675:     tmp[i] = sum;
676:   }

678:   /* backward solve the upper triangular */
679:   for (i=n-1; i>=0; i--){
680:     v   = aa + a->diag[i] + (!shift);
681:     vi  = aj + a->diag[i] + (!shift);
682:     nz  = ai[i+1] - a->diag[i] - 1;
683:     sum = tmp[i];
684:     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
685:     tmp[i] = sum*aa[a->diag[i]+shift];
686:     x[*c--] += tmp[i];
687:   }

689:   ISRestoreIndices(isrow,&rout);
690:   ISRestoreIndices(iscol,&cout);
691:   VecRestoreArray(bb,&b);
692:   VecRestoreArray(xx,&x);
693:   PetscLogFlops(2*a->nz);

695:   return(0);
696: }
697: /* -------------------------------------------------------------------*/
698: #undef __FUNCT__  
700: int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
701: {
702:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
703:   IS           iscol = a->col,isrow = a->row;
704:   int          *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
705:   int          nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
706:   PetscScalar  *x,*b,*tmp,*aa = a->a,*v,s1;

709:   VecGetArray(bb,&b);
710:   VecGetArray(xx,&x);
711:   tmp  = a->solve_work;

713:   ISGetIndices(isrow,&rout); r = rout;
714:   ISGetIndices(iscol,&cout); c = cout;

716:   /* copy the b into temp work space according to permutation */
717:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

719:   /* forward solve the U^T */
720:   for (i=0; i<n; i++) {
721:     v   = aa + diag[i] + shift;
722:     vi  = aj + diag[i] + (!shift);
723:     nz  = ai[i+1] - diag[i] - 1;
724:     s1  = tmp[i];
725:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
726:     while (nz--) {
727:       tmp[*vi++ + shift] -= (*v++)*s1;
728:     }
729:     tmp[i] = s1;
730:   }

732:   /* backward solve the L^T */
733:   for (i=n-1; i>=0; i--){
734:     v   = aa + diag[i] - 1 + shift;
735:     vi  = aj + diag[i] - 1 + shift;
736:     nz  = diag[i] - ai[i];
737:     s1  = tmp[i];
738:     while (nz--) {
739:       tmp[*vi-- + shift] -= (*v--)*s1;
740:     }
741:   }

743:   /* copy tmp into x according to permutation */
744:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

746:   ISRestoreIndices(isrow,&rout);
747:   ISRestoreIndices(iscol,&cout);
748:   VecRestoreArray(bb,&b);
749:   VecRestoreArray(xx,&x);

751:   PetscLogFlops(2*a->nz-A->n);
752:   return(0);
753: }

755: #undef __FUNCT__  
757: int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
758: {
759:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
760:   IS           iscol = a->col,isrow = a->row;
761:   int          *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
762:   int          nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
763:   PetscScalar  *x,*b,*tmp,*aa = a->a,*v;

766:   if (zz != xx) VecCopy(zz,xx);

768:   VecGetArray(bb,&b);
769:   VecGetArray(xx,&x);
770:   tmp = a->solve_work;

772:   ISGetIndices(isrow,&rout); r = rout;
773:   ISGetIndices(iscol,&cout); c = cout;

775:   /* copy the b into temp work space according to permutation */
776:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

778:   /* forward solve the U^T */
779:   for (i=0; i<n; i++) {
780:     v   = aa + diag[i] + shift;
781:     vi  = aj + diag[i] + (!shift);
782:     nz  = ai[i+1] - diag[i] - 1;
783:     tmp[i] *= *v++;
784:     while (nz--) {
785:       tmp[*vi++ + shift] -= (*v++)*tmp[i];
786:     }
787:   }

789:   /* backward solve the L^T */
790:   for (i=n-1; i>=0; i--){
791:     v   = aa + diag[i] - 1 + shift;
792:     vi  = aj + diag[i] - 1 + shift;
793:     nz  = diag[i] - ai[i];
794:     while (nz--) {
795:       tmp[*vi-- + shift] -= (*v--)*tmp[i];
796:     }
797:   }

799:   /* copy tmp into x according to permutation */
800:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

802:   ISRestoreIndices(isrow,&rout);
803:   ISRestoreIndices(iscol,&cout);
804:   VecRestoreArray(bb,&b);
805:   VecRestoreArray(xx,&x);

807:   PetscLogFlops(2*a->nz);
808:   return(0);
809: }
810: /* ----------------------------------------------------------------*/
811: EXTERN int MatMissingDiagonal_SeqAIJ(Mat);

813: #undef __FUNCT__  
815: int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
816: {
817:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
818:   IS         isicol;
819:   int        *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j;
820:   int        *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
821:   int        *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
822:   int        incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill;
823:   PetscTruth col_identity,row_identity;
824:   PetscReal  f;
825: 
827:   f             = info->fill;
828:   levels        = (int)info->levels;
829:   diagonal_fill = (int)info->diagonal_fill;
830:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

832:   /* special case that simply copies fill pattern */
833:   ISIdentity(isrow,&row_identity);
834:   ISIdentity(iscol,&col_identity);
835:   if (!levels && row_identity && col_identity) {
836:     MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
837:     (*fact)->factor = FACTOR_LU;
838:     b               = (Mat_SeqAIJ*)(*fact)->data;
839:     if (!b->diag) {
840:       MatMarkDiagonal_SeqAIJ(*fact);
841:     }
842:     MatMissingDiagonal_SeqAIJ(*fact);
843:     b->row              = isrow;
844:     b->col              = iscol;
845:     b->icol             = isicol;
846:     b->lu_damping       = info->damping;
847:     b->lu_zeropivot     = info->zeropivot;
848:     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
849:     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
850:     ierr                = PetscObjectReference((PetscObject)isrow);
851:     ierr                = PetscObjectReference((PetscObject)iscol);
852:     return(0);
853:   }

855:   ISGetIndices(isrow,&r);
856:   ISGetIndices(isicol,&ic);

858:   /* get new row pointers */
859:   PetscMalloc((n+1)*sizeof(int),&ainew);
860:   ainew[0] = -shift;
861:   /* don't know how many column pointers are needed so estimate */
862:   jmax = (int)(f*(ai[n]+!shift));
863:   PetscMalloc((jmax)*sizeof(int),&ajnew);
864:   /* ajfill is level of fill for each fill entry */
865:   PetscMalloc((jmax)*sizeof(int),&ajfill);
866:   /* fill is a linked list of nonzeros in active row */
867:   PetscMalloc((n+1)*sizeof(int),&fill);
868:   /* im is level for each filled value */
869:   PetscMalloc((n+1)*sizeof(int),&im);
870:   /* dloc is location of diagonal in factor */
871:   PetscMalloc((n+1)*sizeof(int),&dloc);
872:   dloc[0]  = 0;
873:   for (prow=0; prow<n; prow++) {

875:     /* copy current row into linked list */
876:     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
877:     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
878:     xi      = aj + ai[r[prow]] + shift;
879:     fill[n]    = n;
880:     fill[prow] = -1; /* marker to indicate if diagonal exists */
881:     while (nz--) {
882:       fm  = n;
883:       idx = ic[*xi++ + shift];
884:       do {
885:         m  = fm;
886:         fm = fill[m];
887:       } while (fm < idx);
888:       fill[m]   = idx;
889:       fill[idx] = fm;
890:       im[idx]   = 0;
891:     }

893:     /* make sure diagonal entry is included */
894:     if (diagonal_fill && fill[prow] == -1) {
895:       fm = n;
896:       while (fill[fm] < prow) fm = fill[fm];
897:       fill[prow] = fill[fm]; /* insert diagonal into linked list */
898:       fill[fm]   = prow;
899:       im[prow]   = 0;
900:       nzf++;
901:       dcount++;
902:     }

904:     nzi = 0;
905:     row = fill[n];
906:     while (row < prow) {
907:       incrlev = im[row] + 1;
908:       nz      = dloc[row];
909:       xi      = ajnew  + ainew[row] + shift + nz + 1;
910:       flev    = ajfill + ainew[row] + shift + nz + 1;
911:       nnz     = ainew[row+1] - ainew[row] - nz - 1;
912:       fm      = row;
913:       while (nnz-- > 0) {
914:         idx = *xi++ + shift;
915:         if (*flev + incrlev > levels) {
916:           flev++;
917:           continue;
918:         }
919:         do {
920:           m  = fm;
921:           fm = fill[m];
922:         } while (fm < idx);
923:         if (fm != idx) {
924:           im[idx]   = *flev + incrlev;
925:           fill[m]   = idx;
926:           fill[idx] = fm;
927:           fm        = idx;
928:           nzf++;
929:         } else {
930:           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
931:         }
932:         flev++;
933:       }
934:       row = fill[row];
935:       nzi++;
936:     }
937:     /* copy new filled row into permanent storage */
938:     ainew[prow+1] = ainew[prow] + nzf;
939:     if (ainew[prow+1] > jmax-shift) {

941:       /* estimate how much additional space we will need */
942:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
943:       /* just double the memory each time */
944:       /*  maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
945:       int maxadd = jmax;
946:       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
947:       jmax += maxadd;

949:       /* allocate a longer ajnew and ajfill */
950:       ierr   = PetscMalloc(jmax*sizeof(int),&xi);
951:       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
952:       ierr   = PetscFree(ajnew);
953:       ajnew  = xi;
954:       ierr   = PetscMalloc(jmax*sizeof(int),&xi);
955:       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
956:       ierr   = PetscFree(ajfill);
957:       ajfill = xi;
958:       realloc++; /* count how many times we realloc */
959:     }
960:     xi          = ajnew + ainew[prow] + shift;
961:     flev        = ajfill + ainew[prow] + shift;
962:     dloc[prow]  = nzi;
963:     fm          = fill[n];
964:     while (nzf--) {
965:       *xi++   = fm - shift;
966:       *flev++ = im[fm];
967:       fm      = fill[fm];
968:     }
969:     /* make sure row has diagonal entry */
970:     if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) {
971:       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrixn
972:     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
973:     }
974:   }
975:   PetscFree(ajfill);
976:   ISRestoreIndices(isrow,&r);
977:   ISRestoreIndices(isicol,&ic);
978:   PetscFree(fill);
979:   PetscFree(im);

981:   {
982:     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
983:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
984:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use n",af);
985:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);n",af);
986:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.n");
987:     if (diagonal_fill) {
988:       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount);
989:     }
990:   }

992:   /* put together the new matrix */
993:   MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
994:   PetscLogObjectParent(*fact,isicol);
995:   b = (Mat_SeqAIJ*)(*fact)->data;
996:   PetscFree(b->imax);
997:   b->singlemalloc = PETSC_FALSE;
998:   len = (ainew[n] + shift)*sizeof(PetscScalar);
999:   /* the next line frees the default space generated by the Create() */
1000:   PetscFree(b->a);
1001:   PetscFree(b->ilen);
1002:   PetscMalloc(len+1,&b->a);
1003:   b->j          = ajnew;
1004:   b->i          = ainew;
1005:   for (i=0; i<n; i++) dloc[i] += ainew[i];
1006:   b->diag       = dloc;
1007:   b->ilen       = 0;
1008:   b->imax       = 0;
1009:   b->row        = isrow;
1010:   b->col        = iscol;
1011:   ierr          = PetscObjectReference((PetscObject)isrow);
1012:   ierr          = PetscObjectReference((PetscObject)iscol);
1013:   b->icol       = isicol;
1014:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1015:   /* In b structure:  Free imax, ilen, old a, old j.  
1016:      Allocate dloc, solve_work, new a, new j */
1017:   PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(PetscScalar)));
1018:   b->maxnz        = b->nz = ainew[n] + shift;
1019:   b->lu_damping   = info->damping;
1020:   b->lu_zeropivot = info->zeropivot;
1021:   (*fact)->factor = FACTOR_LU;
1022:   Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
1023:   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */

1025:   (*fact)->info.factor_mallocs    = realloc;
1026:   (*fact)->info.fill_ratio_given  = f;
1027:   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1028:   return(0);
1029: }

1031:  #include src/mat/impls/sbaij/seq/sbaij.h
1032: typedef struct {
1033:   int levels;
1034: } Mat_SeqAIJ_SeqSBAIJ;

1036: #undef __FUNCT__  
1038: int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact)
1039: {
1040:   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1041:   int                 ierr;
1042:   Mat_SeqAIJ_SeqSBAIJ *ptr = (Mat_SeqAIJ_SeqSBAIJ*)(*fact)->spptr;

1045:   if (!a->sbaijMat){
1046:     MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1047:   }
1048:   MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(a->sbaijMat,fact);
1049:   if (ptr->levels > 0){
1050:     MatDestroy(a->sbaijMat);
1051:   }
1052:   a->sbaijMat = PETSC_NULL;
1053:   PetscFree(ptr);
1054: 
1055:   return(0);
1056: }

1058: #undef __FUNCT__  
1060: int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,PetscReal fill,int levels,Mat *fact)
1061: {
1062:   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1063:   Mat_SeqSBAIJ        *b;
1064:   int                 ierr;
1065:   PetscTruth          perm_identity;
1066:   Mat_SeqAIJ_SeqSBAIJ *ptr;
1067: 
1069:   ISIdentity(perm,&perm_identity);
1070:   if (!perm_identity){
1071:     SETERRQ(1,"Non-identity permutation is not supported yet");
1072:   }
1073:   if (!a->sbaijMat){
1074:     MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1075:   }

1077:   if (levels > 0){
1078:     MatICCFactorSymbolic(a->sbaijMat,perm,fill,levels,fact);

1080:   } else { /* in-place icc(0) */
1081:     (*fact)             = a->sbaijMat;
1082:     (*fact)->factor     = FACTOR_CHOLESKY;
1083:     b                   = (Mat_SeqSBAIJ*)(*fact)->data;
1084:     b->row              = perm;
1085:     b->icol             = perm;
1086:     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
1087:     ierr                = PetscObjectReference((PetscObject)perm);
1088:     ierr                = PetscObjectReference((PetscObject)perm);
1089:   }

1091:   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1092:   (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;

1094:   ierr           = PetscNew(Mat_SeqAIJ_SeqSBAIJ,&ptr);
1095:   (*fact)->spptr = (void*)ptr;
1096:   ptr->levels    = levels;   /* to be used by CholeskyFactorNumeric_SeqAIJ() */
1097: 
1098:   return(0);
1099: }