Actual source code: aijfact.c

  1: /*$Id: aijfact.c,v 1.160 2001/04/10 19:35:19 bsmith Exp $*/

  3: #include "src/mat/impls/aij/seq/aij.h"
  4: #include "src/vec/vecimpl.h"
  5: #include "src/inline/dot.h"

  7: int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
  8: {

 11:   SETERRQ(PETSC_ERR_SUP,"Code not written");
 12: #if !defined(PETSC_USE_DEBUG)
 13:   return(0);
 14: #endif
 15: }


 18: EXTERN int MatMarkDiagonal_SeqAIJ(Mat);
 19: EXTERN int Mat_AIJ_CheckInode(Mat);

 21: EXTERN int SPARSEKIT2dperm(int*,Scalar*,int*,int*,Scalar*,int*,int*,int*,int*,int*);
 22: EXTERN int SPARSEKIT2ilutp(int*,Scalar*,int*,int*,int*,PetscReal*,PetscReal*,int*,Scalar*,int*,int*,int*,Scalar*,int*,int*,int*);
 23: EXTERN int SPARSEKIT2msrcsr(int*,Scalar*,int*,Scalar*,int*,int*,Scalar*,int*);

 25:   /* ------------------------------------------------------------

 27:           This interface was contribed by Tony Caola

 29:      This routine is an interface to the pivoting drop-tolerance 
 30:      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 
 31:      SPARSEKIT2.

 33:      The SPARSEKIT2 routines used here are covered by the GNU 
 34:      copyright; see the file gnu in this directory.

 36:      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
 37:      help in getting this routine ironed out.

 39:      The major drawback to this routine is that if info->fill is 
 40:      not large enough it fails rather than allocating more space;
 41:      this can be fixed by hacking/improving the f2c version of 
 42:      Yousef Saad's code.

 44:      ishift = 0, for indices start at 1
 45:      ishift = 1, for indices starting at 0
 46:      ------------------------------------------------------------
 47:   */

 49: int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
 50: {
 51:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
 52:   IS         iscolf,isicol,isirow;
 53:   PetscTruth reorder;
 54:   int        *c,*r,*ic,ierr,i,n = A->m;
 55:   int        *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
 56:   int        *ordcol,*iwk,*iperm,*jw;
 57:   int        ishift = !a->indexshift;
 58:   int        jmax,lfill,job,*o_i,*o_j;
 59:   Scalar     *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
 60:   PetscReal  permtol,af;


 64:   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
 65:   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
 66:   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
 67:   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
 68:   lfill   = (int)(info->dtcount/2.0);
 69:   jmax    = (int)(info->fill*a->nz);
 70:   permtol = info->dtcol;


 73:   /* ------------------------------------------------------------
 74:      If reorder=.TRUE., then the original matrix has to be 
 75:      reordered to reflect the user selected ordering scheme, and
 76:      then de-reordered so it is in it's original format.  
 77:      Because Saad's dperm() is NOT in place, we have to copy 
 78:      the original matrix and allocate more storage. . . 
 79:      ------------------------------------------------------------
 80:   */

 82:   /* set reorder to true if either isrow or iscol is not identity */
 83:   ISIdentity(isrow,&reorder);
 84:   if (reorder) {ISIdentity(iscol,&reorder);}
 85:   reorder = PetscNot(reorder);

 87: 
 88:   /* storage for ilu factor */
 89:   PetscMalloc((n+1)*sizeof(int),&new_i);
 90:   PetscMalloc(jmax*sizeof(int),&new_j);
 91:   PetscMalloc(jmax*sizeof(Scalar),&new_a);
 92:   PetscMalloc(n*sizeof(int),&ordcol);

 94:   /* ------------------------------------------------------------
 95:      Make sure that everything is Fortran formatted (1-Based)
 96:      ------------------------------------------------------------
 97:   */
 98:   if (ishift) {
 99:     for (i=old_i[0];i<old_i[n];i++) {
100:       old_j[i]++;
101:     }
102:     for(i=0;i<n+1;i++) {
103:       old_i[i]++;
104:     };
105:   };

107:   if (reorder) {
108:     ISGetIndices(iscol,&c);
109:     ISGetIndices(isrow,&r);
110:     for(i=0;i<n;i++) {
111:       r[i]  = r[i]+1;
112:       c[i]  = c[i]+1;
113:     }
114:     PetscMalloc((n+1)*sizeof(int),&old_i2);
115:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);
116:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar),old_a2);
117:     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
118:     for (i=0;i<n;i++) {
119:       r[i]  = r[i]-1;
120:       c[i]  = c[i]-1;
121:     }
122:     ISRestoreIndices(iscol,&c);
123:     ISRestoreIndices(isrow,&r);
124:     o_a = old_a2;
125:     o_j = old_j2;
126:     o_i = old_i2;
127:   } else {
128:     o_a = old_a;
129:     o_j = old_j;
130:     o_i = old_i;
131:   }

133:   /* ------------------------------------------------------------
134:      Call Saad's ilutp() routine to generate the factorization
135:      ------------------------------------------------------------
136:   */

138:   PetscMalloc(2*n*sizeof(int),&iperm);
139:   PetscMalloc(2*n*sizeof(int),&jw);
140:   PetscMalloc(n*sizeof(Scalar),&w);

142:   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
143:   if (ierr) {
144:     switch (ierr) {
145:       case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
146:       case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
147:       case -5: SETERRQ(1,"ilutp(), zero row encountered");
148:       case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
149:       case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
150:       default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr);
151:     }
152:   }

154:   PetscFree(w);
155:   PetscFree(jw);

157:   /* ------------------------------------------------------------
158:      Saad's routine gives the result in Modified Sparse Row (msr)
159:      Convert to Compressed Sparse Row format (csr) 
160:      ------------------------------------------------------------
161:   */

163:   PetscMalloc(n*sizeof(Scalar),&wk);
164:   PetscMalloc((n+1)*sizeof(int),&iwk);

166:   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);

168:   PetscFree(iwk);
169:   PetscFree(wk);

171:   if (reorder) {
172:     PetscFree(old_a2);
173:     PetscFree(old_j2);
174:     PetscFree(old_i2);
175:   } else {
176:     /* fix permutation of old_j that the factorization introduced */
177:     for (i=old_i[0]; i<old_i[n]; i++) {
178:       old_j[i-1] = iperm[old_j[i-1]-1];
179:     }
180:   }

182:   /* get rid of the shift to indices starting at 1 */
183:   if (ishift) {
184:     for (i=0; i<n+1; i++) {
185:       old_i[i]--;
186:     }
187:     for (i=old_i[0];i<old_i[n];i++) {
188:       old_j[i]--;
189:     }
190:   }

192:   /* Make the factored matrix 0-based */
193:   if (ishift) {
194:     for (i=0; i<n+1; i++) {
195:       new_i[i]--;
196:     }
197:     for (i=new_i[0];i<new_i[n];i++) {
198:       new_j[i]--;
199:     }
200:   }

202:   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203:   /*-- permute the right-hand-side and solution vectors           --*/
204:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
205:   ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
206:   ISGetIndices(isicol,&ic);
207:   for(i=0; i<n; i++) {
208:     ordcol[i] = ic[iperm[i]-1];
209:   };
210:   ISRestoreIndices(isicol,&ic);
211:   ISDestroy(isicol);

213:   PetscFree(iperm);

215:   ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
216:   PetscFree(ordcol);

218:   /*----- put together the new matrix -----*/

220:   MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
221:   (*fact)->factor    = FACTOR_LU;
222:   (*fact)->assembled = PETSC_TRUE;

224:   b = (Mat_SeqAIJ*)(*fact)->data;
225:   PetscFree(b->imax);
226:   b->sorted        = PETSC_FALSE;
227:   b->singlemalloc  = PETSC_FALSE;
228:   /* the next line frees the default space generated by the MatCreate() */
229:   ierr             = PetscFree(b->a);
230:   ierr             = PetscFree(b->ilen);
231:   b->a             = new_a;
232:   b->j             = new_j;
233:   b->i             = new_i;
234:   b->ilen          = 0;
235:   b->imax          = 0;
236:   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
237:   b->row           = isirow;
238:   b->col           = iscolf;
239:   PetscMalloc((n+1)*sizeof(Scalar),&b->solve_work);
240:   b->maxnz = b->nz = new_i[n];
241:   b->indexshift    = a->indexshift;
242:   MatMarkDiagonal_SeqAIJ(*fact);
243:   (*fact)->info.factor_mallocs = 0;

245:   MatMarkDiagonal_SeqAIJ(A);

247:   /* check out for identical nodes. If found, use inode functions */
248:   Mat_AIJ_CheckInode(*fact);

250:   af = ((double)b->nz)/((double)a->nz) + .001;
251:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %gn",info->fill,af);
252:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use n",af);
253:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);n",af);
254:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.n");

256:   return(0);
257: }

259: /*
260:     Factorization code for AIJ format. 
261: */
262: int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B)
263: {
264:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
265:   IS         isicol;
266:   int        *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j;
267:   int        *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift;
268:   int        *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
269:   PetscReal  f;

274:   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");

276:   if (!isrow) {
277:     ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);
278:   }
279:   if (!iscol) {
280:     ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);
281:   }

283:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
284:   ISGetIndices(isrow,&r);
285:   ISGetIndices(isicol,&ic);

287:   /* get new row pointers */
288:   PetscMalloc((n+1)*sizeof(int),&ainew);
289:   ainew[0] = -shift;
290:   /* don't know how many column pointers are needed so estimate */
291:   if (info) f = info->fill; else f = 1.0;
292:   jmax  = (int)(f*ai[n]+(!shift));
293:   PetscMalloc((jmax)*sizeof(int),&ajnew);
294:   /* fill is a linked list of nonzeros in active row */
295:   PetscMalloc((2*n+1)*sizeof(int),&fill);
296:   im   = fill + n + 1;
297:   /* idnew is location of diagonal in factor */
298:   PetscMalloc((n+1)*sizeof(int),&idnew);
299:   idnew[0] = -shift;

301:   for (i=0; i<n; i++) {
302:     /* first copy previous fill into linked list */
303:     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
304:     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
305:     ajtmp   = aj + ai[r[i]] + shift;
306:     fill[n] = n;
307:     while (nz--) {
308:       fm  = n;
309:       idx = ic[*ajtmp++ + shift];
310:       do {
311:         m  = fm;
312:         fm = fill[m];
313:       } while (fm < idx);
314:       fill[m]   = idx;
315:       fill[idx] = fm;
316:     }
317:     row = fill[n];
318:     while (row < i) {
319:       ajtmp = ajnew + idnew[row] + (!shift);
320:       nzbd  = 1 + idnew[row] - ainew[row];
321:       nz    = im[row] - nzbd;
322:       fm    = row;
323:       while (nz-- > 0) {
324:         idx = *ajtmp++ + shift;
325:         nzbd++;
326:         if (idx == i) im[row] = nzbd;
327:         do {
328:           m  = fm;
329:           fm = fill[m];
330:         } while (fm < idx);
331:         if (fm != idx) {
332:           fill[m]   = idx;
333:           fill[idx] = fm;
334:           fm        = idx;
335:           nnz++;
336:         }
337:       }
338:       row = fill[row];
339:     }
340:     /* copy new filled row into permanent storage */
341:     ainew[i+1] = ainew[i] + nnz;
342:     if (ainew[i+1] > jmax) {

344:       /* estimate how much additional space we will need */
345:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
346:       /* just double the memory each time */
347:       int maxadd = jmax;
348:       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
349:       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
350:       jmax += maxadd;

352:       /* allocate a longer ajnew */
353:       PetscMalloc(jmax*sizeof(int),&ajtmp);
354:       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
355:       ierr  = PetscFree(ajnew);
356:       ajnew = ajtmp;
357:       realloc++; /* count how many times we realloc */
358:     }
359:     ajtmp = ajnew + ainew[i] + shift;
360:     fm    = fill[n];
361:     nzi   = 0;
362:     im[i] = nnz;
363:     while (nnz--) {
364:       if (fm < i) nzi++;
365:       *ajtmp++ = fm - shift;
366:       fm       = fill[fm];
367:     }
368:     idnew[i] = ainew[i] + nzi;
369:   }
370:   if (ai[n] != 0) {
371:     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
372:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
373:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use n",af);
374:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);n",af);
375:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.n");
376:   } else {
377:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrixn");
378:   }

380:   ISRestoreIndices(isrow,&r);
381:   ISRestoreIndices(isicol,&ic);

383:   PetscFree(fill);

385:   /* put together the new matrix */
386:   MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);
387:   PetscLogObjectParent(*B,isicol);
388:   b = (Mat_SeqAIJ*)(*B)->data;
389:   PetscFree(b->imax);
390:   b->singlemalloc = PETSC_FALSE;
391:   /* the next line frees the default space generated by the Create() */
392:   PetscFree(b->a);
393:   PetscFree(b->ilen);
394:   ierr          = PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar),&b->a);
395:   b->j          = ajnew;
396:   b->i          = ainew;
397:   b->diag       = idnew;
398:   b->ilen       = 0;
399:   b->imax       = 0;
400:   b->row        = isrow;
401:   b->col        = iscol;
402:   if (info) {
403:     b->lu_damp    = (PetscTruth) ((int)info->damp);
404:     b->lu_damping = info->damping;
405:   } else {
406:     b->lu_damp    = PETSC_FALSE;
407:     b->lu_damping = 0.0;
408:   }
409:   ierr          = PetscObjectReference((PetscObject)isrow);
410:   ierr          = PetscObjectReference((PetscObject)iscol);
411:   b->icol       = isicol;
412:   ierr          = PetscMalloc((n+1)*sizeof(Scalar),&b->solve_work);
413:   /* In b structure:  Free imax, ilen, old a, old j.  
414:      Allocate idnew, solve_work, new a, new j */
415:   PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
416:   b->maxnz = b->nz = ainew[n] + shift;

418:   (*B)->factor                 =  FACTOR_LU;
419:   (*B)->info.factor_mallocs    = realloc;
420:   (*B)->info.fill_ratio_given  = f;
421:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */

423:   if (ai[n] != 0) {
424:     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
425:   } else {
426:     (*B)->info.fill_ratio_needed = 0.0;
427:   }
428:   return(0);
429: }
430: /* ----------------------------------------------------------- */
431: EXTERN int Mat_AIJ_CheckInode(Mat);

433: int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
434: {
435:   Mat        C = *B;
436:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
437:   IS         isrow = b->row,isicol = b->icol;
438:   int        *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j;
439:   int        *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift;
440:   int        *diag_offset = b->diag,diag;
441:   int        *pj,ndamp = 0;
442:   Scalar     *rtmp,*v,*pc,multiplier,*pv,*rtmps;
443:   PetscReal  damping = b->lu_damping;
444:   PetscTruth damp;


448:   ierr  = ISGetIndices(isrow,&r);
449:   ierr  = ISGetIndices(isicol,&ic);
450:   PetscMalloc((n+1)*sizeof(Scalar),&rtmp);
451:   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));
452:   rtmps = rtmp + shift; ics = ic + shift;

454:   do {
455:     damp = PETSC_FALSE;
456:     for (i=0; i<n; i++) {
457:       nz    = ai[i+1] - ai[i];
458:       ajtmp = aj + ai[i] + shift;
459:       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;

461:       /* load in initial (unfactored row) */
462:       nz       = a->i[r[i]+1] - a->i[r[i]];
463:       ajtmpold = a->j + a->i[r[i]] + shift;
464:       v        = a->a + a->i[r[i]] + shift;
465:       for (j=0; j<nz; j++) {
466:         rtmp[ics[ajtmpold[j]]] = v[j];
467:         if (ajtmpold[j] == r[i]) { /* damp the diagonal of the matrix */
468:           rtmp[ics[ajtmpold[j]]] += damping;
469:         }
470:       }

472:       row = *ajtmp++ + shift;
473:       while  (row < i) {
474:         pc = rtmp + row;
475:         if (*pc != 0.0) {
476:           pv         = b->a + diag_offset[row] + shift;
477:           pj         = b->j + diag_offset[row] + (!shift);
478:           multiplier = *pc / *pv++;
479:           *pc        = multiplier;
480:           nz         = ai[row+1] - diag_offset[row] - 1;
481:           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
482:           PetscLogFlops(2*nz);
483:         }
484:         row = *ajtmp++ + shift;
485:       }
486:       /* finished row so stick it into b->a */
487:       pv = b->a + ai[i] + shift;
488:       pj = b->j + ai[i] + shift;
489:       nz = ai[i+1] - ai[i];
490:       for (j=0; j<nz; j++) {pv[j] = rtmps[pj[j]];}
491:       diag = diag_offset[i] - ai[i];
492:       if (PetscAbsScalar(pv[diag]) < 1.e-12) {
493:         if (b->lu_damp) {
494:           damp = PETSC_TRUE;
495:           if (damping) damping *= 2.0;
496:           else damping = 1.e-12;
497:           ndamp++;
498:           break;
499:         } else {
500:           SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d",i);
501:         }
502:       }
503:     }
504:   } while (damp);

506:   /* invert diagonal entries for simplier triangular solves */
507:   for (i=0; i<n; i++) {
508:     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
509:   }

511:   PetscFree(rtmp);
512:   ISRestoreIndices(isicol,&ic);
513:   ISRestoreIndices(isrow,&r);
514:   C->factor = FACTOR_LU;
515:   Mat_AIJ_CheckInode(C);
516:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
517:   C->assembled = PETSC_TRUE;
518:   PetscLogFlops(C->n);
519:   if (ndamp || b->lu_damping) {
520:     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %gn",ndamp,damping);
521:   }
522:   return(0);
523: }
524: /* ----------------------------------------------------------- */
525: int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatLUInfo *info)
526: {
528:   Mat C;

531:   MatLUFactorSymbolic(A,row,col,info,&C);
532:   MatLUFactorNumeric(A,&C);
533:   MatHeaderCopy(A,C);
534:   return(0);
535: }
536: /* ----------------------------------------------------------- */
537: int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
538: {
539:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
540:   IS         iscol = a->col,isrow = a->row;
541:   int        *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
542:   int        nz,shift = a->indexshift,*rout,*cout;
543:   Scalar     *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

546:   if (!n) return(0);

548:   VecGetArray(bb,&b);
549:   VecGetArray(xx,&x);
550:   tmp  = a->solve_work;

552:   ISGetIndices(isrow,&rout); r = rout;
553:   ISGetIndices(iscol,&cout); c = cout + (n-1);

555:   /* forward solve the lower triangular */
556:   tmp[0] = b[*r++];
557:   tmps   = tmp + shift;
558:   for (i=1; i<n; i++) {
559:     v   = aa + ai[i] + shift;
560:     vi  = aj + ai[i] + shift;
561:     nz  = a->diag[i] - ai[i];
562:     sum = b[*r++];
563:     while (nz--) sum -= *v++ * tmps[*vi++];
564:     tmp[i] = sum;
565:   }

567:   /* backward solve the upper triangular */
568:   for (i=n-1; i>=0; i--){
569:     v   = aa + a->diag[i] + (!shift);
570:     vi  = aj + a->diag[i] + (!shift);
571:     nz  = ai[i+1] - a->diag[i] - 1;
572:     sum = tmp[i];
573:     while (nz--) sum -= *v++ * tmps[*vi++];
574:     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
575:   }

577:   ISRestoreIndices(isrow,&rout);
578:   ISRestoreIndices(iscol,&cout);
579:   VecRestoreArray(bb,&b);
580:   VecRestoreArray(xx,&x);
581:   PetscLogFlops(2*a->nz - A->n);
582:   return(0);
583: }

585: /* ----------------------------------------------------------- */
586: int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
587: {
588:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
589:   int        n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr;
590:   Scalar     *x,*b,*aa = a->a,sum;
591: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
592:   int        adiag_i,i,*vi,nz,ai_i;
593:   Scalar     *v;
594: #endif

597:   if (!n) return(0);
598:   if (a->indexshift) {
599:      MatSolve_SeqAIJ(A,bb,xx);
600:      return(0);
601:   }

603:   VecGetArray(bb,&b);
604:   VecGetArray(xx,&x);

606: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
607:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
608: #else
609:   /* forward solve the lower triangular */
610:   x[0] = b[0];
611:   for (i=1; i<n; i++) {
612:     ai_i = ai[i];
613:     v    = aa + ai_i;
614:     vi   = aj + ai_i;
615:     nz   = adiag[i] - ai_i;
616:     sum  = b[i];
617:     while (nz--) sum -= *v++ * x[*vi++];
618:     x[i] = sum;
619:   }

621:   /* backward solve the upper triangular */
622:   for (i=n-1; i>=0; i--){
623:     adiag_i = adiag[i];
624:     v       = aa + adiag_i + 1;
625:     vi      = aj + adiag_i + 1;
626:     nz      = ai[i+1] - adiag_i - 1;
627:     sum     = x[i];
628:     while (nz--) sum -= *v++ * x[*vi++];
629:     x[i]    = sum*aa[adiag_i];
630:   }
631: #endif
632:   PetscLogFlops(2*a->nz - A->n);
633:   VecRestoreArray(bb,&b);
634:   VecRestoreArray(xx,&x);
635:   return(0);
636: }

638: int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
639: {
640:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
641:   IS         iscol = a->col,isrow = a->row;
642:   int        *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
643:   int        nz,shift = a->indexshift,*rout,*cout;
644:   Scalar     *x,*b,*tmp,*aa = a->a,sum,*v;

647:   if (yy != xx) {VecCopy(yy,xx);}

649:   VecGetArray(bb,&b);
650:   VecGetArray(xx,&x);
651:   tmp  = a->solve_work;

653:   ISGetIndices(isrow,&rout); r = rout;
654:   ISGetIndices(iscol,&cout); c = cout + (n-1);

656:   /* forward solve the lower triangular */
657:   tmp[0] = b[*r++];
658:   for (i=1; i<n; i++) {
659:     v   = aa + ai[i] + shift;
660:     vi  = aj + ai[i] + shift;
661:     nz  = a->diag[i] - ai[i];
662:     sum = b[*r++];
663:     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
664:     tmp[i] = sum;
665:   }

667:   /* backward solve the upper triangular */
668:   for (i=n-1; i>=0; i--){
669:     v   = aa + a->diag[i] + (!shift);
670:     vi  = aj + a->diag[i] + (!shift);
671:     nz  = ai[i+1] - a->diag[i] - 1;
672:     sum = tmp[i];
673:     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
674:     tmp[i] = sum*aa[a->diag[i]+shift];
675:     x[*c--] += tmp[i];
676:   }

678:   ISRestoreIndices(isrow,&rout);
679:   ISRestoreIndices(iscol,&cout);
680:   VecRestoreArray(bb,&b);
681:   VecRestoreArray(xx,&x);
682:   PetscLogFlops(2*a->nz);

684:   return(0);
685: }
686: /* -------------------------------------------------------------------*/
687: int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
688: {
689:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
690:   IS         iscol = a->col,isrow = a->row;
691:   int        *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
692:   int        nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
693:   Scalar     *x,*b,*tmp,*aa = a->a,*v,s1;

696:   VecGetArray(bb,&b);
697:   VecGetArray(xx,&x);
698:   tmp  = a->solve_work;

700:   ISGetIndices(isrow,&rout); r = rout;
701:   ISGetIndices(iscol,&cout); c = cout;

703:   /* copy the b into temp work space according to permutation */
704:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

706:   /* forward solve the U^T */
707:   for (i=0; i<n; i++) {
708:     v   = aa + diag[i] + shift;
709:     vi  = aj + diag[i] + (!shift);
710:     nz  = ai[i+1] - diag[i] - 1;
711:     s1  = tmp[i];
712:     s1 *= *(v++);  /* multiply by inverse of diagonal entry */
713:     while (nz--) {
714:       tmp[*vi++ + shift] -= (*v++)*s1;
715:     }
716:     tmp[i] = s1;
717:   }

719:   /* backward solve the L^T */
720:   for (i=n-1; i>=0; i--){
721:     v   = aa + diag[i] - 1 + shift;
722:     vi  = aj + diag[i] - 1 + shift;
723:     nz  = diag[i] - ai[i];
724:     s1  = tmp[i];
725:     while (nz--) {
726:       tmp[*vi-- + shift] -= (*v--)*s1;
727:     }
728:   }

730:   /* copy tmp into x according to permutation */
731:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

733:   ISRestoreIndices(isrow,&rout);
734:   ISRestoreIndices(iscol,&cout);
735:   VecRestoreArray(bb,&b);
736:   VecRestoreArray(xx,&x);

738:   PetscLogFlops(2*a->nz-A->n);
739:   return(0);
740: }

742: int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
743: {
744:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
745:   IS         iscol = a->col,isrow = a->row;
746:   int        *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
747:   int        nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
748:   Scalar     *x,*b,*tmp,*aa = a->a,*v;

751:   if (zz != xx) VecCopy(zz,xx);

753:   VecGetArray(bb,&b);
754:   VecGetArray(xx,&x);
755:   tmp = a->solve_work;

757:   ISGetIndices(isrow,&rout); r = rout;
758:   ISGetIndices(iscol,&cout); c = cout;

760:   /* copy the b into temp work space according to permutation */
761:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

763:   /* forward solve the U^T */
764:   for (i=0; i<n; i++) {
765:     v   = aa + diag[i] + shift;
766:     vi  = aj + diag[i] + (!shift);
767:     nz  = ai[i+1] - diag[i] - 1;
768:     tmp[i] *= *v++;
769:     while (nz--) {
770:       tmp[*vi++ + shift] -= (*v++)*tmp[i];
771:     }
772:   }

774:   /* backward solve the L^T */
775:   for (i=n-1; i>=0; i--){
776:     v   = aa + diag[i] - 1 + shift;
777:     vi  = aj + diag[i] - 1 + shift;
778:     nz  = diag[i] - ai[i];
779:     while (nz--) {
780:       tmp[*vi-- + shift] -= (*v--)*tmp[i];
781:     }
782:   }

784:   /* copy tmp into x according to permutation */
785:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

787:   ISRestoreIndices(isrow,&rout);
788:   ISRestoreIndices(iscol,&cout);
789:   VecRestoreArray(bb,&b);
790:   VecRestoreArray(xx,&x);

792:   PetscLogFlops(2*a->nz);
793:   return(0);
794: }
795: /* ----------------------------------------------------------------*/
796: EXTERN int MatMissingDiagonal_SeqAIJ(Mat);

798: int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
799: {
800:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
801:   IS         isicol;
802:   int        *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j;
803:   int        *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
804:   int        *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
805:   int        incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill;
806:   PetscTruth col_identity,row_identity;
807:   PetscReal  f;
808: 
810:   if (info) {
811:     f             = info->fill;
812:     levels        = (int)info->levels;
813:     diagonal_fill = (int)info->diagonal_fill;
814:   } else {
815:     f             = 1.0;
816:     levels        = 0;
817:     diagonal_fill = 0;
818:   }
819:   if (!isrow) {
820:     ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);
821:   }
822:   if (!iscol) {
823:     ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);
824:   }

826:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

828:   /* special case that simply copies fill pattern */
829:   ISIdentity(isrow,&row_identity);
830:   ISIdentity(iscol,&col_identity);
831:   if (!levels && row_identity && col_identity) {
832:     MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
833:     (*fact)->factor = FACTOR_LU;
834:     b               = (Mat_SeqAIJ*)(*fact)->data;
835:     if (!b->diag) {
836:       MatMarkDiagonal_SeqAIJ(*fact);
837:     }
838:     MatMissingDiagonal_SeqAIJ(*fact);
839:     b->row              = isrow;
840:     b->col              = iscol;
841:     b->icol             = isicol;
842:     if (info) {
843:       b->lu_damp    = (PetscTruth)((int)info->damp);
844:       b->lu_damping = info->damping;
845:     } else {
846:       b->lu_damp    = PETSC_FALSE;
847:       b->lu_damping = 0.0;
848:     }
849:     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(Scalar),&b->solve_work);
850:     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
851:     ierr                = PetscObjectReference((PetscObject)isrow);
852:     ierr                = PetscObjectReference((PetscObject)iscol);
853:     return(0);
854:   }

856:   ISGetIndices(isrow,&r);
857:   ISGetIndices(isicol,&ic);

859:   /* get new row pointers */
860:   PetscMalloc((n+1)*sizeof(int),&ainew);
861:   ainew[0] = -shift;
862:   /* don't know how many column pointers are needed so estimate */
863:   jmax = (int)(f*(ai[n]+!shift));
864:   PetscMalloc((jmax)*sizeof(int),&ajnew);
865:   /* ajfill is level of fill for each fill entry */
866:   PetscMalloc((jmax)*sizeof(int),&ajfill);
867:   /* fill is a linked list of nonzeros in active row */
868:   PetscMalloc((n+1)*sizeof(int),&fill);
869:   /* im is level for each filled value */
870:   PetscMalloc((n+1)*sizeof(int),&im);
871:   /* dloc is location of diagonal in factor */
872:   PetscMalloc((n+1)*sizeof(int),&dloc);
873:   dloc[0]  = 0;
874:   for (prow=0; prow<n; prow++) {

876:     /* copy current row into linked list */
877:     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
878:     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
879:     xi      = aj + ai[r[prow]] + shift;
880:     fill[n]    = n;
881:     fill[prow] = -1; /* marker to indicate if diagonal exists */
882:     while (nz--) {
883:       fm  = n;
884:       idx = ic[*xi++ + shift];
885:       do {
886:         m  = fm;
887:         fm = fill[m];
888:       } while (fm < idx);
889:       fill[m]   = idx;
890:       fill[idx] = fm;
891:       im[idx]   = 0;
892:     }

894:     /* make sure diagonal entry is included */
895:     if (diagonal_fill && fill[prow] == -1) {
896:       fm = n;
897:       while (fill[fm] < prow) fm = fill[fm];
898:       fill[prow] = fill[fm]; /* insert diagonal into linked list */
899:       fill[fm]   = prow;
900:       im[prow]   = 0;
901:       nzf++;
902:       dcount++;
903:     }

905:     nzi = 0;
906:     row = fill[n];
907:     while (row < prow) {
908:       incrlev = im[row] + 1;
909:       nz      = dloc[row];
910:       xi      = ajnew  + ainew[row] + shift + nz + 1;
911:       flev    = ajfill + ainew[row] + shift + nz + 1;
912:       nnz     = ainew[row+1] - ainew[row] - nz - 1;
913:       fm      = row;
914:       while (nnz-- > 0) {
915:         idx = *xi++ + shift;
916:         if (*flev + incrlev > levels) {
917:           flev++;
918:           continue;
919:         }
920:         do {
921:           m  = fm;
922:           fm = fill[m];
923:         } while (fm < idx);
924:         if (fm != idx) {
925:           im[idx]   = *flev + incrlev;
926:           fill[m]   = idx;
927:           fill[idx] = fm;
928:           fm        = idx;
929:           nzf++;
930:         } else {
931:           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
932:         }
933:         flev++;
934:       }
935:       row = fill[row];
936:       nzi++;
937:     }
938:     /* copy new filled row into permanent storage */
939:     ainew[prow+1] = ainew[prow] + nzf;
940:     if (ainew[prow+1] > jmax-shift) {

942:       /* estimate how much additional space we will need */
943:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
944:       /* just double the memory each time */
945:       /*  maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
946:       int maxadd = jmax;
947:       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
948:       jmax += maxadd;

950:       /* allocate a longer ajnew and ajfill */
951:       ierr   = PetscMalloc(jmax*sizeof(int),&xi);
952:       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
953:       ierr   = PetscFree(ajnew);
954:       ajnew  = xi;
955:       ierr   = PetscMalloc(jmax*sizeof(int),&xi);
956:       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
957:       ierr   = PetscFree(ajfill);
958:       ajfill = xi;
959:       realloc++; /* count how many times we realloc */
960:     }
961:     xi          = ajnew + ainew[prow] + shift;
962:     flev        = ajfill + ainew[prow] + shift;
963:     dloc[prow]  = nzi;
964:     fm          = fill[n];
965:     while (nzf--) {
966:       *xi++   = fm - shift;
967:       *flev++ = im[fm];
968:       fm      = fill[fm];
969:     }
970:     /* make sure row has diagonal entry */
971:     if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) {
972:       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrixn
973:     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
974:     }
975:   }
976:   PetscFree(ajfill);
977:   ISRestoreIndices(isrow,&r);
978:   ISRestoreIndices(isicol,&ic);
979:   PetscFree(fill);
980:   PetscFree(im);

982:   {
983:     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
984:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
985:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use n",af);
986:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);n",af);
987:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.n");
988:     if (diagonal_fill) {
989:       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount);
990:     }
991:   }

993:   /* put together the new matrix */
994:   MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
995:   PetscLogObjectParent(*fact,isicol);
996:   b = (Mat_SeqAIJ*)(*fact)->data;
997:   PetscFree(b->imax);
998:   b->singlemalloc = PETSC_FALSE;
999:   len = (ainew[n] + shift)*sizeof(Scalar);
1000:   /* the next line frees the default space generated by the Create() */
1001:   PetscFree(b->a);
1002:   PetscFree(b->ilen);
1003:   PetscMalloc(len+1,&b->a);
1004:   b->j          = ajnew;
1005:   b->i          = ainew;
1006:   for (i=0; i<n; i++) dloc[i] += ainew[i];
1007:   b->diag       = dloc;
1008:   b->ilen       = 0;
1009:   b->imax       = 0;
1010:   b->row        = isrow;
1011:   b->col        = iscol;
1012:   ierr          = PetscObjectReference((PetscObject)isrow);
1013:   ierr          = PetscObjectReference((PetscObject)iscol);
1014:   b->icol       = isicol;
1015:   PetscMalloc((n+1)*sizeof(Scalar),&b->solve_work);
1016:   /* In b structure:  Free imax, ilen, old a, old j.  
1017:      Allocate dloc, solve_work, new a, new j */
1018:   PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
1019:   b->maxnz          = b->nz = ainew[n] + shift;
1020:   if (info) {
1021:     b->lu_damp    = (PetscTruth)((int)info->damp);
1022:     b->lu_damping = info->damping;
1023:   } else {
1024:     b->lu_damp    = PETSC_FALSE;
1025:     b->lu_damping = 0.0;
1026:   }
1027:   (*fact)->factor   = FACTOR_LU;

1029:   (*fact)->info.factor_mallocs    = realloc;
1030:   (*fact)->info.fill_ratio_given  = f;
1031:   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1032:   (*fact)->factor                 =  FACTOR_LU;

1034:   return(0);
1035: }