Actual source code: aijfact.c
2: #include src/mat/impls/aij/seq/aij.h
3: #include src/inline/dot.h
4: #include src/inline/spops.h
8: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
9: {
12: SETERRQ(PETSC_ERR_SUP,"Code not written");
13: #if !defined(PETSC_USE_DEBUG)
14: return(0);
15: #endif
16: }
19: EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
20: EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
22: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
23: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
24: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28: /* ------------------------------------------------------------
30: This interface was contribed by Tony Caola
32: This routine is an interface to the pivoting drop-tolerance
33: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
34: SPARSEKIT2.
36: The SPARSEKIT2 routines used here are covered by the GNU
37: copyright; see the file gnu in this directory.
39: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
40: help in getting this routine ironed out.
42: The major drawback to this routine is that if info->fill is
43: not large enough it fails rather than allocating more space;
44: this can be fixed by hacking/improving the f2c version of
45: Yousef Saad's code.
47: ------------------------------------------------------------
48: */
49: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
50: {
51: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
53: SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
54: You can obtain the drop tolerance routines by installing PETSc from\n\
55: www.mcs.anl.gov/petsc\n");
56: #else
57: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
58: IS iscolf,isicol,isirow;
59: PetscTruth reorder;
60: PetscErrorCode ierr,sierr;
61: PetscInt *c,*r,*ic,i,n = A->m;
62: PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
63: PetscInt *ordcol,*iwk,*iperm,*jw;
64: PetscInt jmax,lfill,job,*o_i,*o_j;
65: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
66: PetscReal af;
70: if (info->dt == PETSC_DEFAULT) info->dt = .005;
71: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
72: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
73: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
74: lfill = (PetscInt)(info->dtcount/2.0);
75: jmax = (PetscInt)(info->fill*a->nz);
78: /* ------------------------------------------------------------
79: If reorder=.TRUE., then the original matrix has to be
80: reordered to reflect the user selected ordering scheme, and
81: then de-reordered so it is in it's original format.
82: Because Saad's dperm() is NOT in place, we have to copy
83: the original matrix and allocate more storage. . .
84: ------------------------------------------------------------
85: */
87: /* set reorder to true if either isrow or iscol is not identity */
88: ISIdentity(isrow,&reorder);
89: if (reorder) {ISIdentity(iscol,&reorder);}
90: reorder = PetscNot(reorder);
92:
93: /* storage for ilu factor */
94: PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
95: PetscMalloc(jmax*sizeof(PetscInt),&new_j);
96: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
97: PetscMalloc(n*sizeof(PetscInt),&ordcol);
99: /* ------------------------------------------------------------
100: Make sure that everything is Fortran formatted (1-Based)
101: ------------------------------------------------------------
102: */
103: for (i=old_i[0];i<old_i[n];i++) {
104: old_j[i]++;
105: }
106: for(i=0;i<n+1;i++) {
107: old_i[i]++;
108: };
109:
111: if (reorder) {
112: ISGetIndices(iscol,&c);
113: ISGetIndices(isrow,&r);
114: for(i=0;i<n;i++) {
115: r[i] = r[i]+1;
116: c[i] = c[i]+1;
117: }
118: PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
119: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
120: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
121: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
122: for (i=0;i<n;i++) {
123: r[i] = r[i]-1;
124: c[i] = c[i]-1;
125: }
126: ISRestoreIndices(iscol,&c);
127: ISRestoreIndices(isrow,&r);
128: o_a = old_a2;
129: o_j = old_j2;
130: o_i = old_i2;
131: } else {
132: o_a = old_a;
133: o_j = old_j;
134: o_i = old_i;
135: }
137: /* ------------------------------------------------------------
138: Call Saad's ilutp() routine to generate the factorization
139: ------------------------------------------------------------
140: */
142: PetscMalloc(2*n*sizeof(PetscInt),&iperm);
143: PetscMalloc(2*n*sizeof(PetscInt),&jw);
144: PetscMalloc(n*sizeof(PetscScalar),&w);
146: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
147: if (sierr) {
148: switch (sierr) {
149: case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
150: case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
151: case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
152: case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
153: case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
154: default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
155: }
156: }
158: PetscFree(w);
159: PetscFree(jw);
161: /* ------------------------------------------------------------
162: Saad's routine gives the result in Modified Sparse Row (msr)
163: Convert to Compressed Sparse Row format (csr)
164: ------------------------------------------------------------
165: */
167: PetscMalloc(n*sizeof(PetscScalar),&wk);
168: PetscMalloc((n+1)*sizeof(PetscInt),&iwk);
170: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
172: PetscFree(iwk);
173: PetscFree(wk);
175: if (reorder) {
176: PetscFree(old_a2);
177: PetscFree(old_j2);
178: PetscFree(old_i2);
179: } else {
180: /* fix permutation of old_j that the factorization introduced */
181: for (i=old_i[0]; i<old_i[n]; i++) {
182: old_j[i-1] = iperm[old_j[i-1]-1];
183: }
184: }
186: /* get rid of the shift to indices starting at 1 */
187: for (i=0; i<n+1; i++) {
188: old_i[i]--;
189: }
190: for (i=old_i[0];i<old_i[n];i++) {
191: old_j[i]--;
192: }
193:
194: /* Make the factored matrix 0-based */
195: for (i=0; i<n+1; i++) {
196: new_i[i]--;
197: }
198: for (i=new_i[0];i<new_i[n];i++) {
199: new_j[i]--;
200: }
202: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203: /*-- permute the right-hand-side and solution vectors --*/
204: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
205: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
206: ISGetIndices(isicol,&ic);
207: for(i=0; i<n; i++) {
208: ordcol[i] = ic[iperm[i]-1];
209: };
210: ISRestoreIndices(isicol,&ic);
211: ISDestroy(isicol);
213: PetscFree(iperm);
215: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
216: PetscFree(ordcol);
218: /*----- put together the new matrix -----*/
220: MatCreate(A->comm,n,n,n,n,fact);
221: MatSetType(*fact,A->type_name);
222: MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);
223: (*fact)->factor = FACTOR_LU;
224: (*fact)->assembled = PETSC_TRUE;
226: b = (Mat_SeqAIJ*)(*fact)->data;
227: PetscFree(b->imax);
228: b->sorted = PETSC_FALSE;
229: b->singlemalloc = PETSC_FALSE;
230: /* the next line frees the default space generated by the MatCreate() */
231: PetscFree(b->a);
232: PetscFree(b->ilen);
233: b->a = new_a;
234: b->j = new_j;
235: b->i = new_i;
236: b->ilen = 0;
237: b->imax = 0;
238: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239: b->row = isirow;
240: b->col = iscolf;
241: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
242: b->maxnz = b->nz = new_i[n];
243: MatMarkDiagonal_SeqAIJ(*fact);
244: (*fact)->info.factor_mallocs = 0;
246: MatMarkDiagonal_SeqAIJ(A);
248: /* check out for identical nodes. If found, use inode functions */
249: Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
251: af = ((double)b->nz)/((double)a->nz) + .001;
252: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
257: return(0);
258: #endif
259: }
261: /*
262: Factorization code for AIJ format.
263: */
266: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267: {
268: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
269: IS isicol;
271: PetscInt *r,*ic,i,n = A->m,*ai = a->i,*aj = a->j;
272: PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
273: PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im;
274: PetscReal f;
277: if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
278: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
279: ISGetIndices(isrow,&r);
280: ISGetIndices(isicol,&ic);
282: /* get new row pointers */
283: PetscMalloc((n+1)*sizeof(PetscInt),&ainew);
284: ainew[0] = 0;
285: /* don't know how many column pointers are needed so estimate */
286: f = info->fill;
287: jmax = (PetscInt)(f*ai[n]+1);
288: PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);
289: /* fill is a linked list of nonzeros in active row */
290: PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);
291: im = fill + n + 1;
292: /* idnew is location of diagonal in factor */
293: PetscMalloc((n+1)*sizeof(PetscInt),&idnew);
294: idnew[0] = 0;
296: for (i=0; i<n; i++) {
297: /* first copy previous fill into linked list */
298: nnz = nz = ai[r[i]+1] - ai[r[i]];
299: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
300: ajtmp = aj + ai[r[i]];
301: fill[n] = n;
302: while (nz--) {
303: fm = n;
304: idx = ic[*ajtmp++];
305: do {
306: m = fm;
307: fm = fill[m];
308: } while (fm < idx);
309: fill[m] = idx;
310: fill[idx] = fm;
311: }
312: row = fill[n];
313: while (row < i) {
314: ajtmp = ajnew + idnew[row] + 1;
315: nzbd = 1 + idnew[row] - ainew[row];
316: nz = im[row] - nzbd;
317: fm = row;
318: while (nz-- > 0) {
319: idx = *ajtmp++ ;
320: nzbd++;
321: if (idx == i) im[row] = nzbd;
322: do {
323: m = fm;
324: fm = fill[m];
325: } while (fm < idx);
326: if (fm != idx) {
327: fill[m] = idx;
328: fill[idx] = fm;
329: fm = idx;
330: nnz++;
331: }
332: }
333: row = fill[row];
334: }
335: /* copy new filled row into permanent storage */
336: ainew[i+1] = ainew[i] + nnz;
337: if (ainew[i+1] > jmax) {
339: /* estimate how much additional space we will need */
340: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
341: /* just double the memory each time */
342: PetscInt maxadd = jmax;
343: /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
344: if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
345: jmax += maxadd;
347: /* allocate a longer ajnew */
348: PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);
349: PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(PetscInt));
350: PetscFree(ajnew);
351: ajnew = ajtmp;
352: reallocs++; /* count how many times we realloc */
353: }
354: ajtmp = ajnew + ainew[i];
355: fm = fill[n];
356: nzi = 0;
357: im[i] = nnz;
358: while (nnz--) {
359: if (fm < i) nzi++;
360: *ajtmp++ = fm ;
361: fm = fill[fm];
362: }
363: idnew[i] = ainew[i] + nzi;
364: }
365: if (ai[n] != 0) {
366: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
367: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
368: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
369: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
370: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
371: } else {
372: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
373: }
375: ISRestoreIndices(isrow,&r);
376: ISRestoreIndices(isicol,&ic);
378: PetscFree(fill);
380: /* put together the new matrix */
381: MatCreate(A->comm,n,n,n,n,B);
382: MatSetType(*B,A->type_name);
383: MatSeqAIJSetPreallocation(*B,0,PETSC_NULL);
384: PetscLogObjectParent(*B,isicol);
385: b = (Mat_SeqAIJ*)(*B)->data;
386: PetscFree(b->imax);
387: b->singlemalloc = PETSC_FALSE;
388: /* the next line frees the default space generated by the Create() */
389: PetscFree(b->a);
390: PetscFree(b->ilen);
391: PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);
392: b->j = ajnew;
393: b->i = ainew;
394: b->diag = idnew;
395: b->ilen = 0;
396: b->imax = 0;
397: b->row = isrow;
398: b->col = iscol;
399: b->lu_damping = info->damping;
400: b->lu_zeropivot = info->zeropivot;
401: b->lu_shift = info->shift;
402: b->lu_shift_fraction = info->shift_fraction;
403: PetscObjectReference((PetscObject)isrow);
404: PetscObjectReference((PetscObject)iscol);
405: b->icol = isicol;
406: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
407: /* In b structure: Free imax, ilen, old a, old j.
408: Allocate idnew, solve_work, new a, new j */
409: PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
410: b->maxnz = b->nz = ainew[n] ;
412: (*B)->factor = FACTOR_LU;
413: (*B)->info.factor_mallocs = reallocs;
414: (*B)->info.fill_ratio_given = f;
415: Mat_AIJ_CheckInode(*B,PETSC_FALSE);
416: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
418: if (ai[n] != 0) {
419: (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
420: } else {
421: (*B)->info.fill_ratio_needed = 0.0;
422: }
423: return(0);
424: }
425: /* ----------------------------------------------------------- */
426: EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
430: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
431: {
432: Mat C = *B;
433: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
434: IS isrow = b->row,isicol = b->icol;
436: PetscInt *r,*ic,i,j,n = A->m,*ai = b->i,*aj = b->j;
437: PetscInt *ajtmpold,*ajtmp,nz,row,*ics;
438: PetscInt *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0;
439: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
440: PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d;
441: PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.;
442: PetscTruth damp,lushift;
445: ISGetIndices(isrow,&r);
446: ISGetIndices(isicol,&ic);
447: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
448: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
449: rtmps = rtmp; ics = ic;
451: if (!a->diag) {
452: MatMarkDiagonal_SeqAIJ(A);
453: }
455: if (b->lu_shift) { /* set max shift */
456: PetscInt *aai = a->i,*ddiag = a->diag;
457: shift_top = 0;
458: for (i=0; i<n; i++) {
459: d = PetscAbsScalar((a->a)[ddiag[i]]);
460: /* calculate amt of shift needed for this row */
461: if (d<=0) {
462: row_shift = 0;
463: } else {
464: row_shift = -2*d;
465: }
466: v = a->a+aai[i];
467: for (j=0; j<aai[i+1]-aai[i]; j++)
468: row_shift += PetscAbsScalar(v[j]);
469: if (row_shift>shift_top) shift_top = row_shift;
470: }
471: }
473: shift_fraction = 0; shift_amount = 0;
474: do {
475: damp = PETSC_FALSE;
476: lushift = PETSC_FALSE;
477: for (i=0; i<n; i++) {
478: nz = ai[i+1] - ai[i];
479: ajtmp = aj + ai[i];
480: for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
482: /* load in initial (unfactored row) */
483: nz = a->i[r[i]+1] - a->i[r[i]];
484: ajtmpold = a->j + a->i[r[i]];
485: v = a->a + a->i[r[i]];
486: for (j=0; j<nz; j++) {
487: rtmp[ics[ajtmpold[j]]] = v[j];
488: }
489: rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */
491: row = *ajtmp++ ;
492: while (row < i) {
493: pc = rtmp + row;
494: if (*pc != 0.0) {
495: pv = b->a + diag_offset[row];
496: pj = b->j + diag_offset[row] + 1;
497: multiplier = *pc / *pv++;
498: *pc = multiplier;
499: nz = ai[row+1] - diag_offset[row] - 1;
500: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
501: PetscLogFlops(2*nz);
502: }
503: row = *ajtmp++;
504: }
505: /* finished row so stick it into b->a */
506: pv = b->a + ai[i] ;
507: pj = b->j + ai[i] ;
508: nz = ai[i+1] - ai[i];
509: diag = diag_offset[i] - ai[i];
510: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
511: rs = 0.0;
512: for (j=0; j<nz; j++) {
513: pv[j] = rtmps[pj[j]];
514: if (j != diag) rs += PetscAbsScalar(pv[j]);
515: }
516: #define MAX_NSHIFT 5
517: if (PetscRealPart(pv[diag]) <= zeropivot*rs && b->lu_shift) {
518: if (nshift>MAX_NSHIFT) {
519: SETERRQ(PETSC_ERR_CONV_FAILED,"Unable to determine shift to enforce positive definite preconditioner");
520: } else if (nshift==MAX_NSHIFT) {
521: shift_fraction = shift_hi;
522: lushift = PETSC_FALSE;
523: } else {
524: shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
525: lushift = PETSC_TRUE;
526: }
527: shift_amount = shift_fraction * shift_top;
528: nshift++;
529: break;
530: }
531: if (PetscAbsScalar(pv[diag]) <= zeropivot*rs) {
532: if (damping) {
533: if (ndamp) damping *= 2.0;
534: damp = PETSC_TRUE;
535: ndamp++;
536: break;
537: } else {
538: SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
539: }
540: }
541: }
542: if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) {
543: /*
544: * if no shift in this attempt & shifting & started shifting & can refine,
545: * then try lower shift
546: */
547: shift_hi = shift_fraction;
548: shift_fraction = (shift_hi+shift_lo)/2.;
549: shift_amount = shift_fraction * shift_top;
550: lushift = PETSC_TRUE;
551: nshift++;
552: }
553: } while (damp || lushift);
555: /* invert diagonal entries for simplier triangular solves */
556: for (i=0; i<n; i++) {
557: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
558: }
560: PetscFree(rtmp);
561: ISRestoreIndices(isicol,&ic);
562: ISRestoreIndices(isrow,&r);
563: C->factor = FACTOR_LU;
564: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
565: C->assembled = PETSC_TRUE;
566: PetscLogFlops(C->n);
567: if (ndamp) {
568: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %D damping value %g\n",ndamp,damping);
569: }
570: if (nshift) {
571: b->lu_shift_fraction = shift_fraction;
572: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction top_value %e number shifts %D\n",shift_fraction,shift_top,nshift);
573: }
574: return(0);
575: }
579: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
580: {
582: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
583: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
584: return(0);
585: }
588: /* ----------------------------------------------------------- */
591: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
592: {
594: Mat C;
597: MatLUFactorSymbolic(A,row,col,info,&C);
598: MatLUFactorNumeric(A,&C);
599: MatHeaderCopy(A,C);
600: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
601: return(0);
602: }
603: /* ----------------------------------------------------------- */
606: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
607: {
608: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
609: IS iscol = a->col,isrow = a->row;
611: PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
612: PetscInt nz,*rout,*cout;
613: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
616: if (!n) return(0);
618: VecGetArray(bb,&b);
619: VecGetArray(xx,&x);
620: tmp = a->solve_work;
622: ISGetIndices(isrow,&rout); r = rout;
623: ISGetIndices(iscol,&cout); c = cout + (n-1);
625: /* forward solve the lower triangular */
626: tmp[0] = b[*r++];
627: tmps = tmp;
628: for (i=1; i<n; i++) {
629: v = aa + ai[i] ;
630: vi = aj + ai[i] ;
631: nz = a->diag[i] - ai[i];
632: sum = b[*r++];
633: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
634: tmp[i] = sum;
635: }
637: /* backward solve the upper triangular */
638: for (i=n-1; i>=0; i--){
639: v = aa + a->diag[i] + 1;
640: vi = aj + a->diag[i] + 1;
641: nz = ai[i+1] - a->diag[i] - 1;
642: sum = tmp[i];
643: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
644: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
645: }
647: ISRestoreIndices(isrow,&rout);
648: ISRestoreIndices(iscol,&cout);
649: VecRestoreArray(bb,&b);
650: VecRestoreArray(xx,&x);
651: PetscLogFlops(2*a->nz - A->n);
652: return(0);
653: }
655: /* ----------------------------------------------------------- */
658: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
659: {
660: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
662: PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
663: PetscScalar *x,*b,*aa = a->a;
664: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
665: PetscInt adiag_i,i,*vi,nz,ai_i;
666: PetscScalar *v,sum;
667: #endif
670: if (!n) return(0);
672: VecGetArray(bb,&b);
673: VecGetArray(xx,&x);
675: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
676: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
677: #else
678: /* forward solve the lower triangular */
679: x[0] = b[0];
680: for (i=1; i<n; i++) {
681: ai_i = ai[i];
682: v = aa + ai_i;
683: vi = aj + ai_i;
684: nz = adiag[i] - ai_i;
685: sum = b[i];
686: while (nz--) sum -= *v++ * x[*vi++];
687: x[i] = sum;
688: }
690: /* backward solve the upper triangular */
691: for (i=n-1; i>=0; i--){
692: adiag_i = adiag[i];
693: v = aa + adiag_i + 1;
694: vi = aj + adiag_i + 1;
695: nz = ai[i+1] - adiag_i - 1;
696: sum = x[i];
697: while (nz--) sum -= *v++ * x[*vi++];
698: x[i] = sum*aa[adiag_i];
699: }
700: #endif
701: PetscLogFlops(2*a->nz - A->n);
702: VecRestoreArray(bb,&b);
703: VecRestoreArray(xx,&x);
704: return(0);
705: }
709: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
710: {
711: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
712: IS iscol = a->col,isrow = a->row;
714: PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
715: PetscInt nz,*rout,*cout;
716: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
719: if (yy != xx) {VecCopy(yy,xx);}
721: VecGetArray(bb,&b);
722: VecGetArray(xx,&x);
723: tmp = a->solve_work;
725: ISGetIndices(isrow,&rout); r = rout;
726: ISGetIndices(iscol,&cout); c = cout + (n-1);
728: /* forward solve the lower triangular */
729: tmp[0] = b[*r++];
730: for (i=1; i<n; i++) {
731: v = aa + ai[i] ;
732: vi = aj + ai[i] ;
733: nz = a->diag[i] - ai[i];
734: sum = b[*r++];
735: while (nz--) sum -= *v++ * tmp[*vi++ ];
736: tmp[i] = sum;
737: }
739: /* backward solve the upper triangular */
740: for (i=n-1; i>=0; i--){
741: v = aa + a->diag[i] + 1;
742: vi = aj + a->diag[i] + 1;
743: nz = ai[i+1] - a->diag[i] - 1;
744: sum = tmp[i];
745: while (nz--) sum -= *v++ * tmp[*vi++ ];
746: tmp[i] = sum*aa[a->diag[i]];
747: x[*c--] += tmp[i];
748: }
750: ISRestoreIndices(isrow,&rout);
751: ISRestoreIndices(iscol,&cout);
752: VecRestoreArray(bb,&b);
753: VecRestoreArray(xx,&x);
754: PetscLogFlops(2*a->nz);
756: return(0);
757: }
758: /* -------------------------------------------------------------------*/
761: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
762: {
763: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
764: IS iscol = a->col,isrow = a->row;
766: PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
767: PetscInt nz,*rout,*cout,*diag = a->diag;
768: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
771: VecGetArray(bb,&b);
772: VecGetArray(xx,&x);
773: tmp = a->solve_work;
775: ISGetIndices(isrow,&rout); r = rout;
776: ISGetIndices(iscol,&cout); c = cout;
778: /* copy the b into temp work space according to permutation */
779: for (i=0; i<n; i++) tmp[i] = b[c[i]];
781: /* forward solve the U^T */
782: for (i=0; i<n; i++) {
783: v = aa + diag[i] ;
784: vi = aj + diag[i] + 1;
785: nz = ai[i+1] - diag[i] - 1;
786: s1 = tmp[i];
787: s1 *= (*v++); /* multiply by inverse of diagonal entry */
788: while (nz--) {
789: tmp[*vi++ ] -= (*v++)*s1;
790: }
791: tmp[i] = s1;
792: }
794: /* backward solve the L^T */
795: for (i=n-1; i>=0; i--){
796: v = aa + diag[i] - 1 ;
797: vi = aj + diag[i] - 1 ;
798: nz = diag[i] - ai[i];
799: s1 = tmp[i];
800: while (nz--) {
801: tmp[*vi-- ] -= (*v--)*s1;
802: }
803: }
805: /* copy tmp into x according to permutation */
806: for (i=0; i<n; i++) x[r[i]] = tmp[i];
808: ISRestoreIndices(isrow,&rout);
809: ISRestoreIndices(iscol,&cout);
810: VecRestoreArray(bb,&b);
811: VecRestoreArray(xx,&x);
813: PetscLogFlops(2*a->nz-A->n);
814: return(0);
815: }
819: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
820: {
821: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
822: IS iscol = a->col,isrow = a->row;
824: PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
825: PetscInt nz,*rout,*cout,*diag = a->diag;
826: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
829: if (zz != xx) {VecCopy(zz,xx);}
831: VecGetArray(bb,&b);
832: VecGetArray(xx,&x);
833: tmp = a->solve_work;
835: ISGetIndices(isrow,&rout); r = rout;
836: ISGetIndices(iscol,&cout); c = cout;
838: /* copy the b into temp work space according to permutation */
839: for (i=0; i<n; i++) tmp[i] = b[c[i]];
841: /* forward solve the U^T */
842: for (i=0; i<n; i++) {
843: v = aa + diag[i] ;
844: vi = aj + diag[i] + 1;
845: nz = ai[i+1] - diag[i] - 1;
846: tmp[i] *= *v++;
847: while (nz--) {
848: tmp[*vi++ ] -= (*v++)*tmp[i];
849: }
850: }
852: /* backward solve the L^T */
853: for (i=n-1; i>=0; i--){
854: v = aa + diag[i] - 1 ;
855: vi = aj + diag[i] - 1 ;
856: nz = diag[i] - ai[i];
857: while (nz--) {
858: tmp[*vi-- ] -= (*v--)*tmp[i];
859: }
860: }
862: /* copy tmp into x according to permutation */
863: for (i=0; i<n; i++) x[r[i]] += tmp[i];
865: ISRestoreIndices(isrow,&rout);
866: ISRestoreIndices(iscol,&cout);
867: VecRestoreArray(bb,&b);
868: VecRestoreArray(xx,&x);
870: PetscLogFlops(2*a->nz);
871: return(0);
872: }
873: /* ----------------------------------------------------------------*/
874: EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
878: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
879: {
880: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
881: IS isicol;
883: PetscInt *r,*ic,prow,n = A->m,*ai = a->i,*aj = a->j;
884: PetscInt *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
885: PetscInt *dloc,idx,row,m,fm,nzf,nzi,len, reallocs = 0,dcount = 0;
886: PetscInt incrlev,nnz,i,levels,diagonal_fill;
887: PetscTruth col_identity,row_identity;
888: PetscReal f;
889:
891: f = info->fill;
892: levels = (PetscInt)info->levels;
893: diagonal_fill = (PetscInt)info->diagonal_fill;
894: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
896: /* special case that simply copies fill pattern */
897: ISIdentity(isrow,&row_identity);
898: ISIdentity(iscol,&col_identity);
899: if (!levels && row_identity && col_identity) {
900: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
901: (*fact)->factor = FACTOR_LU;
902: b = (Mat_SeqAIJ*)(*fact)->data;
903: if (!b->diag) {
904: MatMarkDiagonal_SeqAIJ(*fact);
905: }
906: MatMissingDiagonal_SeqAIJ(*fact);
907: b->row = isrow;
908: b->col = iscol;
909: b->icol = isicol;
910: b->lu_damping = info->damping;
911: b->lu_zeropivot = info->zeropivot;
912: b->lu_shift = info->shift;
913: b->lu_shift_fraction= info->shift_fraction;
914: PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
915: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
916: PetscObjectReference((PetscObject)isrow);
917: PetscObjectReference((PetscObject)iscol);
918: return(0);
919: }
921: ISGetIndices(isrow,&r);
922: ISGetIndices(isicol,&ic);
924: /* get new row pointers */
925: PetscMalloc((n+1)*sizeof(PetscInt),&ainew);
926: ainew[0] = 0;
927: /* don't know how many column pointers are needed so estimate */
928: jmax = (PetscInt)(f*(ai[n]+1));
929: PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);
930: /* ajfill is level of fill for each fill entry */
931: PetscMalloc((jmax)*sizeof(PetscInt),&ajfill);
932: /* fill is a linked list of nonzeros in active row */
933: PetscMalloc((n+1)*sizeof(PetscInt),&fill);
934: /* im is level for each filled value */
935: PetscMalloc((n+1)*sizeof(PetscInt),&im);
936: /* dloc is location of diagonal in factor */
937: PetscMalloc((n+1)*sizeof(PetscInt),&dloc);
938: dloc[0] = 0;
939: for (prow=0; prow<n; prow++) {
941: /* copy current row into linked list */
942: nzf = nz = ai[r[prow]+1] - ai[r[prow]];
943: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
944: xi = aj + ai[r[prow]] ;
945: fill[n] = n;
946: fill[prow] = -1; /* marker to indicate if diagonal exists */
947: while (nz--) {
948: fm = n;
949: idx = ic[*xi++ ];
950: do {
951: m = fm;
952: fm = fill[m];
953: } while (fm < idx);
954: fill[m] = idx;
955: fill[idx] = fm;
956: im[idx] = 0;
957: }
959: /* make sure diagonal entry is included */
960: if (diagonal_fill && fill[prow] == -1) {
961: fm = n;
962: while (fill[fm] < prow) fm = fill[fm];
963: fill[prow] = fill[fm]; /* insert diagonal into linked list */
964: fill[fm] = prow;
965: im[prow] = 0;
966: nzf++;
967: dcount++;
968: }
970: nzi = 0;
971: row = fill[n];
972: while (row < prow) {
973: incrlev = im[row] + 1;
974: nz = dloc[row];
975: xi = ajnew + ainew[row] + nz + 1;
976: flev = ajfill + ainew[row] + nz + 1;
977: nnz = ainew[row+1] - ainew[row] - nz - 1;
978: fm = row;
979: while (nnz-- > 0) {
980: idx = *xi++ ;
981: if (*flev + incrlev > levels) {
982: flev++;
983: continue;
984: }
985: do {
986: m = fm;
987: fm = fill[m];
988: } while (fm < idx);
989: if (fm != idx) {
990: im[idx] = *flev + incrlev;
991: fill[m] = idx;
992: fill[idx] = fm;
993: fm = idx;
994: nzf++;
995: } else {
996: if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
997: }
998: flev++;
999: }
1000: row = fill[row];
1001: nzi++;
1002: }
1003: /* copy new filled row into permanent storage */
1004: ainew[prow+1] = ainew[prow] + nzf;
1005: if (ainew[prow+1] > jmax) {
1007: /* estimate how much additional space we will need */
1008: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1009: /* just double the memory each time */
1010: /* maxadd = (PetscInt)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1011: PetscInt maxadd = jmax;
1012: if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1013: jmax += maxadd;
1015: /* allocate a longer ajnew and ajfill */
1016: PetscMalloc(jmax*sizeof(PetscInt),&xi);
1017: PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(PetscInt));
1018: PetscFree(ajnew);
1019: ajnew = xi;
1020: PetscMalloc(jmax*sizeof(PetscInt),&xi);
1021: PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(PetscInt));
1022: PetscFree(ajfill);
1023: ajfill = xi;
1024: reallocs++; /* count how many times we realloc */
1025: }
1026: xi = ajnew + ainew[prow] ;
1027: flev = ajfill + ainew[prow] ;
1028: dloc[prow] = nzi;
1029: fm = fill[n];
1030: while (nzf--) {
1031: *xi++ = fm ;
1032: *flev++ = im[fm];
1033: fm = fill[fm];
1034: }
1035: /* make sure row has diagonal entry */
1036: if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1037: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1038: try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1039: }
1040: }
1041: PetscFree(ajfill);
1042: ISRestoreIndices(isrow,&r);
1043: ISRestoreIndices(isicol,&ic);
1044: PetscFree(fill);
1045: PetscFree(im);
1047: {
1048: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1049: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af);
1050: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1051: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1052: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1053: if (diagonal_fill) {
1054: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount);
1055: }
1056: }
1058: /* put together the new matrix */
1059: MatCreate(A->comm,n,n,n,n,fact);
1060: MatSetType(*fact,A->type_name);
1061: MatSeqAIJSetPreallocation(*fact,0,PETSC_NULL);
1062: PetscLogObjectParent(*fact,isicol);
1063: b = (Mat_SeqAIJ*)(*fact)->data;
1064: PetscFree(b->imax);
1065: b->singlemalloc = PETSC_FALSE;
1066: len = (ainew[n] )*sizeof(PetscScalar);
1067: /* the next line frees the default space generated by the Create() */
1068: PetscFree(b->a);
1069: PetscFree(b->ilen);
1070: PetscMalloc(len+1,&b->a);
1071: b->j = ajnew;
1072: b->i = ainew;
1073: for (i=0; i<n; i++) dloc[i] += ainew[i];
1074: b->diag = dloc;
1075: b->ilen = 0;
1076: b->imax = 0;
1077: b->row = isrow;
1078: b->col = iscol;
1079: PetscObjectReference((PetscObject)isrow);
1080: PetscObjectReference((PetscObject)iscol);
1081: b->icol = isicol;
1082: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1083: /* In b structure: Free imax, ilen, old a, old j.
1084: Allocate dloc, solve_work, new a, new j */
1085: PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1086: b->maxnz = b->nz = ainew[n] ;
1087: b->lu_damping = info->damping;
1088: b->lu_shift = info->shift;
1089: b->lu_shift_fraction = info->shift_fraction;
1090: b->lu_zeropivot = info->zeropivot;
1091: (*fact)->factor = FACTOR_LU;
1092: Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
1093: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1095: (*fact)->info.factor_mallocs = reallocs;
1096: (*fact)->info.fill_ratio_given = f;
1097: (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1098: return(0);
1099: }
1103: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact)
1104: {
1105: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1106: PetscErrorCode ierr,(*f)(Mat,Mat*);
1109: if (!a->sbaijMat){
1110: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1111: }
1112: PetscObjectQueryFunction((PetscObject) *fact,"MatCholeskyFactorNumeric",(void (**)(void))&f);
1113: (*f)(a->sbaijMat,fact);
1114: MatDestroy(a->sbaijMat);
1115: a->sbaijMat = PETSC_NULL;
1116: return(0);
1117: }
1121: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1122: {
1123: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1125: PetscTruth perm_identity;
1126:
1128: ISIdentity(perm,&perm_identity);
1129: if (!perm_identity){
1130: SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet");
1131: }
1132: if (!a->sbaijMat){
1133: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1134: }
1136: MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);
1137: PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);
1138: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1139:
1140: return(0);
1141: }
1145: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1146: {
1147: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1149: PetscTruth perm_identity;
1150:
1152: ISIdentity(perm,&perm_identity);
1153: if (!perm_identity){
1154: SETERRQ(PETSC_ERR_SUP,"Non-identity permutation is not supported yet");
1155: }
1156: if (!a->sbaijMat){
1157: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1158: }
1159: MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);
1160: PetscObjectComposeFunction((PetscObject) *fact,"MatCholeskyFactorNumeric","dummyname",(FCNVOID)(*fact)->ops->choleskyfactornumeric);
1161: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1162: return(0);
1163: }