Actual source code: aijfact.c
1: /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/vec/vecimpl.h
5: #include src/inline/dot.h
6: #include src/inline/spops.h
10: int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
11: {
14: SETERRQ(PETSC_ERR_SUP,"Code not written");
15: #if !defined(PETSC_USE_DEBUG)
16: return(0);
17: #endif
18: }
21: EXTERN int MatMarkDiagonal_SeqAIJ(Mat);
22: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
24: EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*);
25: EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*);
26: EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*);
30: /* ------------------------------------------------------------
32: This interface was contribed by Tony Caola
34: This routine is an interface to the pivoting drop-tolerance
35: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36: SPARSEKIT2.
38: The SPARSEKIT2 routines used here are covered by the GNU
39: copyright; see the file gnu in this directory.
41: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42: help in getting this routine ironed out.
44: The major drawback to this routine is that if info->fill is
45: not large enough it fails rather than allocating more space;
46: this can be fixed by hacking/improving the f2c version of
47: Yousef Saad's code.
49: ------------------------------------------------------------
50: */
51: int MatILUDTFactor_SeqAIJ(Mat A,MatFactorInfo *info,IS isrow,IS iscol,Mat *fact)
52: {
53: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
55: SETERRQ(1,"This distribution does not include GNU Copyright code\n\
56: You can obtain the drop tolerance routines by installing PETSc from\n\
57: www.mcs.anl.gov/petsc\n");
58: #else
59: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
60: IS iscolf,isicol,isirow;
61: PetscTruth reorder;
62: int *c,*r,*ic,ierr,i,n = A->m;
63: int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
64: int *ordcol,*iwk,*iperm,*jw;
65: int jmax,lfill,job,*o_i,*o_j;
66: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
67: PetscReal permtol,af;
71: if (info->dt == PETSC_DEFAULT) info->dt = .005;
72: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
73: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
74: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
75: lfill = (int)(info->dtcount/2.0);
76: jmax = (int)(info->fill*a->nz);
77: permtol = info->dtcol;
80: /* ------------------------------------------------------------
81: If reorder=.TRUE., then the original matrix has to be
82: reordered to reflect the user selected ordering scheme, and
83: then de-reordered so it is in it's original format.
84: Because Saad's dperm() is NOT in place, we have to copy
85: the original matrix and allocate more storage. . .
86: ------------------------------------------------------------
87: */
89: /* set reorder to true if either isrow or iscol is not identity */
90: ISIdentity(isrow,&reorder);
91: if (reorder) {ISIdentity(iscol,&reorder);}
92: reorder = PetscNot(reorder);
94:
95: /* storage for ilu factor */
96: PetscMalloc((n+1)*sizeof(int),&new_i);
97: PetscMalloc(jmax*sizeof(int),&new_j);
98: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
99: PetscMalloc(n*sizeof(int),&ordcol);
101: /* ------------------------------------------------------------
102: Make sure that everything is Fortran formatted (1-Based)
103: ------------------------------------------------------------
104: */
105: for (i=old_i[0];i<old_i[n];i++) {
106: old_j[i]++;
107: }
108: for(i=0;i<n+1;i++) {
109: old_i[i]++;
110: };
111:
113: if (reorder) {
114: ISGetIndices(iscol,&c);
115: ISGetIndices(isrow,&r);
116: for(i=0;i<n;i++) {
117: r[i] = r[i]+1;
118: c[i] = c[i]+1;
119: }
120: PetscMalloc((n+1)*sizeof(int),&old_i2);
121: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);
122: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
123: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124: for (i=0;i<n;i++) {
125: r[i] = r[i]-1;
126: c[i] = c[i]-1;
127: }
128: ISRestoreIndices(iscol,&c);
129: ISRestoreIndices(isrow,&r);
130: o_a = old_a2;
131: o_j = old_j2;
132: o_i = old_i2;
133: } else {
134: o_a = old_a;
135: o_j = old_j;
136: o_i = old_i;
137: }
139: /* ------------------------------------------------------------
140: Call Saad's ilutp() routine to generate the factorization
141: ------------------------------------------------------------
142: */
144: PetscMalloc(2*n*sizeof(int),&iperm);
145: PetscMalloc(2*n*sizeof(int),&jw);
146: PetscMalloc(n*sizeof(PetscScalar),&w);
148: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
149: if (ierr) {
150: switch (ierr) {
151: case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
152: case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
153: case -5: SETERRQ(1,"ilutp(), zero row encountered");
154: case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
155: case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
156: default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr);
157: }
158: }
160: PetscFree(w);
161: PetscFree(jw);
163: /* ------------------------------------------------------------
164: Saad's routine gives the result in Modified Sparse Row (msr)
165: Convert to Compressed Sparse Row format (csr)
166: ------------------------------------------------------------
167: */
169: PetscMalloc(n*sizeof(PetscScalar),&wk);
170: PetscMalloc((n+1)*sizeof(int),&iwk);
172: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
174: PetscFree(iwk);
175: PetscFree(wk);
177: if (reorder) {
178: PetscFree(old_a2);
179: PetscFree(old_j2);
180: PetscFree(old_i2);
181: } else {
182: /* fix permutation of old_j that the factorization introduced */
183: for (i=old_i[0]; i<old_i[n]; i++) {
184: old_j[i-1] = iperm[old_j[i-1]-1];
185: }
186: }
188: /* get rid of the shift to indices starting at 1 */
189: for (i=0; i<n+1; i++) {
190: old_i[i]--;
191: }
192: for (i=old_i[0];i<old_i[n];i++) {
193: old_j[i]--;
194: }
195:
196: /* Make the factored matrix 0-based */
197: for (i=0; i<n+1; i++) {
198: new_i[i]--;
199: }
200: for (i=new_i[0];i<new_i[n];i++) {
201: new_j[i]--;
202: }
204: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205: /*-- permute the right-hand-side and solution vectors --*/
206: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
207: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
208: ISGetIndices(isicol,&ic);
209: for(i=0; i<n; i++) {
210: ordcol[i] = ic[iperm[i]-1];
211: };
212: ISRestoreIndices(isicol,&ic);
213: ISDestroy(isicol);
215: PetscFree(iperm);
217: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
218: PetscFree(ordcol);
220: /*----- put together the new matrix -----*/
222: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
223: (*fact)->factor = FACTOR_LU;
224: (*fact)->assembled = PETSC_TRUE;
226: b = (Mat_SeqAIJ*)(*fact)->data;
227: PetscFree(b->imax);
228: b->sorted = PETSC_FALSE;
229: b->singlemalloc = PETSC_FALSE;
230: /* the next line frees the default space generated by the MatCreate() */
231: PetscFree(b->a);
232: PetscFree(b->ilen);
233: b->a = new_a;
234: b->j = new_j;
235: b->i = new_i;
236: b->ilen = 0;
237: b->imax = 0;
238: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239: b->row = isirow;
240: b->col = iscolf;
241: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
242: b->maxnz = b->nz = new_i[n];
243: MatMarkDiagonal_SeqAIJ(*fact);
244: (*fact)->info.factor_mallocs = 0;
246: MatMarkDiagonal_SeqAIJ(A);
248: /* check out for identical nodes. If found, use inode functions */
249: Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
251: af = ((double)b->nz)/((double)a->nz) + .001;
252: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af);
253: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af);
254: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af);
255: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.\n");
257: return(0);
258: #endif
259: }
261: /*
262: Factorization code for AIJ format.
263: */
266: int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
267: {
268: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
269: IS isicol;
270: int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j;
271: int *ainew,*ajnew,jmax,*fill,*ajtmp,nz;
272: int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
273: PetscReal f;
276: if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
277: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
278: ISGetIndices(isrow,&r);
279: ISGetIndices(isicol,&ic);
281: /* get new row pointers */
282: PetscMalloc((n+1)*sizeof(int),&ainew);
283: ainew[0] = 0;
284: /* don't know how many column pointers are needed so estimate */
285: f = info->fill;
286: jmax = (int)(f*ai[n]+1);
287: PetscMalloc((jmax)*sizeof(int),&ajnew);
288: /* fill is a linked list of nonzeros in active row */
289: PetscMalloc((2*n+1)*sizeof(int),&fill);
290: im = fill + n + 1;
291: /* idnew is location of diagonal in factor */
292: PetscMalloc((n+1)*sizeof(int),&idnew);
293: idnew[0] = 0;
295: for (i=0; i<n; i++) {
296: /* first copy previous fill into linked list */
297: nnz = nz = ai[r[i]+1] - ai[r[i]];
298: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
299: ajtmp = aj + ai[r[i]];
300: fill[n] = n;
301: while (nz--) {
302: fm = n;
303: idx = ic[*ajtmp++];
304: do {
305: m = fm;
306: fm = fill[m];
307: } while (fm < idx);
308: fill[m] = idx;
309: fill[idx] = fm;
310: }
311: row = fill[n];
312: while (row < i) {
313: ajtmp = ajnew + idnew[row] + 1;
314: nzbd = 1 + idnew[row] - ainew[row];
315: nz = im[row] - nzbd;
316: fm = row;
317: while (nz-- > 0) {
318: idx = *ajtmp++ ;
319: nzbd++;
320: if (idx == i) im[row] = nzbd;
321: do {
322: m = fm;
323: fm = fill[m];
324: } while (fm < idx);
325: if (fm != idx) {
326: fill[m] = idx;
327: fill[idx] = fm;
328: fm = idx;
329: nnz++;
330: }
331: }
332: row = fill[row];
333: }
334: /* copy new filled row into permanent storage */
335: ainew[i+1] = ainew[i] + nnz;
336: if (ainew[i+1] > jmax) {
338: /* estimate how much additional space we will need */
339: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
340: /* just double the memory each time */
341: int maxadd = jmax;
342: /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
343: if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
344: jmax += maxadd;
346: /* allocate a longer ajnew */
347: PetscMalloc(jmax*sizeof(int),&ajtmp);
348: PetscMemcpy(ajtmp,ajnew,(ainew[i])*sizeof(int));
349: PetscFree(ajnew);
350: ajnew = ajtmp;
351: realloc++; /* count how many times we realloc */
352: }
353: ajtmp = ajnew + ainew[i];
354: fm = fill[n];
355: nzi = 0;
356: im[i] = nnz;
357: while (nnz--) {
358: if (fm < i) nzi++;
359: *ajtmp++ = fm ;
360: fm = fill[fm];
361: }
362: idnew[i] = ainew[i] + nzi;
363: }
364: if (ai[n] != 0) {
365: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
366: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
367: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af);
368: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af);
369: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n");
370: } else {
371: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n");
372: }
374: ISRestoreIndices(isrow,&r);
375: ISRestoreIndices(isicol,&ic);
377: PetscFree(fill);
379: /* put together the new matrix */
380: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);
381: PetscLogObjectParent(*B,isicol);
382: b = (Mat_SeqAIJ*)(*B)->data;
383: PetscFree(b->imax);
384: b->singlemalloc = PETSC_FALSE;
385: /* the next line frees the default space generated by the Create() */
386: PetscFree(b->a);
387: PetscFree(b->ilen);
388: PetscMalloc((ainew[n]+1)*sizeof(PetscScalar),&b->a);
389: b->j = ajnew;
390: b->i = ainew;
391: b->diag = idnew;
392: b->ilen = 0;
393: b->imax = 0;
394: b->row = isrow;
395: b->col = iscol;
396: b->lu_damping = info->damping;
397: b->lu_zeropivot = info->zeropivot;
398: b->lu_shift = info->shift;
399: b->lu_shift_fraction = info->shift_fraction;
400: PetscObjectReference((PetscObject)isrow);
401: PetscObjectReference((PetscObject)iscol);
402: b->icol = isicol;
403: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
404: /* In b structure: Free imax, ilen, old a, old j.
405: Allocate idnew, solve_work, new a, new j */
406: PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(PetscScalar)));
407: b->maxnz = b->nz = ainew[n] ;
409: (*B)->factor = FACTOR_LU;
410: (*B)->info.factor_mallocs = realloc;
411: (*B)->info.fill_ratio_given = f;
412: Mat_AIJ_CheckInode(*B,PETSC_FALSE);
413: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
415: if (ai[n] != 0) {
416: (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
417: } else {
418: (*B)->info.fill_ratio_needed = 0.0;
419: }
420: return(0);
421: }
422: /* ----------------------------------------------------------- */
423: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
427: int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
428: {
429: Mat C = *B;
430: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
431: IS isrow = b->row,isicol = b->icol;
432: int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j;
433: int *ajtmpold,*ajtmp,nz,row,*ics;
434: int *diag_offset = b->diag,diag,*pj,ndamp = 0, nshift=0;
435: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
436: PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs,d;
437: PetscReal row_shift,shift_fraction,shift_amount,shift_lo=0., shift_hi=1., shift_top=0.;
438: PetscTruth damp,lushift;
441: ISGetIndices(isrow,&r);
442: ISGetIndices(isicol,&ic);
443: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
444: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
445: rtmps = rtmp; ics = ic;
447: if (!a->diag) {
448: MatMarkDiagonal_SeqAIJ(A);
449: }
451: if (b->lu_shift) { /* set max shift */
452: int *aai = a->i,*ddiag = a->diag;
453: shift_top = 0;
454: for (i=0; i<n; i++) {
455: d = PetscAbsScalar((a->a)[ddiag[i]]);
456: /* calculate amt of shift needed for this row */
457: if (d<0) {
458: row_shift = 0;
459: } else {
460: row_shift = -2*d;
461: }
462: v = a->a+aai[i];
463: for (j=0; j<aai[i+1]-aai[i]; j++)
464: row_shift += PetscAbsScalar(v[j]);
465: if (row_shift>shift_top) shift_top = row_shift;
466: }
467: }
469: shift_fraction = 0; shift_amount = 0;
470: do {
471: damp = PETSC_FALSE;
472: lushift = PETSC_FALSE;
473: for (i=0; i<n; i++) {
474: nz = ai[i+1] - ai[i];
475: ajtmp = aj + ai[i];
476: for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
478: /* load in initial (unfactored row) */
479: nz = a->i[r[i]+1] - a->i[r[i]];
480: ajtmpold = a->j + a->i[r[i]];
481: v = a->a + a->i[r[i]];
482: for (j=0; j<nz; j++) {
483: rtmp[ics[ajtmpold[j]]] = v[j];
484: }
485: rtmp[ics[r[i]]] += damping + shift_amount; /* damp the diagonal of the matrix */
487: row = *ajtmp++ ;
488: while (row < i) {
489: pc = rtmp + row;
490: if (*pc != 0.0) {
491: pv = b->a + diag_offset[row] ;
492: pj = b->j + diag_offset[row] + 1;
493: multiplier = *pc / *pv++;
494: *pc = multiplier;
495: nz = ai[row+1] - diag_offset[row] - 1;
496: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
497: PetscLogFlops(2*nz);
498: }
499: row = *ajtmp++;
500: }
501: /* finished row so stick it into b->a */
502: pv = b->a + ai[i] ;
503: pj = b->j + ai[i] ;
504: nz = ai[i+1] - ai[i];
505: diag = diag_offset[i] - ai[i];
506: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
507: rs = 0.0;
508: for (j=0; j<nz; j++) {
509: pv[j] = rtmps[pj[j]];
510: if (j != diag) rs += PetscAbsScalar(pv[j]);
511: }
512: #define MAX_NSHIFT 5
513: if (PetscRealPart(pv[diag]) < zeropivot*rs && b->lu_shift) {
514: if (nshift>MAX_NSHIFT) {
515: SETERRQ(1,"Unable to determine shift to enforce positive definite preconditioner");
516: } else if (nshift==MAX_NSHIFT) {
517: shift_fraction = shift_hi;
518: } else {
519: shift_lo = shift_fraction; shift_fraction = (shift_hi+shift_lo)/2.;
520: }
521: shift_amount = shift_fraction * shift_top;
522: lushift = PETSC_TRUE;
523: nshift++;
524: break;
525: }
526: if (PetscAbsScalar(pv[diag]) < zeropivot*rs) {
527: if (damping) {
528: if (ndamp) damping *= 2.0;
529: damp = PETSC_TRUE;
530: ndamp++;
531: break;
532: } else {
533: SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
534: }
535: }
536: }
537: if (!lushift && b->lu_shift && shift_fraction>0 && nshift<MAX_NSHIFT) {
538: /*
539: * if not already shifting up & shifting & started shifting & can refine,
540: * then try lower shift
541: */
542: shift_hi = shift_fraction;
543: shift_fraction = (shift_hi+shift_lo)/2.;
544: shift_amount = shift_fraction * shift_top;
545: lushift = PETSC_TRUE;
546: nshift++;
547: }
548: } while (damp || lushift);
550: /* invert diagonal entries for simplier triangular solves */
551: for (i=0; i<n; i++) {
552: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
553: }
555: PetscFree(rtmp);
556: ISRestoreIndices(isicol,&ic);
557: ISRestoreIndices(isrow,&r);
558: C->factor = FACTOR_LU;
559: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
560: C->assembled = PETSC_TRUE;
561: PetscLogFlops(C->n);
562: if (ndamp) {
563: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %g\n",ndamp,damping);
564: }
565: if (nshift) {
566: b->lu_shift_fraction = shift_fraction;
567: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: diagonal shifted up by %e fraction\n",shift_fraction);
568: }
569: return(0);
570: }
574: int MatUsePETSc_SeqAIJ(Mat A)
575: {
577: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
578: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
579: return(0);
580: }
583: /* ----------------------------------------------------------- */
586: int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
587: {
589: Mat C;
592: MatLUFactorSymbolic(A,row,col,info,&C);
593: MatLUFactorNumeric(A,&C);
594: MatHeaderCopy(A,C);
595: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
596: return(0);
597: }
598: /* ----------------------------------------------------------- */
601: int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
602: {
603: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
604: IS iscol = a->col,isrow = a->row;
605: int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
606: int nz,*rout,*cout;
607: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
610: if (!n) return(0);
612: VecGetArray(bb,&b);
613: VecGetArray(xx,&x);
614: tmp = a->solve_work;
616: ISGetIndices(isrow,&rout); r = rout;
617: ISGetIndices(iscol,&cout); c = cout + (n-1);
619: /* forward solve the lower triangular */
620: tmp[0] = b[*r++];
621: tmps = tmp;
622: for (i=1; i<n; i++) {
623: v = aa + ai[i] ;
624: vi = aj + ai[i] ;
625: nz = a->diag[i] - ai[i];
626: sum = b[*r++];
627: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
628: tmp[i] = sum;
629: }
631: /* backward solve the upper triangular */
632: for (i=n-1; i>=0; i--){
633: v = aa + a->diag[i] + 1;
634: vi = aj + a->diag[i] + 1;
635: nz = ai[i+1] - a->diag[i] - 1;
636: sum = tmp[i];
637: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
638: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
639: }
641: ISRestoreIndices(isrow,&rout);
642: ISRestoreIndices(iscol,&cout);
643: VecRestoreArray(bb,&b);
644: VecRestoreArray(xx,&x);
645: PetscLogFlops(2*a->nz - A->n);
646: return(0);
647: }
649: /* ----------------------------------------------------------- */
652: int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
653: {
654: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
655: int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr;
656: PetscScalar *x,*b,*aa = a->a;
657: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
658: int adiag_i,i,*vi,nz,ai_i;
659: PetscScalar *v,sum;
660: #endif
663: if (!n) return(0);
665: VecGetArray(bb,&b);
666: VecGetArray(xx,&x);
668: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
669: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
670: #else
671: /* forward solve the lower triangular */
672: x[0] = b[0];
673: for (i=1; i<n; i++) {
674: ai_i = ai[i];
675: v = aa + ai_i;
676: vi = aj + ai_i;
677: nz = adiag[i] - ai_i;
678: sum = b[i];
679: while (nz--) sum -= *v++ * x[*vi++];
680: x[i] = sum;
681: }
683: /* backward solve the upper triangular */
684: for (i=n-1; i>=0; i--){
685: adiag_i = adiag[i];
686: v = aa + adiag_i + 1;
687: vi = aj + adiag_i + 1;
688: nz = ai[i+1] - adiag_i - 1;
689: sum = x[i];
690: while (nz--) sum -= *v++ * x[*vi++];
691: x[i] = sum*aa[adiag_i];
692: }
693: #endif
694: PetscLogFlops(2*a->nz - A->n);
695: VecRestoreArray(bb,&b);
696: VecRestoreArray(xx,&x);
697: return(0);
698: }
702: int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
703: {
704: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
705: IS iscol = a->col,isrow = a->row;
706: int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
707: int nz,*rout,*cout;
708: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
711: if (yy != xx) {VecCopy(yy,xx);}
713: VecGetArray(bb,&b);
714: VecGetArray(xx,&x);
715: tmp = a->solve_work;
717: ISGetIndices(isrow,&rout); r = rout;
718: ISGetIndices(iscol,&cout); c = cout + (n-1);
720: /* forward solve the lower triangular */
721: tmp[0] = b[*r++];
722: for (i=1; i<n; i++) {
723: v = aa + ai[i] ;
724: vi = aj + ai[i] ;
725: nz = a->diag[i] - ai[i];
726: sum = b[*r++];
727: while (nz--) sum -= *v++ * tmp[*vi++ ];
728: tmp[i] = sum;
729: }
731: /* backward solve the upper triangular */
732: for (i=n-1; i>=0; i--){
733: v = aa + a->diag[i] + 1;
734: vi = aj + a->diag[i] + 1;
735: nz = ai[i+1] - a->diag[i] - 1;
736: sum = tmp[i];
737: while (nz--) sum -= *v++ * tmp[*vi++ ];
738: tmp[i] = sum*aa[a->diag[i]];
739: x[*c--] += tmp[i];
740: }
742: ISRestoreIndices(isrow,&rout);
743: ISRestoreIndices(iscol,&cout);
744: VecRestoreArray(bb,&b);
745: VecRestoreArray(xx,&x);
746: PetscLogFlops(2*a->nz);
748: return(0);
749: }
750: /* -------------------------------------------------------------------*/
753: int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
754: {
755: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
756: IS iscol = a->col,isrow = a->row;
757: int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
758: int nz,*rout,*cout,*diag = a->diag;
759: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
762: VecGetArray(bb,&b);
763: VecGetArray(xx,&x);
764: tmp = a->solve_work;
766: ISGetIndices(isrow,&rout); r = rout;
767: ISGetIndices(iscol,&cout); c = cout;
769: /* copy the b into temp work space according to permutation */
770: for (i=0; i<n; i++) tmp[i] = b[c[i]];
772: /* forward solve the U^T */
773: for (i=0; i<n; i++) {
774: v = aa + diag[i] ;
775: vi = aj + diag[i] + 1;
776: nz = ai[i+1] - diag[i] - 1;
777: s1 = tmp[i];
778: s1 *= (*v++); /* multiply by inverse of diagonal entry */
779: while (nz--) {
780: tmp[*vi++ ] -= (*v++)*s1;
781: }
782: tmp[i] = s1;
783: }
785: /* backward solve the L^T */
786: for (i=n-1; i>=0; i--){
787: v = aa + diag[i] - 1 ;
788: vi = aj + diag[i] - 1 ;
789: nz = diag[i] - ai[i];
790: s1 = tmp[i];
791: while (nz--) {
792: tmp[*vi-- ] -= (*v--)*s1;
793: }
794: }
796: /* copy tmp into x according to permutation */
797: for (i=0; i<n; i++) x[r[i]] = tmp[i];
799: ISRestoreIndices(isrow,&rout);
800: ISRestoreIndices(iscol,&cout);
801: VecRestoreArray(bb,&b);
802: VecRestoreArray(xx,&x);
804: PetscLogFlops(2*a->nz-A->n);
805: return(0);
806: }
810: int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
811: {
812: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
813: IS iscol = a->col,isrow = a->row;
814: int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
815: int nz,*rout,*cout,*diag = a->diag;
816: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
819: if (zz != xx) VecCopy(zz,xx);
821: VecGetArray(bb,&b);
822: VecGetArray(xx,&x);
823: tmp = a->solve_work;
825: ISGetIndices(isrow,&rout); r = rout;
826: ISGetIndices(iscol,&cout); c = cout;
828: /* copy the b into temp work space according to permutation */
829: for (i=0; i<n; i++) tmp[i] = b[c[i]];
831: /* forward solve the U^T */
832: for (i=0; i<n; i++) {
833: v = aa + diag[i] ;
834: vi = aj + diag[i] + 1;
835: nz = ai[i+1] - diag[i] - 1;
836: tmp[i] *= *v++;
837: while (nz--) {
838: tmp[*vi++ ] -= (*v++)*tmp[i];
839: }
840: }
842: /* backward solve the L^T */
843: for (i=n-1; i>=0; i--){
844: v = aa + diag[i] - 1 ;
845: vi = aj + diag[i] - 1 ;
846: nz = diag[i] - ai[i];
847: while (nz--) {
848: tmp[*vi-- ] -= (*v--)*tmp[i];
849: }
850: }
852: /* copy tmp into x according to permutation */
853: for (i=0; i<n; i++) x[r[i]] += tmp[i];
855: ISRestoreIndices(isrow,&rout);
856: ISRestoreIndices(iscol,&cout);
857: VecRestoreArray(bb,&b);
858: VecRestoreArray(xx,&x);
860: PetscLogFlops(2*a->nz);
861: return(0);
862: }
863: /* ----------------------------------------------------------------*/
864: EXTERN int MatMissingDiagonal_SeqAIJ(Mat);
868: int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
869: {
870: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
871: IS isicol;
872: int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j;
873: int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
874: int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
875: int incrlev,nnz,i,levels,diagonal_fill;
876: PetscTruth col_identity,row_identity;
877: PetscReal f;
878:
880: f = info->fill;
881: levels = (int)info->levels;
882: diagonal_fill = (int)info->diagonal_fill;
883: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
885: /* special case that simply copies fill pattern */
886: ISIdentity(isrow,&row_identity);
887: ISIdentity(iscol,&col_identity);
888: if (!levels && row_identity && col_identity) {
889: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
890: (*fact)->factor = FACTOR_LU;
891: b = (Mat_SeqAIJ*)(*fact)->data;
892: if (!b->diag) {
893: MatMarkDiagonal_SeqAIJ(*fact);
894: }
895: MatMissingDiagonal_SeqAIJ(*fact);
896: b->row = isrow;
897: b->col = iscol;
898: b->icol = isicol;
899: b->lu_damping = info->damping;
900: b->lu_zeropivot = info->zeropivot;
901: b->lu_shift = info->shift;
902: b->lu_shift_fraction= info->shift_fraction;
903: PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
904: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
905: PetscObjectReference((PetscObject)isrow);
906: PetscObjectReference((PetscObject)iscol);
907: return(0);
908: }
910: ISGetIndices(isrow,&r);
911: ISGetIndices(isicol,&ic);
913: /* get new row pointers */
914: PetscMalloc((n+1)*sizeof(int),&ainew);
915: ainew[0] = 0;
916: /* don't know how many column pointers are needed so estimate */
917: jmax = (int)(f*(ai[n]+1));
918: PetscMalloc((jmax)*sizeof(int),&ajnew);
919: /* ajfill is level of fill for each fill entry */
920: PetscMalloc((jmax)*sizeof(int),&ajfill);
921: /* fill is a linked list of nonzeros in active row */
922: PetscMalloc((n+1)*sizeof(int),&fill);
923: /* im is level for each filled value */
924: PetscMalloc((n+1)*sizeof(int),&im);
925: /* dloc is location of diagonal in factor */
926: PetscMalloc((n+1)*sizeof(int),&dloc);
927: dloc[0] = 0;
928: for (prow=0; prow<n; prow++) {
930: /* copy current row into linked list */
931: nzf = nz = ai[r[prow]+1] - ai[r[prow]];
932: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
933: xi = aj + ai[r[prow]] ;
934: fill[n] = n;
935: fill[prow] = -1; /* marker to indicate if diagonal exists */
936: while (nz--) {
937: fm = n;
938: idx = ic[*xi++ ];
939: do {
940: m = fm;
941: fm = fill[m];
942: } while (fm < idx);
943: fill[m] = idx;
944: fill[idx] = fm;
945: im[idx] = 0;
946: }
948: /* make sure diagonal entry is included */
949: if (diagonal_fill && fill[prow] == -1) {
950: fm = n;
951: while (fill[fm] < prow) fm = fill[fm];
952: fill[prow] = fill[fm]; /* insert diagonal into linked list */
953: fill[fm] = prow;
954: im[prow] = 0;
955: nzf++;
956: dcount++;
957: }
959: nzi = 0;
960: row = fill[n];
961: while (row < prow) {
962: incrlev = im[row] + 1;
963: nz = dloc[row];
964: xi = ajnew + ainew[row] + nz + 1;
965: flev = ajfill + ainew[row] + nz + 1;
966: nnz = ainew[row+1] - ainew[row] - nz - 1;
967: fm = row;
968: while (nnz-- > 0) {
969: idx = *xi++ ;
970: if (*flev + incrlev > levels) {
971: flev++;
972: continue;
973: }
974: do {
975: m = fm;
976: fm = fill[m];
977: } while (fm < idx);
978: if (fm != idx) {
979: im[idx] = *flev + incrlev;
980: fill[m] = idx;
981: fill[idx] = fm;
982: fm = idx;
983: nzf++;
984: } else {
985: if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
986: }
987: flev++;
988: }
989: row = fill[row];
990: nzi++;
991: }
992: /* copy new filled row into permanent storage */
993: ainew[prow+1] = ainew[prow] + nzf;
994: if (ainew[prow+1] > jmax) {
996: /* estimate how much additional space we will need */
997: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
998: /* just double the memory each time */
999: /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
1000: int maxadd = jmax;
1001: if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
1002: jmax += maxadd;
1004: /* allocate a longer ajnew and ajfill */
1005: PetscMalloc(jmax*sizeof(int),&xi);
1006: PetscMemcpy(xi,ajnew,(ainew[prow])*sizeof(int));
1007: PetscFree(ajnew);
1008: ajnew = xi;
1009: PetscMalloc(jmax*sizeof(int),&xi);
1010: PetscMemcpy(xi,ajfill,(ainew[prow])*sizeof(int));
1011: PetscFree(ajfill);
1012: ajfill = xi;
1013: realloc++; /* count how many times we realloc */
1014: }
1015: xi = ajnew + ainew[prow] ;
1016: flev = ajfill + ainew[prow] ;
1017: dloc[prow] = nzi;
1018: fm = fill[n];
1019: while (nzf--) {
1020: *xi++ = fm ;
1021: *flev++ = im[fm];
1022: fm = fill[fm];
1023: }
1024: /* make sure row has diagonal entry */
1025: if (ajnew[ainew[prow]+dloc[prow]] != prow) {
1026: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrix\n\
1027: try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
1028: }
1029: }
1030: PetscFree(ajfill);
1031: ISRestoreIndices(isrow,&r);
1032: ISRestoreIndices(isicol,&ic);
1033: PetscFree(fill);
1034: PetscFree(im);
1036: {
1037: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
1038: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
1039: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af);
1040: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af);
1041: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n");
1042: if (diagonal_fill) {
1043: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount);
1044: }
1045: }
1047: /* put together the new matrix */
1048: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
1049: PetscLogObjectParent(*fact,isicol);
1050: b = (Mat_SeqAIJ*)(*fact)->data;
1051: PetscFree(b->imax);
1052: b->singlemalloc = PETSC_FALSE;
1053: len = (ainew[n] )*sizeof(PetscScalar);
1054: /* the next line frees the default space generated by the Create() */
1055: PetscFree(b->a);
1056: PetscFree(b->ilen);
1057: PetscMalloc(len+1,&b->a);
1058: b->j = ajnew;
1059: b->i = ainew;
1060: for (i=0; i<n; i++) dloc[i] += ainew[i];
1061: b->diag = dloc;
1062: b->ilen = 0;
1063: b->imax = 0;
1064: b->row = isrow;
1065: b->col = iscol;
1066: PetscObjectReference((PetscObject)isrow);
1067: PetscObjectReference((PetscObject)iscol);
1068: b->icol = isicol;
1069: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1070: /* In b structure: Free imax, ilen, old a, old j.
1071: Allocate dloc, solve_work, new a, new j */
1072: PetscLogObjectMemory(*fact,(ainew[n]-n) * (sizeof(int)+sizeof(PetscScalar)));
1073: b->maxnz = b->nz = ainew[n] ;
1074: b->lu_damping = info->damping;
1075: b->lu_shift = info->shift;
1076: b->lu_shift_fraction = info->shift_fraction;
1077: b->lu_zeropivot = info->zeropivot;
1078: (*fact)->factor = FACTOR_LU;
1079: Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
1080: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1082: (*fact)->info.factor_mallocs = realloc;
1083: (*fact)->info.fill_ratio_given = f;
1084: (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1085: return(0);
1086: }
1088: #include src/mat/impls/sbaij/seq/sbaij.h
1091: int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact)
1092: {
1093: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1094: int ierr;
1097: if (!a->sbaijMat){
1098: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1099: }
1100:
1101: MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(a->sbaijMat,fact);
1102: MatDestroy(a->sbaijMat);
1103: a->sbaijMat = PETSC_NULL;
1104:
1105: return(0);
1106: }
1110: int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1111: {
1112: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1113: int ierr;
1114: PetscTruth perm_identity;
1115:
1117: ISIdentity(perm,&perm_identity);
1118: if (!perm_identity){
1119: SETERRQ(1,"Non-identity permutation is not supported yet");
1120: }
1121: if (!a->sbaijMat){
1122: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1123: }
1125: MatICCFactorSymbolic(a->sbaijMat,perm,info,fact);
1126: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1127:
1128: return(0);
1129: }
1133: int MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1134: {
1135: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1136: int ierr;
1137: PetscTruth perm_identity;
1138:
1140: ISIdentity(perm,&perm_identity);
1141: if (!perm_identity){
1142: SETERRQ(1,"Non-identity permutation is not supported yet");
1143: }
1144: if (!a->sbaijMat){
1145: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1146: }
1148: MatCholeskyFactorSymbolic(a->sbaijMat,perm,info,fact);
1149: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1150:
1151: return(0);
1152: }