Actual source code: aijfact.c
1: /*$Id: aijfact.c,v 1.160 2001/04/10 19:35:19 bsmith Exp $*/
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/vec/vecimpl.h
5: #include src/inline/dot.h
7: int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
8: {
11: SETERRQ(PETSC_ERR_SUP,"Code not written");
12: #if !defined(PETSC_USE_DEBUG)
13: return(0);
14: #endif
15: }
18: EXTERN int MatMarkDiagonal_SeqAIJ(Mat);
19: EXTERN int Mat_AIJ_CheckInode(Mat);
21: EXTERN int SPARSEKIT2dperm(int*,Scalar*,int*,int*,Scalar*,int*,int*,int*,int*,int*);
22: EXTERN int SPARSEKIT2ilutp(int*,Scalar*,int*,int*,int*,PetscReal*,PetscReal*,int*,Scalar*,int*,int*,int*,Scalar*,int*,int*,int*);
23: EXTERN int SPARSEKIT2msrcsr(int*,Scalar*,int*,Scalar*,int*,int*,Scalar*,int*);
25: /* ------------------------------------------------------------
27: This interface was contribed by Tony Caola
29: This routine is an interface to the pivoting drop-tolerance
30: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
31: SPARSEKIT2.
33: The SPARSEKIT2 routines used here are covered by the GNU
34: copyright; see the file gnu in this directory.
36: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
37: help in getting this routine ironed out.
39: The major drawback to this routine is that if info->fill is
40: not large enough it fails rather than allocating more space;
41: this can be fixed by hacking/improving the f2c version of
42: Yousef Saad's code.
44: ishift = 0, for indices start at 1
45: ishift = 1, for indices starting at 0
46: ------------------------------------------------------------
47: */
49: int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
50: {
51: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
52: IS iscolf,isicol,isirow;
53: PetscTruth reorder;
54: int *c,*r,*ic,ierr,i,n = A->m;
55: int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
56: int *ordcol,*iwk,*iperm,*jw;
57: int ishift = !a->indexshift;
58: int jmax,lfill,job,*o_i,*o_j;
59: Scalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
60: PetscReal permtol,af;
64: if (info->dt == PETSC_DEFAULT) info->dt = .005;
65: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
66: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
67: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
68: lfill = (int)(info->dtcount/2.0);
69: jmax = (int)(info->fill*a->nz);
70: permtol = info->dtcol;
73: /* ------------------------------------------------------------
74: If reorder=.TRUE., then the original matrix has to be
75: reordered to reflect the user selected ordering scheme, and
76: then de-reordered so it is in it's original format.
77: Because Saad's dperm() is NOT in place, we have to copy
78: the original matrix and allocate more storage. . .
79: ------------------------------------------------------------
80: */
82: /* set reorder to true if either isrow or iscol is not identity */
83: ISIdentity(isrow,&reorder);
84: if (reorder) {ISIdentity(iscol,&reorder);}
85: reorder = PetscNot(reorder);
87:
88: /* storage for ilu factor */
89: PetscMalloc((n+1)*sizeof(int),&new_i);
90: PetscMalloc(jmax*sizeof(int),&new_j);
91: PetscMalloc(jmax*sizeof(Scalar),&new_a);
92: PetscMalloc(n*sizeof(int),&ordcol);
94: /* ------------------------------------------------------------
95: Make sure that everything is Fortran formatted (1-Based)
96: ------------------------------------------------------------
97: */
98: if (ishift) {
99: for (i=old_i[0];i<old_i[n];i++) {
100: old_j[i]++;
101: }
102: for(i=0;i<n+1;i++) {
103: old_i[i]++;
104: };
105: };
107: if (reorder) {
108: ISGetIndices(iscol,&c);
109: ISGetIndices(isrow,&r);
110: for(i=0;i<n;i++) {
111: r[i] = r[i]+1;
112: c[i] = c[i]+1;
113: }
114: PetscMalloc((n+1)*sizeof(int),&old_i2);
115: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);
116: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(Scalar),old_a2);
117: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
118: for (i=0;i<n;i++) {
119: r[i] = r[i]-1;
120: c[i] = c[i]-1;
121: }
122: ISRestoreIndices(iscol,&c);
123: ISRestoreIndices(isrow,&r);
124: o_a = old_a2;
125: o_j = old_j2;
126: o_i = old_i2;
127: } else {
128: o_a = old_a;
129: o_j = old_j;
130: o_i = old_i;
131: }
133: /* ------------------------------------------------------------
134: Call Saad's ilutp() routine to generate the factorization
135: ------------------------------------------------------------
136: */
138: PetscMalloc(2*n*sizeof(int),&iperm);
139: PetscMalloc(2*n*sizeof(int),&jw);
140: PetscMalloc(n*sizeof(Scalar),&w);
142: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,&info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
143: if (ierr) {
144: switch (ierr) {
145: case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
146: case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
147: case -5: SETERRQ(1,"ilutp(), zero row encountered");
148: case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
149: case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
150: default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr);
151: }
152: }
154: PetscFree(w);
155: PetscFree(jw);
157: /* ------------------------------------------------------------
158: Saad's routine gives the result in Modified Sparse Row (msr)
159: Convert to Compressed Sparse Row format (csr)
160: ------------------------------------------------------------
161: */
163: PetscMalloc(n*sizeof(Scalar),&wk);
164: PetscMalloc((n+1)*sizeof(int),&iwk);
166: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
168: PetscFree(iwk);
169: PetscFree(wk);
171: if (reorder) {
172: PetscFree(old_a2);
173: PetscFree(old_j2);
174: PetscFree(old_i2);
175: } else {
176: /* fix permutation of old_j that the factorization introduced */
177: for (i=old_i[0]; i<old_i[n]; i++) {
178: old_j[i-1] = iperm[old_j[i-1]-1];
179: }
180: }
182: /* get rid of the shift to indices starting at 1 */
183: if (ishift) {
184: for (i=0; i<n+1; i++) {
185: old_i[i]--;
186: }
187: for (i=old_i[0];i<old_i[n];i++) {
188: old_j[i]--;
189: }
190: }
192: /* Make the factored matrix 0-based */
193: if (ishift) {
194: for (i=0; i<n+1; i++) {
195: new_i[i]--;
196: }
197: for (i=new_i[0];i<new_i[n];i++) {
198: new_j[i]--;
199: }
200: }
202: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203: /*-- permute the right-hand-side and solution vectors --*/
204: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
205: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
206: ISGetIndices(isicol,&ic);
207: for(i=0; i<n; i++) {
208: ordcol[i] = ic[iperm[i]-1];
209: };
210: ISRestoreIndices(isicol,&ic);
211: ISDestroy(isicol);
213: PetscFree(iperm);
215: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
216: PetscFree(ordcol);
218: /*----- put together the new matrix -----*/
220: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
221: (*fact)->factor = FACTOR_LU;
222: (*fact)->assembled = PETSC_TRUE;
224: b = (Mat_SeqAIJ*)(*fact)->data;
225: PetscFree(b->imax);
226: b->sorted = PETSC_FALSE;
227: b->singlemalloc = PETSC_FALSE;
228: /* the next line frees the default space generated by the MatCreate() */
229: ierr = PetscFree(b->a);
230: ierr = PetscFree(b->ilen);
231: b->a = new_a;
232: b->j = new_j;
233: b->i = new_i;
234: b->ilen = 0;
235: b->imax = 0;
236: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
237: b->row = isirow;
238: b->col = iscolf;
239: PetscMalloc((n+1)*sizeof(Scalar),&b->solve_work);
240: b->maxnz = b->nz = new_i[n];
241: b->indexshift = a->indexshift;
242: MatMarkDiagonal_SeqAIJ(*fact);
243: (*fact)->info.factor_mallocs = 0;
245: MatMarkDiagonal_SeqAIJ(A);
247: /* check out for identical nodes. If found, use inode functions */
248: Mat_AIJ_CheckInode(*fact);
250: af = ((double)b->nz)/((double)a->nz) + .001;
251: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %gn",info->fill,af);
252: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use n",af);
253: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);n",af);
254: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.n");
256: return(0);
257: }
259: /*
260: Factorization code for AIJ format.
261: */
262: int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B)
263: {
264: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
265: IS isicol;
266: int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j;
267: int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift;
268: int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
269: PetscReal f;
274: if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
276: if (!isrow) {
277: ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);
278: }
279: if (!iscol) {
280: ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);
281: }
283: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
284: ISGetIndices(isrow,&r);
285: ISGetIndices(isicol,&ic);
287: /* get new row pointers */
288: PetscMalloc((n+1)*sizeof(int),&ainew);
289: ainew[0] = -shift;
290: /* don't know how many column pointers are needed so estimate */
291: if (info) f = info->fill; else f = 1.0;
292: jmax = (int)(f*ai[n]+(!shift));
293: PetscMalloc((jmax)*sizeof(int),&ajnew);
294: /* fill is a linked list of nonzeros in active row */
295: PetscMalloc((2*n+1)*sizeof(int),&fill);
296: im = fill + n + 1;
297: /* idnew is location of diagonal in factor */
298: PetscMalloc((n+1)*sizeof(int),&idnew);
299: idnew[0] = -shift;
301: for (i=0; i<n; i++) {
302: /* first copy previous fill into linked list */
303: nnz = nz = ai[r[i]+1] - ai[r[i]];
304: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
305: ajtmp = aj + ai[r[i]] + shift;
306: fill[n] = n;
307: while (nz--) {
308: fm = n;
309: idx = ic[*ajtmp++ + shift];
310: do {
311: m = fm;
312: fm = fill[m];
313: } while (fm < idx);
314: fill[m] = idx;
315: fill[idx] = fm;
316: }
317: row = fill[n];
318: while (row < i) {
319: ajtmp = ajnew + idnew[row] + (!shift);
320: nzbd = 1 + idnew[row] - ainew[row];
321: nz = im[row] - nzbd;
322: fm = row;
323: while (nz-- > 0) {
324: idx = *ajtmp++ + shift;
325: nzbd++;
326: if (idx == i) im[row] = nzbd;
327: do {
328: m = fm;
329: fm = fill[m];
330: } while (fm < idx);
331: if (fm != idx) {
332: fill[m] = idx;
333: fill[idx] = fm;
334: fm = idx;
335: nnz++;
336: }
337: }
338: row = fill[row];
339: }
340: /* copy new filled row into permanent storage */
341: ainew[i+1] = ainew[i] + nnz;
342: if (ainew[i+1] > jmax) {
344: /* estimate how much additional space we will need */
345: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
346: /* just double the memory each time */
347: int maxadd = jmax;
348: /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
349: if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
350: jmax += maxadd;
352: /* allocate a longer ajnew */
353: PetscMalloc(jmax*sizeof(int),&ajtmp);
354: ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
355: ierr = PetscFree(ajnew);
356: ajnew = ajtmp;
357: realloc++; /* count how many times we realloc */
358: }
359: ajtmp = ajnew + ainew[i] + shift;
360: fm = fill[n];
361: nzi = 0;
362: im[i] = nnz;
363: while (nnz--) {
364: if (fm < i) nzi++;
365: *ajtmp++ = fm - shift;
366: fm = fill[fm];
367: }
368: idnew[i] = ainew[i] + nzi;
369: }
370: if (ai[n] != 0) {
371: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
372: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
373: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use n",af);
374: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);n",af);
375: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.n");
376: } else {
377: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrixn");
378: }
380: ISRestoreIndices(isrow,&r);
381: ISRestoreIndices(isicol,&ic);
383: PetscFree(fill);
385: /* put together the new matrix */
386: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);
387: PetscLogObjectParent(*B,isicol);
388: b = (Mat_SeqAIJ*)(*B)->data;
389: PetscFree(b->imax);
390: b->singlemalloc = PETSC_FALSE;
391: /* the next line frees the default space generated by the Create() */
392: PetscFree(b->a);
393: PetscFree(b->ilen);
394: ierr = PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar),&b->a);
395: b->j = ajnew;
396: b->i = ainew;
397: b->diag = idnew;
398: b->ilen = 0;
399: b->imax = 0;
400: b->row = isrow;
401: b->col = iscol;
402: if (info) {
403: b->lu_damp = (PetscTruth) ((int)info->damp);
404: b->lu_damping = info->damping;
405: } else {
406: b->lu_damp = PETSC_FALSE;
407: b->lu_damping = 0.0;
408: }
409: ierr = PetscObjectReference((PetscObject)isrow);
410: ierr = PetscObjectReference((PetscObject)iscol);
411: b->icol = isicol;
412: ierr = PetscMalloc((n+1)*sizeof(Scalar),&b->solve_work);
413: /* In b structure: Free imax, ilen, old a, old j.
414: Allocate idnew, solve_work, new a, new j */
415: PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar)));
416: b->maxnz = b->nz = ainew[n] + shift;
418: (*B)->factor = FACTOR_LU;
419: (*B)->info.factor_mallocs = realloc;
420: (*B)->info.fill_ratio_given = f;
421: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
423: if (ai[n] != 0) {
424: (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
425: } else {
426: (*B)->info.fill_ratio_needed = 0.0;
427: }
428: return(0);
429: }
430: /* ----------------------------------------------------------- */
431: EXTERN int Mat_AIJ_CheckInode(Mat);
433: int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
434: {
435: Mat C = *B;
436: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
437: IS isrow = b->row,isicol = b->icol;
438: int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j;
439: int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift;
440: int *diag_offset = b->diag,diag;
441: int *pj,ndamp = 0;
442: Scalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
443: PetscReal damping = b->lu_damping;
444: PetscTruth damp;
448: ierr = ISGetIndices(isrow,&r);
449: ierr = ISGetIndices(isicol,&ic);
450: PetscMalloc((n+1)*sizeof(Scalar),&rtmp);
451: ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));
452: rtmps = rtmp + shift; ics = ic + shift;
454: do {
455: damp = PETSC_FALSE;
456: for (i=0; i<n; i++) {
457: nz = ai[i+1] - ai[i];
458: ajtmp = aj + ai[i] + shift;
459: for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
461: /* load in initial (unfactored row) */
462: nz = a->i[r[i]+1] - a->i[r[i]];
463: ajtmpold = a->j + a->i[r[i]] + shift;
464: v = a->a + a->i[r[i]] + shift;
465: for (j=0; j<nz; j++) {
466: rtmp[ics[ajtmpold[j]]] = v[j];
467: if (ajtmpold[j] == r[i]) { /* damp the diagonal of the matrix */
468: rtmp[ics[ajtmpold[j]]] += damping;
469: }
470: }
472: row = *ajtmp++ + shift;
473: while (row < i) {
474: pc = rtmp + row;
475: if (*pc != 0.0) {
476: pv = b->a + diag_offset[row] + shift;
477: pj = b->j + diag_offset[row] + (!shift);
478: multiplier = *pc / *pv++;
479: *pc = multiplier;
480: nz = ai[row+1] - diag_offset[row] - 1;
481: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
482: PetscLogFlops(2*nz);
483: }
484: row = *ajtmp++ + shift;
485: }
486: /* finished row so stick it into b->a */
487: pv = b->a + ai[i] + shift;
488: pj = b->j + ai[i] + shift;
489: nz = ai[i+1] - ai[i];
490: for (j=0; j<nz; j++) {pv[j] = rtmps[pj[j]];}
491: diag = diag_offset[i] - ai[i];
492: if (PetscAbsScalar(pv[diag]) < 1.e-12) {
493: if (b->lu_damp) {
494: damp = PETSC_TRUE;
495: if (damping) damping *= 2.0;
496: else damping = 1.e-12;
497: ndamp++;
498: break;
499: } else {
500: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d",i);
501: }
502: }
503: }
504: } while (damp);
506: /* invert diagonal entries for simplier triangular solves */
507: for (i=0; i<n; i++) {
508: b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
509: }
511: PetscFree(rtmp);
512: ISRestoreIndices(isicol,&ic);
513: ISRestoreIndices(isrow,&r);
514: C->factor = FACTOR_LU;
515: Mat_AIJ_CheckInode(C);
516: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
517: C->assembled = PETSC_TRUE;
518: PetscLogFlops(C->n);
519: if (ndamp || b->lu_damping) {
520: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %gn",ndamp,damping);
521: }
522: return(0);
523: }
524: /* ----------------------------------------------------------- */
525: int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatLUInfo *info)
526: {
528: Mat C;
531: MatLUFactorSymbolic(A,row,col,info,&C);
532: MatLUFactorNumeric(A,&C);
533: MatHeaderCopy(A,C);
534: return(0);
535: }
536: /* ----------------------------------------------------------- */
537: int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
538: {
539: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
540: IS iscol = a->col,isrow = a->row;
541: int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
542: int nz,shift = a->indexshift,*rout,*cout;
543: Scalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
546: if (!n) return(0);
548: VecGetArray(bb,&b);
549: VecGetArray(xx,&x);
550: tmp = a->solve_work;
552: ISGetIndices(isrow,&rout); r = rout;
553: ISGetIndices(iscol,&cout); c = cout + (n-1);
555: /* forward solve the lower triangular */
556: tmp[0] = b[*r++];
557: tmps = tmp + shift;
558: for (i=1; i<n; i++) {
559: v = aa + ai[i] + shift;
560: vi = aj + ai[i] + shift;
561: nz = a->diag[i] - ai[i];
562: sum = b[*r++];
563: while (nz--) sum -= *v++ * tmps[*vi++];
564: tmp[i] = sum;
565: }
567: /* backward solve the upper triangular */
568: for (i=n-1; i>=0; i--){
569: v = aa + a->diag[i] + (!shift);
570: vi = aj + a->diag[i] + (!shift);
571: nz = ai[i+1] - a->diag[i] - 1;
572: sum = tmp[i];
573: while (nz--) sum -= *v++ * tmps[*vi++];
574: x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
575: }
577: ISRestoreIndices(isrow,&rout);
578: ISRestoreIndices(iscol,&cout);
579: VecRestoreArray(bb,&b);
580: VecRestoreArray(xx,&x);
581: PetscLogFlops(2*a->nz - A->n);
582: return(0);
583: }
585: /* ----------------------------------------------------------- */
586: int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
587: {
588: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
589: int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr;
590: Scalar *x,*b,*aa = a->a,sum;
591: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
592: int adiag_i,i,*vi,nz,ai_i;
593: Scalar *v;
594: #endif
597: if (!n) return(0);
598: if (a->indexshift) {
599: MatSolve_SeqAIJ(A,bb,xx);
600: return(0);
601: }
603: VecGetArray(bb,&b);
604: VecGetArray(xx,&x);
606: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
607: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
608: #else
609: /* forward solve the lower triangular */
610: x[0] = b[0];
611: for (i=1; i<n; i++) {
612: ai_i = ai[i];
613: v = aa + ai_i;
614: vi = aj + ai_i;
615: nz = adiag[i] - ai_i;
616: sum = b[i];
617: while (nz--) sum -= *v++ * x[*vi++];
618: x[i] = sum;
619: }
621: /* backward solve the upper triangular */
622: for (i=n-1; i>=0; i--){
623: adiag_i = adiag[i];
624: v = aa + adiag_i + 1;
625: vi = aj + adiag_i + 1;
626: nz = ai[i+1] - adiag_i - 1;
627: sum = x[i];
628: while (nz--) sum -= *v++ * x[*vi++];
629: x[i] = sum*aa[adiag_i];
630: }
631: #endif
632: PetscLogFlops(2*a->nz - A->n);
633: VecRestoreArray(bb,&b);
634: VecRestoreArray(xx,&x);
635: return(0);
636: }
638: int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
639: {
640: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
641: IS iscol = a->col,isrow = a->row;
642: int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
643: int nz,shift = a->indexshift,*rout,*cout;
644: Scalar *x,*b,*tmp,*aa = a->a,sum,*v;
647: if (yy != xx) {VecCopy(yy,xx);}
649: VecGetArray(bb,&b);
650: VecGetArray(xx,&x);
651: tmp = a->solve_work;
653: ISGetIndices(isrow,&rout); r = rout;
654: ISGetIndices(iscol,&cout); c = cout + (n-1);
656: /* forward solve the lower triangular */
657: tmp[0] = b[*r++];
658: for (i=1; i<n; i++) {
659: v = aa + ai[i] + shift;
660: vi = aj + ai[i] + shift;
661: nz = a->diag[i] - ai[i];
662: sum = b[*r++];
663: while (nz--) sum -= *v++ * tmp[*vi++ + shift];
664: tmp[i] = sum;
665: }
667: /* backward solve the upper triangular */
668: for (i=n-1; i>=0; i--){
669: v = aa + a->diag[i] + (!shift);
670: vi = aj + a->diag[i] + (!shift);
671: nz = ai[i+1] - a->diag[i] - 1;
672: sum = tmp[i];
673: while (nz--) sum -= *v++ * tmp[*vi++ + shift];
674: tmp[i] = sum*aa[a->diag[i]+shift];
675: x[*c--] += tmp[i];
676: }
678: ISRestoreIndices(isrow,&rout);
679: ISRestoreIndices(iscol,&cout);
680: VecRestoreArray(bb,&b);
681: VecRestoreArray(xx,&x);
682: PetscLogFlops(2*a->nz);
684: return(0);
685: }
686: /* -------------------------------------------------------------------*/
687: int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
688: {
689: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
690: IS iscol = a->col,isrow = a->row;
691: int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
692: int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
693: Scalar *x,*b,*tmp,*aa = a->a,*v,s1;
696: VecGetArray(bb,&b);
697: VecGetArray(xx,&x);
698: tmp = a->solve_work;
700: ISGetIndices(isrow,&rout); r = rout;
701: ISGetIndices(iscol,&cout); c = cout;
703: /* copy the b into temp work space according to permutation */
704: for (i=0; i<n; i++) tmp[i] = b[c[i]];
706: /* forward solve the U^T */
707: for (i=0; i<n; i++) {
708: v = aa + diag[i] + shift;
709: vi = aj + diag[i] + (!shift);
710: nz = ai[i+1] - diag[i] - 1;
711: s1 = tmp[i];
712: s1 *= *(v++); /* multiply by inverse of diagonal entry */
713: while (nz--) {
714: tmp[*vi++ + shift] -= (*v++)*s1;
715: }
716: tmp[i] = s1;
717: }
719: /* backward solve the L^T */
720: for (i=n-1; i>=0; i--){
721: v = aa + diag[i] - 1 + shift;
722: vi = aj + diag[i] - 1 + shift;
723: nz = diag[i] - ai[i];
724: s1 = tmp[i];
725: while (nz--) {
726: tmp[*vi-- + shift] -= (*v--)*s1;
727: }
728: }
730: /* copy tmp into x according to permutation */
731: for (i=0; i<n; i++) x[r[i]] = tmp[i];
733: ISRestoreIndices(isrow,&rout);
734: ISRestoreIndices(iscol,&cout);
735: VecRestoreArray(bb,&b);
736: VecRestoreArray(xx,&x);
738: PetscLogFlops(2*a->nz-A->n);
739: return(0);
740: }
742: int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
743: {
744: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
745: IS iscol = a->col,isrow = a->row;
746: int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
747: int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
748: Scalar *x,*b,*tmp,*aa = a->a,*v;
751: if (zz != xx) VecCopy(zz,xx);
753: VecGetArray(bb,&b);
754: VecGetArray(xx,&x);
755: tmp = a->solve_work;
757: ISGetIndices(isrow,&rout); r = rout;
758: ISGetIndices(iscol,&cout); c = cout;
760: /* copy the b into temp work space according to permutation */
761: for (i=0; i<n; i++) tmp[i] = b[c[i]];
763: /* forward solve the U^T */
764: for (i=0; i<n; i++) {
765: v = aa + diag[i] + shift;
766: vi = aj + diag[i] + (!shift);
767: nz = ai[i+1] - diag[i] - 1;
768: tmp[i] *= *v++;
769: while (nz--) {
770: tmp[*vi++ + shift] -= (*v++)*tmp[i];
771: }
772: }
774: /* backward solve the L^T */
775: for (i=n-1; i>=0; i--){
776: v = aa + diag[i] - 1 + shift;
777: vi = aj + diag[i] - 1 + shift;
778: nz = diag[i] - ai[i];
779: while (nz--) {
780: tmp[*vi-- + shift] -= (*v--)*tmp[i];
781: }
782: }
784: /* copy tmp into x according to permutation */
785: for (i=0; i<n; i++) x[r[i]] += tmp[i];
787: ISRestoreIndices(isrow,&rout);
788: ISRestoreIndices(iscol,&cout);
789: VecRestoreArray(bb,&b);
790: VecRestoreArray(xx,&x);
792: PetscLogFlops(2*a->nz);
793: return(0);
794: }
795: /* ----------------------------------------------------------------*/
796: EXTERN int MatMissingDiagonal_SeqAIJ(Mat);
798: int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
799: {
800: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
801: IS isicol;
802: int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j;
803: int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
804: int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
805: int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill;
806: PetscTruth col_identity,row_identity;
807: PetscReal f;
808:
810: if (info) {
811: f = info->fill;
812: levels = (int)info->levels;
813: diagonal_fill = (int)info->diagonal_fill;
814: } else {
815: f = 1.0;
816: levels = 0;
817: diagonal_fill = 0;
818: }
819: if (!isrow) {
820: ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);
821: }
822: if (!iscol) {
823: ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);
824: }
826: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
828: /* special case that simply copies fill pattern */
829: ISIdentity(isrow,&row_identity);
830: ISIdentity(iscol,&col_identity);
831: if (!levels && row_identity && col_identity) {
832: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
833: (*fact)->factor = FACTOR_LU;
834: b = (Mat_SeqAIJ*)(*fact)->data;
835: if (!b->diag) {
836: MatMarkDiagonal_SeqAIJ(*fact);
837: }
838: MatMissingDiagonal_SeqAIJ(*fact);
839: b->row = isrow;
840: b->col = iscol;
841: b->icol = isicol;
842: if (info) {
843: b->lu_damp = (PetscTruth)((int)info->damp);
844: b->lu_damping = info->damping;
845: } else {
846: b->lu_damp = PETSC_FALSE;
847: b->lu_damping = 0.0;
848: }
849: ierr = PetscMalloc(((*fact)->m+1)*sizeof(Scalar),&b->solve_work);
850: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
851: ierr = PetscObjectReference((PetscObject)isrow);
852: ierr = PetscObjectReference((PetscObject)iscol);
853: return(0);
854: }
856: ISGetIndices(isrow,&r);
857: ISGetIndices(isicol,&ic);
859: /* get new row pointers */
860: PetscMalloc((n+1)*sizeof(int),&ainew);
861: ainew[0] = -shift;
862: /* don't know how many column pointers are needed so estimate */
863: jmax = (int)(f*(ai[n]+!shift));
864: PetscMalloc((jmax)*sizeof(int),&ajnew);
865: /* ajfill is level of fill for each fill entry */
866: PetscMalloc((jmax)*sizeof(int),&ajfill);
867: /* fill is a linked list of nonzeros in active row */
868: PetscMalloc((n+1)*sizeof(int),&fill);
869: /* im is level for each filled value */
870: PetscMalloc((n+1)*sizeof(int),&im);
871: /* dloc is location of diagonal in factor */
872: PetscMalloc((n+1)*sizeof(int),&dloc);
873: dloc[0] = 0;
874: for (prow=0; prow<n; prow++) {
876: /* copy current row into linked list */
877: nzf = nz = ai[r[prow]+1] - ai[r[prow]];
878: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
879: xi = aj + ai[r[prow]] + shift;
880: fill[n] = n;
881: fill[prow] = -1; /* marker to indicate if diagonal exists */
882: while (nz--) {
883: fm = n;
884: idx = ic[*xi++ + shift];
885: do {
886: m = fm;
887: fm = fill[m];
888: } while (fm < idx);
889: fill[m] = idx;
890: fill[idx] = fm;
891: im[idx] = 0;
892: }
894: /* make sure diagonal entry is included */
895: if (diagonal_fill && fill[prow] == -1) {
896: fm = n;
897: while (fill[fm] < prow) fm = fill[fm];
898: fill[prow] = fill[fm]; /* insert diagonal into linked list */
899: fill[fm] = prow;
900: im[prow] = 0;
901: nzf++;
902: dcount++;
903: }
905: nzi = 0;
906: row = fill[n];
907: while (row < prow) {
908: incrlev = im[row] + 1;
909: nz = dloc[row];
910: xi = ajnew + ainew[row] + shift + nz + 1;
911: flev = ajfill + ainew[row] + shift + nz + 1;
912: nnz = ainew[row+1] - ainew[row] - nz - 1;
913: fm = row;
914: while (nnz-- > 0) {
915: idx = *xi++ + shift;
916: if (*flev + incrlev > levels) {
917: flev++;
918: continue;
919: }
920: do {
921: m = fm;
922: fm = fill[m];
923: } while (fm < idx);
924: if (fm != idx) {
925: im[idx] = *flev + incrlev;
926: fill[m] = idx;
927: fill[idx] = fm;
928: fm = idx;
929: nzf++;
930: } else {
931: if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
932: }
933: flev++;
934: }
935: row = fill[row];
936: nzi++;
937: }
938: /* copy new filled row into permanent storage */
939: ainew[prow+1] = ainew[prow] + nzf;
940: if (ainew[prow+1] > jmax-shift) {
942: /* estimate how much additional space we will need */
943: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
944: /* just double the memory each time */
945: /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
946: int maxadd = jmax;
947: if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
948: jmax += maxadd;
950: /* allocate a longer ajnew and ajfill */
951: ierr = PetscMalloc(jmax*sizeof(int),&xi);
952: ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
953: ierr = PetscFree(ajnew);
954: ajnew = xi;
955: ierr = PetscMalloc(jmax*sizeof(int),&xi);
956: ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
957: ierr = PetscFree(ajfill);
958: ajfill = xi;
959: realloc++; /* count how many times we realloc */
960: }
961: xi = ajnew + ainew[prow] + shift;
962: flev = ajfill + ainew[prow] + shift;
963: dloc[prow] = nzi;
964: fm = fill[n];
965: while (nzf--) {
966: *xi++ = fm - shift;
967: *flev++ = im[fm];
968: fm = fill[fm];
969: }
970: /* make sure row has diagonal entry */
971: if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) {
972: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrixn
973: try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
974: }
975: }
976: PetscFree(ajfill);
977: ISRestoreIndices(isrow,&r);
978: ISRestoreIndices(isicol,&ic);
979: PetscFree(fill);
980: PetscFree(im);
982: {
983: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
984: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
985: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use n",af);
986: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);n",af);
987: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.n");
988: if (diagonal_fill) {
989: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount);
990: }
991: }
993: /* put together the new matrix */
994: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
995: PetscLogObjectParent(*fact,isicol);
996: b = (Mat_SeqAIJ*)(*fact)->data;
997: PetscFree(b->imax);
998: b->singlemalloc = PETSC_FALSE;
999: len = (ainew[n] + shift)*sizeof(Scalar);
1000: /* the next line frees the default space generated by the Create() */
1001: PetscFree(b->a);
1002: PetscFree(b->ilen);
1003: PetscMalloc(len+1,&b->a);
1004: b->j = ajnew;
1005: b->i = ainew;
1006: for (i=0; i<n; i++) dloc[i] += ainew[i];
1007: b->diag = dloc;
1008: b->ilen = 0;
1009: b->imax = 0;
1010: b->row = isrow;
1011: b->col = iscol;
1012: ierr = PetscObjectReference((PetscObject)isrow);
1013: ierr = PetscObjectReference((PetscObject)iscol);
1014: b->icol = isicol;
1015: PetscMalloc((n+1)*sizeof(Scalar),&b->solve_work);
1016: /* In b structure: Free imax, ilen, old a, old j.
1017: Allocate dloc, solve_work, new a, new j */
1018: PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar)));
1019: b->maxnz = b->nz = ainew[n] + shift;
1020: if (info) {
1021: b->lu_damp = (PetscTruth)((int)info->damp);
1022: b->lu_damping = info->damping;
1023: } else {
1024: b->lu_damp = PETSC_FALSE;
1025: b->lu_damping = 0.0;
1026: }
1027: (*fact)->factor = FACTOR_LU;
1029: (*fact)->info.factor_mallocs = realloc;
1030: (*fact)->info.fill_ratio_given = f;
1031: (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1032: (*fact)->factor = FACTOR_LU;
1034: return(0);
1035: }