Actual source code: aijfact.c
1: /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/vec/vecimpl.h
5: #include src/inline/dot.h
7: #undef __FUNCT__
9: int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
10: {
13: SETERRQ(PETSC_ERR_SUP,"Code not written");
14: #if !defined(PETSC_USE_DEBUG)
15: return(0);
16: #endif
17: }
20: EXTERN int MatMarkDiagonal_SeqAIJ(Mat);
21: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
23: EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*);
24: EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*);
25: EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*);
27: #undef __FUNCT__
29: /* ------------------------------------------------------------
31: This interface was contribed by Tony Caola
33: This routine is an interface to the pivoting drop-tolerance
34: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
35: SPARSEKIT2.
37: The SPARSEKIT2 routines used here are covered by the GNU
38: copyright; see the file gnu in this directory.
40: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
41: help in getting this routine ironed out.
43: The major drawback to this routine is that if info->fill is
44: not large enough it fails rather than allocating more space;
45: this can be fixed by hacking/improving the f2c version of
46: Yousef Saad's code.
48: ishift = 0, for indices start at 1
49: ishift = 1, for indices starting at 0
50: ------------------------------------------------------------
51: */
52: int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
53: {
54: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
56: SETERRQ(1,"This distribution does not include GNU Copyright coden
57: You can obtain the drop tolerance routines by installing PETSc fromn
58: www.mcs.anl.gov/petscn");
59: #else
60: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
61: IS iscolf,isicol,isirow;
62: PetscTruth reorder;
63: int *c,*r,*ic,ierr,i,n = A->m;
64: int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65: int *ordcol,*iwk,*iperm,*jw;
66: int ishift = !a->indexshift;
67: int jmax,lfill,job,*o_i,*o_j;
68: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
69: PetscReal permtol,af;
73: if (info->dt == PETSC_DEFAULT) info->dt = .005;
74: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
75: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
76: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
77: lfill = (int)(info->dtcount/2.0);
78: jmax = (int)(info->fill*a->nz);
79: permtol = info->dtcol;
82: /* ------------------------------------------------------------
83: If reorder=.TRUE., then the original matrix has to be
84: reordered to reflect the user selected ordering scheme, and
85: then de-reordered so it is in it's original format.
86: Because Saad's dperm() is NOT in place, we have to copy
87: the original matrix and allocate more storage. . .
88: ------------------------------------------------------------
89: */
91: /* set reorder to true if either isrow or iscol is not identity */
92: ISIdentity(isrow,&reorder);
93: if (reorder) {ISIdentity(iscol,&reorder);}
94: reorder = PetscNot(reorder);
96:
97: /* storage for ilu factor */
98: PetscMalloc((n+1)*sizeof(int),&new_i);
99: PetscMalloc(jmax*sizeof(int),&new_j);
100: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
101: PetscMalloc(n*sizeof(int),&ordcol);
103: /* ------------------------------------------------------------
104: Make sure that everything is Fortran formatted (1-Based)
105: ------------------------------------------------------------
106: */
107: if (ishift) {
108: for (i=old_i[0];i<old_i[n];i++) {
109: old_j[i]++;
110: }
111: for(i=0;i<n+1;i++) {
112: old_i[i]++;
113: };
114: };
116: if (reorder) {
117: ISGetIndices(iscol,&c);
118: ISGetIndices(isrow,&r);
119: for(i=0;i<n;i++) {
120: r[i] = r[i]+1;
121: c[i] = c[i]+1;
122: }
123: PetscMalloc((n+1)*sizeof(int),&old_i2);
124: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);
125: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
126: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
127: for (i=0;i<n;i++) {
128: r[i] = r[i]-1;
129: c[i] = c[i]-1;
130: }
131: ISRestoreIndices(iscol,&c);
132: ISRestoreIndices(isrow,&r);
133: o_a = old_a2;
134: o_j = old_j2;
135: o_i = old_i2;
136: } else {
137: o_a = old_a;
138: o_j = old_j;
139: o_i = old_i;
140: }
142: /* ------------------------------------------------------------
143: Call Saad's ilutp() routine to generate the factorization
144: ------------------------------------------------------------
145: */
147: PetscMalloc(2*n*sizeof(int),&iperm);
148: PetscMalloc(2*n*sizeof(int),&jw);
149: PetscMalloc(n*sizeof(PetscScalar),&w);
151: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
152: if (ierr) {
153: switch (ierr) {
154: case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
155: case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
156: case -5: SETERRQ(1,"ilutp(), zero row encountered");
157: case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
158: case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
159: default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr);
160: }
161: }
163: PetscFree(w);
164: PetscFree(jw);
166: /* ------------------------------------------------------------
167: Saad's routine gives the result in Modified Sparse Row (msr)
168: Convert to Compressed Sparse Row format (csr)
169: ------------------------------------------------------------
170: */
172: PetscMalloc(n*sizeof(PetscScalar),&wk);
173: PetscMalloc((n+1)*sizeof(int),&iwk);
175: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
177: PetscFree(iwk);
178: PetscFree(wk);
180: if (reorder) {
181: PetscFree(old_a2);
182: PetscFree(old_j2);
183: PetscFree(old_i2);
184: } else {
185: /* fix permutation of old_j that the factorization introduced */
186: for (i=old_i[0]; i<old_i[n]; i++) {
187: old_j[i-1] = iperm[old_j[i-1]-1];
188: }
189: }
191: /* get rid of the shift to indices starting at 1 */
192: if (ishift) {
193: for (i=0; i<n+1; i++) {
194: old_i[i]--;
195: }
196: for (i=old_i[0];i<old_i[n];i++) {
197: old_j[i]--;
198: }
199: }
201: /* Make the factored matrix 0-based */
202: if (ishift) {
203: for (i=0; i<n+1; i++) {
204: new_i[i]--;
205: }
206: for (i=new_i[0];i<new_i[n];i++) {
207: new_j[i]--;
208: }
209: }
211: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
212: /*-- permute the right-hand-side and solution vectors --*/
213: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
214: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
215: ISGetIndices(isicol,&ic);
216: for(i=0; i<n; i++) {
217: ordcol[i] = ic[iperm[i]-1];
218: };
219: ISRestoreIndices(isicol,&ic);
220: ISDestroy(isicol);
222: PetscFree(iperm);
224: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
225: PetscFree(ordcol);
227: /*----- put together the new matrix -----*/
229: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
230: (*fact)->factor = FACTOR_LU;
231: (*fact)->assembled = PETSC_TRUE;
233: b = (Mat_SeqAIJ*)(*fact)->data;
234: PetscFree(b->imax);
235: b->sorted = PETSC_FALSE;
236: b->singlemalloc = PETSC_FALSE;
237: /* the next line frees the default space generated by the MatCreate() */
238: ierr = PetscFree(b->a);
239: ierr = PetscFree(b->ilen);
240: b->a = new_a;
241: b->j = new_j;
242: b->i = new_i;
243: b->ilen = 0;
244: b->imax = 0;
245: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
246: b->row = isirow;
247: b->col = iscolf;
248: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
249: b->maxnz = b->nz = new_i[n];
250: b->indexshift = a->indexshift;
251: MatMarkDiagonal_SeqAIJ(*fact);
252: (*fact)->info.factor_mallocs = 0;
254: MatMarkDiagonal_SeqAIJ(A);
256: /* check out for identical nodes. If found, use inode functions */
257: Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
259: af = ((double)b->nz)/((double)a->nz) + .001;
260: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %gn",info->fill,af);
261: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use n",af);
262: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);n",af);
263: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.n");
265: return(0);
266: #endif
267: }
269: /*
270: Factorization code for AIJ format.
271: */
272: #undef __FUNCT__
274: int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B)
275: {
276: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
277: IS isicol;
278: int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j;
279: int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift;
280: int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
281: PetscReal f;
284: if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
285: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
286: ISGetIndices(isrow,&r);
287: ISGetIndices(isicol,&ic);
289: /* get new row pointers */
290: PetscMalloc((n+1)*sizeof(int),&ainew);
291: ainew[0] = -shift;
292: /* don't know how many column pointers are needed so estimate */
293: f = info->fill;
294: jmax = (int)(f*ai[n]+(!shift));
295: PetscMalloc((jmax)*sizeof(int),&ajnew);
296: /* fill is a linked list of nonzeros in active row */
297: PetscMalloc((2*n+1)*sizeof(int),&fill);
298: im = fill + n + 1;
299: /* idnew is location of diagonal in factor */
300: PetscMalloc((n+1)*sizeof(int),&idnew);
301: idnew[0] = -shift;
303: for (i=0; i<n; i++) {
304: /* first copy previous fill into linked list */
305: nnz = nz = ai[r[i]+1] - ai[r[i]];
306: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
307: ajtmp = aj + ai[r[i]] + shift;
308: fill[n] = n;
309: while (nz--) {
310: fm = n;
311: idx = ic[*ajtmp++ + shift];
312: do {
313: m = fm;
314: fm = fill[m];
315: } while (fm < idx);
316: fill[m] = idx;
317: fill[idx] = fm;
318: }
319: row = fill[n];
320: while (row < i) {
321: ajtmp = ajnew + idnew[row] + (!shift);
322: nzbd = 1 + idnew[row] - ainew[row];
323: nz = im[row] - nzbd;
324: fm = row;
325: while (nz-- > 0) {
326: idx = *ajtmp++ + shift;
327: nzbd++;
328: if (idx == i) im[row] = nzbd;
329: do {
330: m = fm;
331: fm = fill[m];
332: } while (fm < idx);
333: if (fm != idx) {
334: fill[m] = idx;
335: fill[idx] = fm;
336: fm = idx;
337: nnz++;
338: }
339: }
340: row = fill[row];
341: }
342: /* copy new filled row into permanent storage */
343: ainew[i+1] = ainew[i] + nnz;
344: if (ainew[i+1] > jmax) {
346: /* estimate how much additional space we will need */
347: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
348: /* just double the memory each time */
349: int maxadd = jmax;
350: /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
351: if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
352: jmax += maxadd;
354: /* allocate a longer ajnew */
355: PetscMalloc(jmax*sizeof(int),&ajtmp);
356: ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
357: ierr = PetscFree(ajnew);
358: ajnew = ajtmp;
359: realloc++; /* count how many times we realloc */
360: }
361: ajtmp = ajnew + ainew[i] + shift;
362: fm = fill[n];
363: nzi = 0;
364: im[i] = nnz;
365: while (nnz--) {
366: if (fm < i) nzi++;
367: *ajtmp++ = fm - shift;
368: fm = fill[fm];
369: }
370: idnew[i] = ainew[i] + nzi;
371: }
372: if (ai[n] != 0) {
373: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
374: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
375: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use n",af);
376: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);n",af);
377: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.n");
378: } else {
379: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrixn");
380: }
382: ISRestoreIndices(isrow,&r);
383: ISRestoreIndices(isicol,&ic);
385: PetscFree(fill);
387: /* put together the new matrix */
388: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);
389: PetscLogObjectParent(*B,isicol);
390: b = (Mat_SeqAIJ*)(*B)->data;
391: PetscFree(b->imax);
392: b->singlemalloc = PETSC_FALSE;
393: /* the next line frees the default space generated by the Create() */
394: PetscFree(b->a);
395: PetscFree(b->ilen);
396: ierr = PetscMalloc((ainew[n]+shift+1)*sizeof(PetscScalar),&b->a);
397: b->j = ajnew;
398: b->i = ainew;
399: b->diag = idnew;
400: b->ilen = 0;
401: b->imax = 0;
402: b->row = isrow;
403: b->col = iscol;
404: b->lu_damping = info->damping;
405: b->lu_zeropivot = info->zeropivot;
406: ierr = PetscObjectReference((PetscObject)isrow);
407: ierr = PetscObjectReference((PetscObject)iscol);
408: b->icol = isicol;
409: ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
410: /* In b structure: Free imax, ilen, old a, old j.
411: Allocate idnew, solve_work, new a, new j */
412: PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(PetscScalar)));
413: b->maxnz = b->nz = ainew[n] + shift;
415: (*B)->factor = FACTOR_LU;
416: (*B)->info.factor_mallocs = realloc;
417: (*B)->info.fill_ratio_given = f;
418: Mat_AIJ_CheckInode(*B,PETSC_FALSE);
419: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
421: if (ai[n] != 0) {
422: (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
423: } else {
424: (*B)->info.fill_ratio_needed = 0.0;
425: }
426: return(0);
427: }
428: /* ----------------------------------------------------------- */
429: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
431: #undef __FUNCT__
433: int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
434: {
435: Mat C = *B;
436: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
437: IS isrow = b->row,isicol = b->icol;
438: int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j;
439: int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift;
440: int *diag_offset = b->diag,diag;
441: int *pj,ndamp = 0;
442: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
443: PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot,rs;
444: PetscTruth damp;
447: ierr = ISGetIndices(isrow,&r);
448: ierr = ISGetIndices(isicol,&ic);
449: ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
450: ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
451: rtmps = rtmp + shift; ics = ic + shift;
453: do {
454: damp = PETSC_FALSE;
455: for (i=0; i<n; i++) {
456: nz = ai[i+1] - ai[i];
457: ajtmp = aj + ai[i] + shift;
458: for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
460: /* load in initial (unfactored row) */
461: nz = a->i[r[i]+1] - a->i[r[i]];
462: ajtmpold = a->j + a->i[r[i]] + shift;
463: v = a->a + a->i[r[i]] + shift;
464: for (j=0; j<nz; j++) {
465: rtmp[ics[ajtmpold[j]]] = v[j];
466: if (ndamp && ajtmpold[j] == r[i]) { /* damp the diagonal of the matrix */
467: rtmp[ics[ajtmpold[j]]] += damping;
468: }
469: }
471: row = *ajtmp++ + shift;
472: while (row < i) {
473: pc = rtmp + row;
474: if (*pc != 0.0) {
475: pv = b->a + diag_offset[row] + shift;
476: pj = b->j + diag_offset[row] + (!shift);
477: multiplier = *pc / *pv++;
478: *pc = multiplier;
479: nz = ai[row+1] - diag_offset[row] - 1;
480: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
481: PetscLogFlops(2*nz);
482: }
483: row = *ajtmp++ + shift;
484: }
485: /* finished row so stick it into b->a */
486: pv = b->a + ai[i] + shift;
487: pj = b->j + ai[i] + shift;
488: nz = ai[i+1] - ai[i];
489: diag = diag_offset[i] - ai[i];
490: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
491: rs = 0.0;
492: for (j=0; j<nz; j++) {
493: pv[j] = rtmps[pj[j]];
494: if (j != diag) rs += PetscAbsScalar(pv[j]);
495: }
496: if (PetscAbsScalar(pv[diag]) < zeropivot*rs) {
497: if (damping) {
498: if (ndamp) damping *= 2.0;
499: damp = PETSC_TRUE;
500: ndamp++;
501: break;
502: } else {
503: SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g * rs %g",i,PetscAbsScalar(pv[diag]),zeropivot,rs);
504: }
505: }
506: }
507: } while (damp);
509: /* invert diagonal entries for simplier triangular solves */
510: for (i=0; i<n; i++) {
511: b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
512: }
514: PetscFree(rtmp);
515: ISRestoreIndices(isicol,&ic);
516: ISRestoreIndices(isrow,&r);
517: C->factor = FACTOR_LU;
518: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
519: C->assembled = PETSC_TRUE;
520: PetscLogFlops(C->n);
521: if (ndamp) {
522: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %gn",ndamp,damping);
523: }
524: return(0);
525: }
526: /* ----------------------------------------------------------- */
527: #undef __FUNCT__
529: int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatLUInfo *info)
530: {
532: Mat C;
535: MatLUFactorSymbolic(A,row,col,info,&C);
536: MatLUFactorNumeric(A,&C);
537: MatHeaderCopy(A,C);
538: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
539: return(0);
540: }
541: /* ----------------------------------------------------------- */
542: #undef __FUNCT__
544: int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
545: {
546: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
547: IS iscol = a->col,isrow = a->row;
548: int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
549: int nz,shift = a->indexshift,*rout,*cout;
550: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
553: if (!n) return(0);
555: VecGetArray(bb,&b);
556: VecGetArray(xx,&x);
557: tmp = a->solve_work;
559: ISGetIndices(isrow,&rout); r = rout;
560: ISGetIndices(iscol,&cout); c = cout + (n-1);
562: /* forward solve the lower triangular */
563: tmp[0] = b[*r++];
564: tmps = tmp + shift;
565: for (i=1; i<n; i++) {
566: v = aa + ai[i] + shift;
567: vi = aj + ai[i] + shift;
568: nz = a->diag[i] - ai[i];
569: sum = b[*r++];
570: while (nz--) sum -= *v++ * tmps[*vi++];
571: tmp[i] = sum;
572: }
574: /* backward solve the upper triangular */
575: for (i=n-1; i>=0; i--){
576: v = aa + a->diag[i] + (!shift);
577: vi = aj + a->diag[i] + (!shift);
578: nz = ai[i+1] - a->diag[i] - 1;
579: sum = tmp[i];
580: while (nz--) sum -= *v++ * tmps[*vi++];
581: x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
582: }
584: ISRestoreIndices(isrow,&rout);
585: ISRestoreIndices(iscol,&cout);
586: VecRestoreArray(bb,&b);
587: VecRestoreArray(xx,&x);
588: PetscLogFlops(2*a->nz - A->n);
589: return(0);
590: }
592: /* ----------------------------------------------------------- */
593: #undef __FUNCT__
595: int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
596: {
597: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
598: int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr;
599: PetscScalar *x,*b,*aa = a->a;
600: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
601: int adiag_i,i,*vi,nz,ai_i;
602: PetscScalar *v,sum;
603: #endif
606: if (!n) return(0);
607: if (a->indexshift) {
608: MatSolve_SeqAIJ(A,bb,xx);
609: return(0);
610: }
612: VecGetArray(bb,&b);
613: VecGetArray(xx,&x);
615: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
616: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
617: #else
618: /* forward solve the lower triangular */
619: x[0] = b[0];
620: for (i=1; i<n; i++) {
621: ai_i = ai[i];
622: v = aa + ai_i;
623: vi = aj + ai_i;
624: nz = adiag[i] - ai_i;
625: sum = b[i];
626: while (nz--) sum -= *v++ * x[*vi++];
627: x[i] = sum;
628: }
630: /* backward solve the upper triangular */
631: for (i=n-1; i>=0; i--){
632: adiag_i = adiag[i];
633: v = aa + adiag_i + 1;
634: vi = aj + adiag_i + 1;
635: nz = ai[i+1] - adiag_i - 1;
636: sum = x[i];
637: while (nz--) sum -= *v++ * x[*vi++];
638: x[i] = sum*aa[adiag_i];
639: }
640: #endif
641: PetscLogFlops(2*a->nz - A->n);
642: VecRestoreArray(bb,&b);
643: VecRestoreArray(xx,&x);
644: return(0);
645: }
647: #undef __FUNCT__
649: int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
650: {
651: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
652: IS iscol = a->col,isrow = a->row;
653: int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
654: int nz,shift = a->indexshift,*rout,*cout;
655: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
658: if (yy != xx) {VecCopy(yy,xx);}
660: VecGetArray(bb,&b);
661: VecGetArray(xx,&x);
662: tmp = a->solve_work;
664: ISGetIndices(isrow,&rout); r = rout;
665: ISGetIndices(iscol,&cout); c = cout + (n-1);
667: /* forward solve the lower triangular */
668: tmp[0] = b[*r++];
669: for (i=1; i<n; i++) {
670: v = aa + ai[i] + shift;
671: vi = aj + ai[i] + shift;
672: nz = a->diag[i] - ai[i];
673: sum = b[*r++];
674: while (nz--) sum -= *v++ * tmp[*vi++ + shift];
675: tmp[i] = sum;
676: }
678: /* backward solve the upper triangular */
679: for (i=n-1; i>=0; i--){
680: v = aa + a->diag[i] + (!shift);
681: vi = aj + a->diag[i] + (!shift);
682: nz = ai[i+1] - a->diag[i] - 1;
683: sum = tmp[i];
684: while (nz--) sum -= *v++ * tmp[*vi++ + shift];
685: tmp[i] = sum*aa[a->diag[i]+shift];
686: x[*c--] += tmp[i];
687: }
689: ISRestoreIndices(isrow,&rout);
690: ISRestoreIndices(iscol,&cout);
691: VecRestoreArray(bb,&b);
692: VecRestoreArray(xx,&x);
693: PetscLogFlops(2*a->nz);
695: return(0);
696: }
697: /* -------------------------------------------------------------------*/
698: #undef __FUNCT__
700: int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
701: {
702: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
703: IS iscol = a->col,isrow = a->row;
704: int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
705: int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
706: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
709: VecGetArray(bb,&b);
710: VecGetArray(xx,&x);
711: tmp = a->solve_work;
713: ISGetIndices(isrow,&rout); r = rout;
714: ISGetIndices(iscol,&cout); c = cout;
716: /* copy the b into temp work space according to permutation */
717: for (i=0; i<n; i++) tmp[i] = b[c[i]];
719: /* forward solve the U^T */
720: for (i=0; i<n; i++) {
721: v = aa + diag[i] + shift;
722: vi = aj + diag[i] + (!shift);
723: nz = ai[i+1] - diag[i] - 1;
724: s1 = tmp[i];
725: s1 *= (*v++); /* multiply by inverse of diagonal entry */
726: while (nz--) {
727: tmp[*vi++ + shift] -= (*v++)*s1;
728: }
729: tmp[i] = s1;
730: }
732: /* backward solve the L^T */
733: for (i=n-1; i>=0; i--){
734: v = aa + diag[i] - 1 + shift;
735: vi = aj + diag[i] - 1 + shift;
736: nz = diag[i] - ai[i];
737: s1 = tmp[i];
738: while (nz--) {
739: tmp[*vi-- + shift] -= (*v--)*s1;
740: }
741: }
743: /* copy tmp into x according to permutation */
744: for (i=0; i<n; i++) x[r[i]] = tmp[i];
746: ISRestoreIndices(isrow,&rout);
747: ISRestoreIndices(iscol,&cout);
748: VecRestoreArray(bb,&b);
749: VecRestoreArray(xx,&x);
751: PetscLogFlops(2*a->nz-A->n);
752: return(0);
753: }
755: #undef __FUNCT__
757: int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
758: {
759: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
760: IS iscol = a->col,isrow = a->row;
761: int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
762: int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
763: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
766: if (zz != xx) VecCopy(zz,xx);
768: VecGetArray(bb,&b);
769: VecGetArray(xx,&x);
770: tmp = a->solve_work;
772: ISGetIndices(isrow,&rout); r = rout;
773: ISGetIndices(iscol,&cout); c = cout;
775: /* copy the b into temp work space according to permutation */
776: for (i=0; i<n; i++) tmp[i] = b[c[i]];
778: /* forward solve the U^T */
779: for (i=0; i<n; i++) {
780: v = aa + diag[i] + shift;
781: vi = aj + diag[i] + (!shift);
782: nz = ai[i+1] - diag[i] - 1;
783: tmp[i] *= *v++;
784: while (nz--) {
785: tmp[*vi++ + shift] -= (*v++)*tmp[i];
786: }
787: }
789: /* backward solve the L^T */
790: for (i=n-1; i>=0; i--){
791: v = aa + diag[i] - 1 + shift;
792: vi = aj + diag[i] - 1 + shift;
793: nz = diag[i] - ai[i];
794: while (nz--) {
795: tmp[*vi-- + shift] -= (*v--)*tmp[i];
796: }
797: }
799: /* copy tmp into x according to permutation */
800: for (i=0; i<n; i++) x[r[i]] += tmp[i];
802: ISRestoreIndices(isrow,&rout);
803: ISRestoreIndices(iscol,&cout);
804: VecRestoreArray(bb,&b);
805: VecRestoreArray(xx,&x);
807: PetscLogFlops(2*a->nz);
808: return(0);
809: }
810: /* ----------------------------------------------------------------*/
811: EXTERN int MatMissingDiagonal_SeqAIJ(Mat);
813: #undef __FUNCT__
815: int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
816: {
817: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
818: IS isicol;
819: int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j;
820: int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
821: int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
822: int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill;
823: PetscTruth col_identity,row_identity;
824: PetscReal f;
825:
827: f = info->fill;
828: levels = (int)info->levels;
829: diagonal_fill = (int)info->diagonal_fill;
830: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
832: /* special case that simply copies fill pattern */
833: ISIdentity(isrow,&row_identity);
834: ISIdentity(iscol,&col_identity);
835: if (!levels && row_identity && col_identity) {
836: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
837: (*fact)->factor = FACTOR_LU;
838: b = (Mat_SeqAIJ*)(*fact)->data;
839: if (!b->diag) {
840: MatMarkDiagonal_SeqAIJ(*fact);
841: }
842: MatMissingDiagonal_SeqAIJ(*fact);
843: b->row = isrow;
844: b->col = iscol;
845: b->icol = isicol;
846: b->lu_damping = info->damping;
847: b->lu_zeropivot = info->zeropivot;
848: ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
849: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
850: ierr = PetscObjectReference((PetscObject)isrow);
851: ierr = PetscObjectReference((PetscObject)iscol);
852: return(0);
853: }
855: ISGetIndices(isrow,&r);
856: ISGetIndices(isicol,&ic);
858: /* get new row pointers */
859: PetscMalloc((n+1)*sizeof(int),&ainew);
860: ainew[0] = -shift;
861: /* don't know how many column pointers are needed so estimate */
862: jmax = (int)(f*(ai[n]+!shift));
863: PetscMalloc((jmax)*sizeof(int),&ajnew);
864: /* ajfill is level of fill for each fill entry */
865: PetscMalloc((jmax)*sizeof(int),&ajfill);
866: /* fill is a linked list of nonzeros in active row */
867: PetscMalloc((n+1)*sizeof(int),&fill);
868: /* im is level for each filled value */
869: PetscMalloc((n+1)*sizeof(int),&im);
870: /* dloc is location of diagonal in factor */
871: PetscMalloc((n+1)*sizeof(int),&dloc);
872: dloc[0] = 0;
873: for (prow=0; prow<n; prow++) {
875: /* copy current row into linked list */
876: nzf = nz = ai[r[prow]+1] - ai[r[prow]];
877: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
878: xi = aj + ai[r[prow]] + shift;
879: fill[n] = n;
880: fill[prow] = -1; /* marker to indicate if diagonal exists */
881: while (nz--) {
882: fm = n;
883: idx = ic[*xi++ + shift];
884: do {
885: m = fm;
886: fm = fill[m];
887: } while (fm < idx);
888: fill[m] = idx;
889: fill[idx] = fm;
890: im[idx] = 0;
891: }
893: /* make sure diagonal entry is included */
894: if (diagonal_fill && fill[prow] == -1) {
895: fm = n;
896: while (fill[fm] < prow) fm = fill[fm];
897: fill[prow] = fill[fm]; /* insert diagonal into linked list */
898: fill[fm] = prow;
899: im[prow] = 0;
900: nzf++;
901: dcount++;
902: }
904: nzi = 0;
905: row = fill[n];
906: while (row < prow) {
907: incrlev = im[row] + 1;
908: nz = dloc[row];
909: xi = ajnew + ainew[row] + shift + nz + 1;
910: flev = ajfill + ainew[row] + shift + nz + 1;
911: nnz = ainew[row+1] - ainew[row] - nz - 1;
912: fm = row;
913: while (nnz-- > 0) {
914: idx = *xi++ + shift;
915: if (*flev + incrlev > levels) {
916: flev++;
917: continue;
918: }
919: do {
920: m = fm;
921: fm = fill[m];
922: } while (fm < idx);
923: if (fm != idx) {
924: im[idx] = *flev + incrlev;
925: fill[m] = idx;
926: fill[idx] = fm;
927: fm = idx;
928: nzf++;
929: } else {
930: if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
931: }
932: flev++;
933: }
934: row = fill[row];
935: nzi++;
936: }
937: /* copy new filled row into permanent storage */
938: ainew[prow+1] = ainew[prow] + nzf;
939: if (ainew[prow+1] > jmax-shift) {
941: /* estimate how much additional space we will need */
942: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
943: /* just double the memory each time */
944: /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
945: int maxadd = jmax;
946: if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
947: jmax += maxadd;
949: /* allocate a longer ajnew and ajfill */
950: ierr = PetscMalloc(jmax*sizeof(int),&xi);
951: ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
952: ierr = PetscFree(ajnew);
953: ajnew = xi;
954: ierr = PetscMalloc(jmax*sizeof(int),&xi);
955: ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
956: ierr = PetscFree(ajfill);
957: ajfill = xi;
958: realloc++; /* count how many times we realloc */
959: }
960: xi = ajnew + ainew[prow] + shift;
961: flev = ajfill + ainew[prow] + shift;
962: dloc[prow] = nzi;
963: fm = fill[n];
964: while (nzf--) {
965: *xi++ = fm - shift;
966: *flev++ = im[fm];
967: fm = fill[fm];
968: }
969: /* make sure row has diagonal entry */
970: if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) {
971: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrixn
972: try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
973: }
974: }
975: PetscFree(ajfill);
976: ISRestoreIndices(isrow,&r);
977: ISRestoreIndices(isicol,&ic);
978: PetscFree(fill);
979: PetscFree(im);
981: {
982: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
983: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
984: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use n",af);
985: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);n",af);
986: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.n");
987: if (diagonal_fill) {
988: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount);
989: }
990: }
992: /* put together the new matrix */
993: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
994: PetscLogObjectParent(*fact,isicol);
995: b = (Mat_SeqAIJ*)(*fact)->data;
996: PetscFree(b->imax);
997: b->singlemalloc = PETSC_FALSE;
998: len = (ainew[n] + shift)*sizeof(PetscScalar);
999: /* the next line frees the default space generated by the Create() */
1000: PetscFree(b->a);
1001: PetscFree(b->ilen);
1002: PetscMalloc(len+1,&b->a);
1003: b->j = ajnew;
1004: b->i = ainew;
1005: for (i=0; i<n; i++) dloc[i] += ainew[i];
1006: b->diag = dloc;
1007: b->ilen = 0;
1008: b->imax = 0;
1009: b->row = isrow;
1010: b->col = iscol;
1011: ierr = PetscObjectReference((PetscObject)isrow);
1012: ierr = PetscObjectReference((PetscObject)iscol);
1013: b->icol = isicol;
1014: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1015: /* In b structure: Free imax, ilen, old a, old j.
1016: Allocate dloc, solve_work, new a, new j */
1017: PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(PetscScalar)));
1018: b->maxnz = b->nz = ainew[n] + shift;
1019: b->lu_damping = info->damping;
1020: b->lu_zeropivot = info->zeropivot;
1021: (*fact)->factor = FACTOR_LU;
1022: Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
1023: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1025: (*fact)->info.factor_mallocs = realloc;
1026: (*fact)->info.fill_ratio_given = f;
1027: (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1028: return(0);
1029: }
1031: #include src/mat/impls/sbaij/seq/sbaij.h
1032: typedef struct {
1033: int levels;
1034: } Mat_SeqAIJ_SeqSBAIJ;
1036: #undef __FUNCT__
1038: int MatCholeskyFactorNumeric_SeqAIJ(Mat A,Mat *fact)
1039: {
1040: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1041: int ierr;
1042: Mat_SeqAIJ_SeqSBAIJ *ptr = (Mat_SeqAIJ_SeqSBAIJ*)(*fact)->spptr;
1045: if (!a->sbaijMat){
1046: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1047: }
1048: MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(a->sbaijMat,fact);
1049: if (ptr->levels > 0){
1050: MatDestroy(a->sbaijMat);
1051: }
1052: a->sbaijMat = PETSC_NULL;
1053: PetscFree(ptr);
1054:
1055: return(0);
1056: }
1058: #undef __FUNCT__
1060: int MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,PetscReal fill,int levels,Mat *fact)
1061: {
1062: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1063: Mat_SeqSBAIJ *b;
1064: int ierr;
1065: PetscTruth perm_identity;
1066: Mat_SeqAIJ_SeqSBAIJ *ptr;
1067:
1069: ISIdentity(perm,&perm_identity);
1070: if (!perm_identity){
1071: SETERRQ(1,"Non-identity permutation is not supported yet");
1072: }
1073: if (!a->sbaijMat){
1074: MatConvert(A,MATSEQSBAIJ,&a->sbaijMat);
1075: }
1077: if (levels > 0){
1078: MatICCFactorSymbolic(a->sbaijMat,perm,fill,levels,fact);
1080: } else { /* in-place icc(0) */
1081: (*fact) = a->sbaijMat;
1082: (*fact)->factor = FACTOR_CHOLESKY;
1083: b = (Mat_SeqSBAIJ*)(*fact)->data;
1084: b->row = perm;
1085: b->icol = perm;
1086: ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
1087: ierr = PetscObjectReference((PetscObject)perm);
1088: ierr = PetscObjectReference((PetscObject)perm);
1089: }
1091: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1092: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1094: ierr = PetscNew(Mat_SeqAIJ_SeqSBAIJ,&ptr);
1095: (*fact)->spptr = (void*)ptr;
1096: ptr->levels = levels; /* to be used by CholeskyFactorNumeric_SeqAIJ() */
1097:
1098: return(0);
1099: }