Actual source code: aijfact.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/inline/dot.h
5: #include src/inline/spops.h
6: #include petscbt.h
7: #include src/mat/utils/freespace.h
11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12: {
15: SETERRQ(PETSC_ERR_SUP,"Code not written");
16: #if !defined(PETSC_USE_DEBUG)
17: return(0);
18: #endif
19: }
22: EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
24: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28: #endif
32: /* ------------------------------------------------------------
34: This interface was contribed by Tony Caola
36: This routine is an interface to the pivoting drop-tolerance
37: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38: SPARSEKIT2.
40: The SPARSEKIT2 routines used here are covered by the GNU
41: copyright; see the file gnu in this directory.
43: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44: help in getting this routine ironed out.
46: The major drawback to this routine is that if info->fill is
47: not large enough it fails rather than allocating more space;
48: this can be fixed by hacking/improving the f2c version of
49: Yousef Saad's code.
51: ------------------------------------------------------------
52: */
53: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54: {
55: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
57: SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58: You can obtain the drop tolerance routines by installing PETSc from\n\
59: www.mcs.anl.gov/petsc\n");
60: #else
61: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
62: IS iscolf,isicol,isirow;
63: PetscTruth reorder;
64: PetscErrorCode ierr,sierr;
65: PetscInt *c,*r,*ic,i,n = A->m;
66: PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67: PetscInt *ordcol,*iwk,*iperm,*jw;
68: PetscInt jmax,lfill,job,*o_i,*o_j;
69: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70: PetscReal af;
74: if (info->dt == PETSC_DEFAULT) info->dt = .005;
75: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
77: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
78: lfill = (PetscInt)(info->dtcount/2.0);
79: jmax = (PetscInt)(info->fill*a->nz);
82: /* ------------------------------------------------------------
83: If reorder=.TRUE., then the original matrix has to be
84: reordered to reflect the user selected ordering scheme, and
85: then de-reordered so it is in it's original format.
86: Because Saad's dperm() is NOT in place, we have to copy
87: the original matrix and allocate more storage. . .
88: ------------------------------------------------------------
89: */
91: /* set reorder to true if either isrow or iscol is not identity */
92: ISIdentity(isrow,&reorder);
93: if (reorder) {ISIdentity(iscol,&reorder);}
94: reorder = PetscNot(reorder);
96:
97: /* storage for ilu factor */
98: PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
99: PetscMalloc(jmax*sizeof(PetscInt),&new_j);
100: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
101: PetscMalloc(n*sizeof(PetscInt),&ordcol);
103: /* ------------------------------------------------------------
104: Make sure that everything is Fortran formatted (1-Based)
105: ------------------------------------------------------------
106: */
107: for (i=old_i[0];i<old_i[n];i++) {
108: old_j[i]++;
109: }
110: for(i=0;i<n+1;i++) {
111: old_i[i]++;
112: };
113:
115: if (reorder) {
116: ISGetIndices(iscol,&c);
117: ISGetIndices(isrow,&r);
118: for(i=0;i<n;i++) {
119: r[i] = r[i]+1;
120: c[i] = c[i]+1;
121: }
122: PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
123: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
124: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
125: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126: for (i=0;i<n;i++) {
127: r[i] = r[i]-1;
128: c[i] = c[i]-1;
129: }
130: ISRestoreIndices(iscol,&c);
131: ISRestoreIndices(isrow,&r);
132: o_a = old_a2;
133: o_j = old_j2;
134: o_i = old_i2;
135: } else {
136: o_a = old_a;
137: o_j = old_j;
138: o_i = old_i;
139: }
141: /* ------------------------------------------------------------
142: Call Saad's ilutp() routine to generate the factorization
143: ------------------------------------------------------------
144: */
146: PetscMalloc(2*n*sizeof(PetscInt),&iperm);
147: PetscMalloc(2*n*sizeof(PetscInt),&jw);
148: PetscMalloc(n*sizeof(PetscScalar),&w);
150: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151: if (sierr) {
152: switch (sierr) {
153: case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
154: case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
155: case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156: case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157: case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158: default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159: }
160: }
162: PetscFree(w);
163: PetscFree(jw);
165: /* ------------------------------------------------------------
166: Saad's routine gives the result in Modified Sparse Row (msr)
167: Convert to Compressed Sparse Row format (csr)
168: ------------------------------------------------------------
169: */
171: PetscMalloc(n*sizeof(PetscScalar),&wk);
172: PetscMalloc((n+1)*sizeof(PetscInt),&iwk);
174: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
176: PetscFree(iwk);
177: PetscFree(wk);
179: if (reorder) {
180: PetscFree(old_a2);
181: PetscFree(old_j2);
182: PetscFree(old_i2);
183: } else {
184: /* fix permutation of old_j that the factorization introduced */
185: for (i=old_i[0]; i<old_i[n]; i++) {
186: old_j[i-1] = iperm[old_j[i-1]-1];
187: }
188: }
190: /* get rid of the shift to indices starting at 1 */
191: for (i=0; i<n+1; i++) {
192: old_i[i]--;
193: }
194: for (i=old_i[0];i<old_i[n];i++) {
195: old_j[i]--;
196: }
197:
198: /* Make the factored matrix 0-based */
199: for (i=0; i<n+1; i++) {
200: new_i[i]--;
201: }
202: for (i=new_i[0];i<new_i[n];i++) {
203: new_j[i]--;
204: }
206: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207: /*-- permute the right-hand-side and solution vectors --*/
208: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
210: ISGetIndices(isicol,&ic);
211: for(i=0; i<n; i++) {
212: ordcol[i] = ic[iperm[i]-1];
213: };
214: ISRestoreIndices(isicol,&ic);
215: ISDestroy(isicol);
217: PetscFree(iperm);
219: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
220: PetscFree(ordcol);
222: /*----- put together the new matrix -----*/
224: MatCreate(A->comm,fact);
225: MatSetSizes(*fact,n,n,n,n);
226: MatSetType(*fact,A->type_name);
227: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
228: (*fact)->factor = FACTOR_LU;
229: (*fact)->assembled = PETSC_TRUE;
231: b = (Mat_SeqAIJ*)(*fact)->data;
232: b->freedata = PETSC_TRUE;
233: b->sorted = PETSC_FALSE;
234: b->singlemalloc = PETSC_FALSE;
235: b->a = new_a;
236: b->j = new_j;
237: b->i = new_i;
238: b->ilen = 0;
239: b->imax = 0;
240: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241: b->row = isirow;
242: b->col = iscolf;
243: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
244: b->maxnz = b->nz = new_i[n];
245: MatMarkDiagonal_SeqAIJ(*fact);
246: (*fact)->info.factor_mallocs = 0;
248: MatMarkDiagonal_SeqAIJ(A);
250: af = ((double)b->nz)/((double)a->nz) + .001;
251: PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));
252: PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));
253: PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));
254: PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));
256: MatILUDTFactor_Inode(A,isrow,iscol,info,fact);
258: return(0);
259: #endif
260: }
264: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265: {
266: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
267: IS isicol;
269: PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
270: PetscInt *bi,*bj,*ajtmp;
271: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272: PetscReal f;
273: PetscInt nlnk,*lnk,k,*cols,**bi_ptr;
274: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
275: PetscBT lnkbt;
278: if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
280: ISGetIndices(isrow,&r);
281: ISGetIndices(isicol,&ic);
283: /* get new row pointers */
284: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
285: bi[0] = 0;
287: /* bdiag is location of diagonal in factor */
288: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
289: bdiag[0] = 0;
291: /* linked list for storing column indices of the active row */
292: nlnk = n + 1;
293: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
295: PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);
296: im = cols + n;
297: bi_ptr = (PetscInt**)(im + n);
299: /* initial FreeSpace size is f*(ai[n]+1) */
300: f = info->fill;
301: GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);
302: current_space = free_space;
304: for (i=0; i<n; i++) {
305: /* copy previous fill into linked list */
306: nzi = 0;
307: nnz = ai[r[i]+1] - ai[r[i]];
308: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
309: ajtmp = aj + ai[r[i]];
310: for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
311: PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);
312: nzi += nlnk;
314: /* add pivot rows into linked list */
315: row = lnk[n];
316: while (row < i) {
317: nzbd = bdiag[row] - bi[row] + 1;
318: ajtmp = bi_ptr[row] + nzbd;
319: nnz = im[row] - nzbd; /* num of columns with row<indices<=i ? */
320: PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
321: nzi += nlnk;
322: row = lnk[row];
323: }
325: bi[i+1] = bi[i] + nzi;
326: im[i] = nzi;
328: /* mark bdiag */
329: nzbd = 0;
330: nnz = nzi;
331: k = lnk[n];
332: while (nnz-- && k < i){
333: nzbd++;
334: k = lnk[k];
335: }
336: bdiag[i] = bi[i] + nzbd;
338: /* if free space is not available, make more free space */
339: if (current_space->local_remaining<nzi) {
340: nnz = (n - i)*nzi; /* estimated and max additional space needed */
341: GetMoreSpace(nnz,¤t_space);
342: reallocs++;
343: }
345: /* copy data into free space, then initialize lnk */
346: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
347: bi_ptr[i] = current_space->array;
348: current_space->array += nzi;
349: current_space->local_used += nzi;
350: current_space->local_remaining -= nzi;
351: }
352: #if defined(PETSC_USE_DEBUG)
353: if (ai[n] != 0) {
354: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
355: PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));
356: PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));
357: PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));
358: PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));
359: } else {
360: PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));
361: }
362: #endif
364: ISRestoreIndices(isrow,&r);
365: ISRestoreIndices(isicol,&ic);
367: /* destroy list of free space and other temporary array(s) */
368: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
369: MakeSpaceContiguous(&free_space,bj);
370: PetscLLDestroy(lnk,lnkbt);
371: PetscFree(cols);
373: /* put together the new matrix */
374: MatCreate(A->comm,B);
375: MatSetSizes(*B,n,n,n,n);
376: MatSetType(*B,A->type_name);
377: MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
378: PetscLogObjectParent(*B,isicol);
379: b = (Mat_SeqAIJ*)(*B)->data;
380: b->freedata = PETSC_TRUE;
381: b->singlemalloc = PETSC_FALSE;
382: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
383: b->j = bj;
384: b->i = bi;
385: b->diag = bdiag;
386: b->ilen = 0;
387: b->imax = 0;
388: b->row = isrow;
389: b->col = iscol;
390: PetscObjectReference((PetscObject)isrow);
391: PetscObjectReference((PetscObject)iscol);
392: b->icol = isicol;
393: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
395: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
396: PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
397: b->maxnz = b->nz = bi[n] ;
399: (*B)->factor = FACTOR_LU;
400: (*B)->info.factor_mallocs = reallocs;
401: (*B)->info.fill_ratio_given = f;
403: if (ai[n] != 0) {
404: (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
405: } else {
406: (*B)->info.fill_ratio_needed = 0.0;
407: }
408: MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
409: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
410: return(0);
411: }
413: /* ----------------------------------------------------------- */
416: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
417: {
418: Mat C=*B;
419: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
420: IS isrow = b->row,isicol = b->icol;
422: PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
423: PetscInt *ajtmp,*bjtmp,nz,row,*ics;
424: PetscInt *diag_offset = b->diag,diag,*pj;
425: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
426: PetscScalar d;
427: PetscReal rs;
428: LUShift_Ctx sctx;
429: PetscInt newshift;
432: ISGetIndices(isrow,&r);
433: ISGetIndices(isicol,&ic);
434: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
435: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
436: rtmps = rtmp; ics = ic;
438: if (!a->diag) {
439: MatMarkDiagonal_SeqAIJ(A);
440: }
441: /* if both shift schemes are chosen by user, only use info->shiftpd */
442: if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
443: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
444: PetscInt *aai = a->i,*ddiag = a->diag;
445: sctx.shift_top = 0;
446: for (i=0; i<n; i++) {
447: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
448: d = (a->a)[ddiag[i]];
449: rs = -PetscAbsScalar(d) - PetscRealPart(d);
450: v = a->a+aai[i];
451: nz = aai[i+1] - aai[i];
452: for (j=0; j<nz; j++)
453: rs += PetscAbsScalar(v[j]);
454: if (rs>sctx.shift_top) sctx.shift_top = rs;
455: }
456: if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
457: sctx.shift_top *= 1.1;
458: sctx.nshift_max = 5;
459: sctx.shift_lo = 0.;
460: sctx.shift_hi = 1.;
461: }
463: sctx.shift_amount = 0;
464: sctx.nshift = 0;
465: do {
466: sctx.lushift = PETSC_FALSE;
467: for (i=0; i<n; i++){
468: nz = bi[i+1] - bi[i];
469: bjtmp = bj + bi[i];
470: for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
472: /* load in initial (unfactored row) */
473: nz = a->i[r[i]+1] - a->i[r[i]];
474: ajtmp = a->j + a->i[r[i]];
475: v = a->a + a->i[r[i]];
476: for (j=0; j<nz; j++) {
477: rtmp[ics[ajtmp[j]]] = v[j];
478: }
479: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
481: row = *bjtmp++;
482: while (row < i) {
483: pc = rtmp + row;
484: if (*pc != 0.0) {
485: pv = b->a + diag_offset[row];
486: pj = b->j + diag_offset[row] + 1;
487: multiplier = *pc / *pv++;
488: *pc = multiplier;
489: nz = bi[row+1] - diag_offset[row] - 1;
490: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
491: PetscLogFlops(2*nz);
492: }
493: row = *bjtmp++;
494: }
495: /* finished row so stick it into b->a */
496: pv = b->a + bi[i] ;
497: pj = b->j + bi[i] ;
498: nz = bi[i+1] - bi[i];
499: diag = diag_offset[i] - bi[i];
500: rs = 0.0;
501: for (j=0; j<nz; j++) {
502: pv[j] = rtmps[pj[j]];
503: if (j != diag) rs += PetscAbsScalar(pv[j]);
504: }
506: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
507: sctx.rs = rs;
508: sctx.pv = pv[diag];
509: MatLUCheckShift_inline(info,sctx,newshift);
510: if (newshift == 1){
511: break; /* sctx.shift_amount is updated */
512: } else if (newshift == -1){
513: SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
514: }
515: }
517: if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
518: /*
519: * if no shift in this attempt & shifting & started shifting & can refine,
520: * then try lower shift
521: */
522: sctx.shift_hi = info->shift_fraction;
523: info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
524: sctx.shift_amount = info->shift_fraction * sctx.shift_top;
525: sctx.lushift = PETSC_TRUE;
526: sctx.nshift++;
527: }
528: } while (sctx.lushift);
530: /* invert diagonal entries for simplier triangular solves */
531: for (i=0; i<n; i++) {
532: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
533: }
535: PetscFree(rtmp);
536: ISRestoreIndices(isicol,&ic);
537: ISRestoreIndices(isrow,&r);
538: C->factor = FACTOR_LU;
539: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
540: C->assembled = PETSC_TRUE;
541: PetscLogFlops(C->n);
542: if (sctx.nshift){
543: if (info->shiftnz) {
544: PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
545: } else if (info->shiftpd) {
546: PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));
547: }
548: }
549: return(0);
550: }
554: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
555: {
557: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
558: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
559: return(0);
560: }
563: /* ----------------------------------------------------------- */
566: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
567: {
569: Mat C;
572: MatLUFactorSymbolic(A,row,col,info,&C);
573: MatLUFactorNumeric(A,info,&C);
574: MatHeaderCopy(A,C);
575: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
576: return(0);
577: }
578: /* ----------------------------------------------------------- */
581: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
582: {
583: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
584: IS iscol = a->col,isrow = a->row;
586: PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
587: PetscInt nz,*rout,*cout;
588: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
591: if (!n) return(0);
593: VecGetArray(bb,&b);
594: VecGetArray(xx,&x);
595: tmp = a->solve_work;
597: ISGetIndices(isrow,&rout); r = rout;
598: ISGetIndices(iscol,&cout); c = cout + (n-1);
600: /* forward solve the lower triangular */
601: tmp[0] = b[*r++];
602: tmps = tmp;
603: for (i=1; i<n; i++) {
604: v = aa + ai[i] ;
605: vi = aj + ai[i] ;
606: nz = a->diag[i] - ai[i];
607: sum = b[*r++];
608: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
609: tmp[i] = sum;
610: }
612: /* backward solve the upper triangular */
613: for (i=n-1; i>=0; i--){
614: v = aa + a->diag[i] + 1;
615: vi = aj + a->diag[i] + 1;
616: nz = ai[i+1] - a->diag[i] - 1;
617: sum = tmp[i];
618: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
619: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
620: }
622: ISRestoreIndices(isrow,&rout);
623: ISRestoreIndices(iscol,&cout);
624: VecRestoreArray(bb,&b);
625: VecRestoreArray(xx,&x);
626: PetscLogFlops(2*a->nz - A->n);
627: return(0);
628: }
630: /* ----------------------------------------------------------- */
633: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
634: {
635: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
637: PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
638: PetscScalar *x,*b,*aa = a->a;
639: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
640: PetscInt adiag_i,i,*vi,nz,ai_i;
641: PetscScalar *v,sum;
642: #endif
645: if (!n) return(0);
647: VecGetArray(bb,&b);
648: VecGetArray(xx,&x);
650: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
651: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
652: #else
653: /* forward solve the lower triangular */
654: x[0] = b[0];
655: for (i=1; i<n; i++) {
656: ai_i = ai[i];
657: v = aa + ai_i;
658: vi = aj + ai_i;
659: nz = adiag[i] - ai_i;
660: sum = b[i];
661: while (nz--) sum -= *v++ * x[*vi++];
662: x[i] = sum;
663: }
665: /* backward solve the upper triangular */
666: for (i=n-1; i>=0; i--){
667: adiag_i = adiag[i];
668: v = aa + adiag_i + 1;
669: vi = aj + adiag_i + 1;
670: nz = ai[i+1] - adiag_i - 1;
671: sum = x[i];
672: while (nz--) sum -= *v++ * x[*vi++];
673: x[i] = sum*aa[adiag_i];
674: }
675: #endif
676: PetscLogFlops(2*a->nz - A->n);
677: VecRestoreArray(bb,&b);
678: VecRestoreArray(xx,&x);
679: return(0);
680: }
684: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
685: {
686: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
687: IS iscol = a->col,isrow = a->row;
689: PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
690: PetscInt nz,*rout,*cout;
691: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
694: if (yy != xx) {VecCopy(yy,xx);}
696: VecGetArray(bb,&b);
697: VecGetArray(xx,&x);
698: tmp = a->solve_work;
700: ISGetIndices(isrow,&rout); r = rout;
701: ISGetIndices(iscol,&cout); c = cout + (n-1);
703: /* forward solve the lower triangular */
704: tmp[0] = b[*r++];
705: for (i=1; i<n; i++) {
706: v = aa + ai[i] ;
707: vi = aj + ai[i] ;
708: nz = a->diag[i] - ai[i];
709: sum = b[*r++];
710: while (nz--) sum -= *v++ * tmp[*vi++ ];
711: tmp[i] = sum;
712: }
714: /* backward solve the upper triangular */
715: for (i=n-1; i>=0; i--){
716: v = aa + a->diag[i] + 1;
717: vi = aj + a->diag[i] + 1;
718: nz = ai[i+1] - a->diag[i] - 1;
719: sum = tmp[i];
720: while (nz--) sum -= *v++ * tmp[*vi++ ];
721: tmp[i] = sum*aa[a->diag[i]];
722: x[*c--] += tmp[i];
723: }
725: ISRestoreIndices(isrow,&rout);
726: ISRestoreIndices(iscol,&cout);
727: VecRestoreArray(bb,&b);
728: VecRestoreArray(xx,&x);
729: PetscLogFlops(2*a->nz);
731: return(0);
732: }
733: /* -------------------------------------------------------------------*/
736: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
737: {
738: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
739: IS iscol = a->col,isrow = a->row;
741: PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
742: PetscInt nz,*rout,*cout,*diag = a->diag;
743: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
746: VecGetArray(bb,&b);
747: VecGetArray(xx,&x);
748: tmp = a->solve_work;
750: ISGetIndices(isrow,&rout); r = rout;
751: ISGetIndices(iscol,&cout); c = cout;
753: /* copy the b into temp work space according to permutation */
754: for (i=0; i<n; i++) tmp[i] = b[c[i]];
756: /* forward solve the U^T */
757: for (i=0; i<n; i++) {
758: v = aa + diag[i] ;
759: vi = aj + diag[i] + 1;
760: nz = ai[i+1] - diag[i] - 1;
761: s1 = tmp[i];
762: s1 *= (*v++); /* multiply by inverse of diagonal entry */
763: while (nz--) {
764: tmp[*vi++ ] -= (*v++)*s1;
765: }
766: tmp[i] = s1;
767: }
769: /* backward solve the L^T */
770: for (i=n-1; i>=0; i--){
771: v = aa + diag[i] - 1 ;
772: vi = aj + diag[i] - 1 ;
773: nz = diag[i] - ai[i];
774: s1 = tmp[i];
775: while (nz--) {
776: tmp[*vi-- ] -= (*v--)*s1;
777: }
778: }
780: /* copy tmp into x according to permutation */
781: for (i=0; i<n; i++) x[r[i]] = tmp[i];
783: ISRestoreIndices(isrow,&rout);
784: ISRestoreIndices(iscol,&cout);
785: VecRestoreArray(bb,&b);
786: VecRestoreArray(xx,&x);
788: PetscLogFlops(2*a->nz-A->n);
789: return(0);
790: }
794: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
795: {
796: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
797: IS iscol = a->col,isrow = a->row;
799: PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
800: PetscInt nz,*rout,*cout,*diag = a->diag;
801: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
804: if (zz != xx) {VecCopy(zz,xx);}
806: VecGetArray(bb,&b);
807: VecGetArray(xx,&x);
808: tmp = a->solve_work;
810: ISGetIndices(isrow,&rout); r = rout;
811: ISGetIndices(iscol,&cout); c = cout;
813: /* copy the b into temp work space according to permutation */
814: for (i=0; i<n; i++) tmp[i] = b[c[i]];
816: /* forward solve the U^T */
817: for (i=0; i<n; i++) {
818: v = aa + diag[i] ;
819: vi = aj + diag[i] + 1;
820: nz = ai[i+1] - diag[i] - 1;
821: tmp[i] *= *v++;
822: while (nz--) {
823: tmp[*vi++ ] -= (*v++)*tmp[i];
824: }
825: }
827: /* backward solve the L^T */
828: for (i=n-1; i>=0; i--){
829: v = aa + diag[i] - 1 ;
830: vi = aj + diag[i] - 1 ;
831: nz = diag[i] - ai[i];
832: while (nz--) {
833: tmp[*vi-- ] -= (*v--)*tmp[i];
834: }
835: }
837: /* copy tmp into x according to permutation */
838: for (i=0; i<n; i++) x[r[i]] += tmp[i];
840: ISRestoreIndices(isrow,&rout);
841: ISRestoreIndices(iscol,&cout);
842: VecRestoreArray(bb,&b);
843: VecRestoreArray(xx,&x);
845: PetscLogFlops(2*a->nz);
846: return(0);
847: }
848: /* ----------------------------------------------------------------*/
849: EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
853: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
854: {
855: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
856: IS isicol;
858: PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
859: PetscInt *bi,*cols,nnz,*cols_lvl;
860: PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
861: PetscInt i,levels,diagonal_fill;
862: PetscTruth col_identity,row_identity;
863: PetscReal f;
864: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
865: PetscBT lnkbt;
866: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
867: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
868: FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
869:
871: f = info->fill;
872: levels = (PetscInt)info->levels;
873: diagonal_fill = (PetscInt)info->diagonal_fill;
874: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
876: /* special case that simply copies fill pattern */
877: ISIdentity(isrow,&row_identity);
878: ISIdentity(iscol,&col_identity);
879: if (!levels && row_identity && col_identity) {
880: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
881: (*fact)->factor = FACTOR_LU;
882: b = (Mat_SeqAIJ*)(*fact)->data;
883: if (!b->diag) {
884: MatMarkDiagonal_SeqAIJ(*fact);
885: }
886: MatMissingDiagonal_SeqAIJ(*fact);
887: b->row = isrow;
888: b->col = iscol;
889: b->icol = isicol;
890: PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
891: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
892: PetscObjectReference((PetscObject)isrow);
893: PetscObjectReference((PetscObject)iscol);
894: return(0);
895: }
897: ISGetIndices(isrow,&r);
898: ISGetIndices(isicol,&ic);
900: /* get new row pointers */
901: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
902: bi[0] = 0;
903: /* bdiag is location of diagonal in factor */
904: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
905: bdiag[0] = 0;
907: PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
908: bjlvl_ptr = (PetscInt**)(bj_ptr + n);
910: /* create a linked list for storing column indices of the active row */
911: nlnk = n + 1;
912: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
914: /* initial FreeSpace size is f*(ai[n]+1) */
915: GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);
916: current_space = free_space;
917: GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
918: current_space_lvl = free_space_lvl;
919:
920: for (i=0; i<n; i++) {
921: nzi = 0;
922: /* copy current row into linked list */
923: nnz = ai[r[i]+1] - ai[r[i]];
924: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
925: cols = aj + ai[r[i]];
926: lnk[i] = -1; /* marker to indicate if diagonal exists */
927: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
928: nzi += nlnk;
930: /* make sure diagonal entry is included */
931: if (diagonal_fill && lnk[i] == -1) {
932: fm = n;
933: while (lnk[fm] < i) fm = lnk[fm];
934: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
935: lnk[fm] = i;
936: lnk_lvl[i] = 0;
937: nzi++; dcount++;
938: }
940: /* add pivot rows into the active row */
941: nzbd = 0;
942: prow = lnk[n];
943: while (prow < i) {
944: nnz = bdiag[prow];
945: cols = bj_ptr[prow] + nnz + 1;
946: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
947: nnz = bi[prow+1] - bi[prow] - nnz - 1;
948: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
949: nzi += nlnk;
950: prow = lnk[prow];
951: nzbd++;
952: }
953: bdiag[i] = nzbd;
954: bi[i+1] = bi[i] + nzi;
956: /* if free space is not available, make more free space */
957: if (current_space->local_remaining<nzi) {
958: nnz = nzi*(n - i); /* estimated and max additional space needed */
959: GetMoreSpace(nnz,¤t_space);
960: GetMoreSpace(nnz,¤t_space_lvl);
961: reallocs++;
962: }
964: /* copy data into free_space and free_space_lvl, then initialize lnk */
965: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
966: bj_ptr[i] = current_space->array;
967: bjlvl_ptr[i] = current_space_lvl->array;
969: /* make sure the active row i has diagonal entry */
970: if (*(bj_ptr[i]+bdiag[i]) != i) {
971: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
972: try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
973: }
975: current_space->array += nzi;
976: current_space->local_used += nzi;
977: current_space->local_remaining -= nzi;
978: current_space_lvl->array += nzi;
979: current_space_lvl->local_used += nzi;
980: current_space_lvl->local_remaining -= nzi;
981: }
983: ISRestoreIndices(isrow,&r);
984: ISRestoreIndices(isicol,&ic);
986: /* destroy list of free space and other temporary arrays */
987: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
988: MakeSpaceContiguous(&free_space,bj);
989: PetscIncompleteLLDestroy(lnk,lnkbt);
990: DestroySpace(free_space_lvl);
991: PetscFree(bj_ptr);
993: #if defined(PETSC_USE_DEBUG)
994: {
995: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
996: PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));
997: PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));
998: PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));
999: PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));
1000: if (diagonal_fill) {
1001: PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));
1002: }
1003: }
1004: #endif
1006: /* put together the new matrix */
1007: MatCreate(A->comm,fact);
1008: MatSetSizes(*fact,n,n,n,n);
1009: MatSetType(*fact,A->type_name);
1010: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1011: PetscLogObjectParent(*fact,isicol);
1012: b = (Mat_SeqAIJ*)(*fact)->data;
1013: b->freedata = PETSC_TRUE;
1014: b->singlemalloc = PETSC_FALSE;
1015: len = (bi[n] )*sizeof(PetscScalar);
1016: PetscMalloc(len+1,&b->a);
1017: b->j = bj;
1018: b->i = bi;
1019: for (i=0; i<n; i++) bdiag[i] += bi[i];
1020: b->diag = bdiag;
1021: b->ilen = 0;
1022: b->imax = 0;
1023: b->row = isrow;
1024: b->col = iscol;
1025: PetscObjectReference((PetscObject)isrow);
1026: PetscObjectReference((PetscObject)iscol);
1027: b->icol = isicol;
1028: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1029: /* In b structure: Free imax, ilen, old a, old j.
1030: Allocate bdiag, solve_work, new a, new j */
1031: PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1032: b->maxnz = b->nz = bi[n] ;
1033: (*fact)->factor = FACTOR_LU;
1034: (*fact)->info.factor_mallocs = reallocs;
1035: (*fact)->info.fill_ratio_given = f;
1036: (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1038: MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1039: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1041: return(0);
1042: }
1044: #include src/mat/impls/sbaij/seq/sbaij.h
1047: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1048: {
1049: Mat C = *B;
1050: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1051: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
1052: IS ip=b->row;
1054: PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1055: PetscInt *ai=a->i,*aj=a->j;
1056: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1057: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1058: PetscReal zeropivot,rs,shiftnz;
1059: PetscTruth shiftpd;
1060: ChShift_Ctx sctx;
1061: PetscInt newshift;
1064: shiftnz = info->shiftnz;
1065: shiftpd = info->shiftpd;
1066: zeropivot = info->zeropivot;
1068: ISGetIndices(ip,&rip);
1069:
1070: /* initialization */
1071: nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1072: PetscMalloc(nz,&il);
1073: jl = il + mbs;
1074: rtmp = (MatScalar*)(jl + mbs);
1076: sctx.shift_amount = 0;
1077: sctx.nshift = 0;
1078: do {
1079: sctx.chshift = PETSC_FALSE;
1080: for (i=0; i<mbs; i++) {
1081: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1082: }
1083:
1084: for (k = 0; k<mbs; k++){
1085: bval = ba + bi[k];
1086: /* initialize k-th row by the perm[k]-th row of A */
1087: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1088: for (j = jmin; j < jmax; j++){
1089: col = rip[aj[j]];
1090: if (col >= k){ /* only take upper triangular entry */
1091: rtmp[col] = aa[j];
1092: *bval++ = 0.0; /* for in-place factorization */
1093: }
1094: }
1095: /* shift the diagonal of the matrix */
1096: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1098: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1099: dk = rtmp[k];
1100: i = jl[k]; /* first row to be added to k_th row */
1102: while (i < k){
1103: nexti = jl[i]; /* next row to be added to k_th row */
1105: /* compute multiplier, update diag(k) and U(i,k) */
1106: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1107: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
1108: dk += uikdi*ba[ili];
1109: ba[ili] = uikdi; /* -U(i,k) */
1111: /* add multiple of row i to k-th row */
1112: jmin = ili + 1; jmax = bi[i+1];
1113: if (jmin < jmax){
1114: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1115: /* update il and jl for row i */
1116: il[i] = jmin;
1117: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1118: }
1119: i = nexti;
1120: }
1122: /* shift the diagonals when zero pivot is detected */
1123: /* compute rs=sum of abs(off-diagonal) */
1124: rs = 0.0;
1125: jmin = bi[k]+1;
1126: nz = bi[k+1] - jmin;
1127: if (nz){
1128: bcol = bj + jmin;
1129: while (nz--){
1130: rs += PetscAbsScalar(rtmp[*bcol]);
1131: bcol++;
1132: }
1133: }
1135: sctx.rs = rs;
1136: sctx.pv = dk;
1137: MatCholeskyCheckShift_inline(info,sctx,newshift);
1138: if (newshift == 1){
1139: break; /* sctx.shift_amount is updated */
1140: } else if (newshift == -1){
1141: SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1142: }
1143:
1144: /* copy data into U(k,:) */
1145: ba[bi[k]] = 1.0/dk; /* U(k,k) */
1146: jmin = bi[k]+1; jmax = bi[k+1];
1147: if (jmin < jmax) {
1148: for (j=jmin; j<jmax; j++){
1149: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1150: }
1151: /* add the k-th row into il and jl */
1152: il[k] = jmin;
1153: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1154: }
1155: }
1156: } while (sctx.chshift);
1157: PetscFree(il);
1159: ISRestoreIndices(ip,&rip);
1160: C->factor = FACTOR_CHOLESKY;
1161: C->assembled = PETSC_TRUE;
1162: C->preallocated = PETSC_TRUE;
1163: PetscLogFlops(C->m);
1164: if (sctx.nshift){
1165: if (shiftnz) {
1166: PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
1167: } else if (shiftpd) {
1168: PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
1169: }
1170: }
1171: return(0);
1172: }
1176: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1177: {
1178: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1179: Mat_SeqSBAIJ *b;
1180: Mat B;
1182: PetscTruth perm_identity;
1183: PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1184: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1185: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1186: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1187: PetscReal fill=info->fill,levels=info->levels;
1188: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1189: FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1190: PetscBT lnkbt;
1191:
1193: ISIdentity(perm,&perm_identity);
1194: ISGetIndices(perm,&rip);
1196: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1197: ui[0] = 0;
1199: /* special case that simply copies fill pattern */
1200: if (!levels && perm_identity) {
1201: MatMarkDiagonal_SeqAIJ(A);
1202: for (i=0; i<am; i++) {
1203: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1204: }
1205: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1206: cols = uj;
1207: for (i=0; i<am; i++) {
1208: aj = a->j + a->diag[i];
1209: ncols = ui[i+1] - ui[i];
1210: for (j=0; j<ncols; j++) *cols++ = *aj++;
1211: }
1212: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1213: /* initialization */
1214: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
1216: /* jl: linked list for storing indices of the pivot rows
1217: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1218: PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1219: il = jl + am;
1220: uj_ptr = (PetscInt**)(il + am);
1221: uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1222: for (i=0; i<am; i++){
1223: jl[i] = am; il[i] = 0;
1224: }
1226: /* create and initialize a linked list for storing column indices of the active row k */
1227: nlnk = am + 1;
1228: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
1230: /* initial FreeSpace size is fill*(ai[am]+1) */
1231: GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);
1232: current_space = free_space;
1233: GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1234: current_space_lvl = free_space_lvl;
1236: for (k=0; k<am; k++){ /* for each active row k */
1237: /* initialize lnk by the column indices of row rip[k] of A */
1238: nzk = 0;
1239: ncols = ai[rip[k]+1] - ai[rip[k]];
1240: ncols_upper = 0;
1241: for (j=0; j<ncols; j++){
1242: i = *(aj + ai[rip[k]] + j);
1243: if (rip[i] >= k){ /* only take upper triangular entry */
1244: ajtmp[ncols_upper] = i;
1245: ncols_upper++;
1246: }
1247: }
1248: PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);
1249: nzk += nlnk;
1251: /* update lnk by computing fill-in for each pivot row to be merged in */
1252: prow = jl[k]; /* 1st pivot row */
1253:
1254: while (prow < k){
1255: nextprow = jl[prow];
1256:
1257: /* merge prow into k-th row */
1258: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1259: jmax = ui[prow+1];
1260: ncols = jmax-jmin;
1261: i = jmin - ui[prow];
1262: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1263: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
1264: j = *(uj - 1);
1265: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1266: nzk += nlnk;
1268: /* update il and jl for prow */
1269: if (jmin < jmax){
1270: il[prow] = jmin;
1271: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1272: }
1273: prow = nextprow;
1274: }
1276: /* if free space is not available, make more free space */
1277: if (current_space->local_remaining<nzk) {
1278: i = am - k + 1; /* num of unfactored rows */
1279: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1280: GetMoreSpace(i,¤t_space);
1281: GetMoreSpace(i,¤t_space_lvl);
1282: reallocs++;
1283: }
1285: /* copy data into free_space and free_space_lvl, then initialize lnk */
1286: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1288: /* add the k-th row into il and jl */
1289: if (nzk > 1){
1290: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1291: jl[k] = jl[i]; jl[i] = k;
1292: il[k] = ui[k] + 1;
1293: }
1294: uj_ptr[k] = current_space->array;
1295: uj_lvl_ptr[k] = current_space_lvl->array;
1297: current_space->array += nzk;
1298: current_space->local_used += nzk;
1299: current_space->local_remaining -= nzk;
1301: current_space_lvl->array += nzk;
1302: current_space_lvl->local_used += nzk;
1303: current_space_lvl->local_remaining -= nzk;
1305: ui[k+1] = ui[k] + nzk;
1306: }
1308: #if defined(PETSC_USE_DEBUG)
1309: if (ai[am] != 0) {
1310: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1311: PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));
1312: PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));
1313: PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));
1314: } else {
1315: PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));
1316: }
1317: #endif
1319: ISRestoreIndices(perm,&rip);
1320: PetscFree(jl);
1321: PetscFree(ajtmp);
1323: /* destroy list of free space and other temporary array(s) */
1324: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1325: MakeSpaceContiguous(&free_space,uj);
1326: PetscIncompleteLLDestroy(lnk,lnkbt);
1327: DestroySpace(free_space_lvl);
1329: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1331: /* put together the new matrix in MATSEQSBAIJ format */
1332: MatCreate(PETSC_COMM_SELF,fact);
1333: MatSetSizes(*fact,am,am,am,am);
1334: B = *fact;
1335: MatSetType(B,MATSEQSBAIJ);
1336: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1338: b = (Mat_SeqSBAIJ*)B->data;
1339: b->singlemalloc = PETSC_FALSE;
1340: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1341: b->j = uj;
1342: b->i = ui;
1343: b->diag = 0;
1344: b->ilen = 0;
1345: b->imax = 0;
1346: b->row = perm;
1347: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1348: PetscObjectReference((PetscObject)perm);
1349: b->icol = perm;
1350: PetscObjectReference((PetscObject)perm);
1351: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1352: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1353: b->maxnz = b->nz = ui[am];
1354:
1355: B->factor = FACTOR_CHOLESKY;
1356: B->info.factor_mallocs = reallocs;
1357: B->info.fill_ratio_given = fill;
1358: if (ai[am] != 0) {
1359: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1360: } else {
1361: B->info.fill_ratio_needed = 0.0;
1362: }
1363: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1364: if (perm_identity){
1365: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1366: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1367: }
1368: return(0);
1369: }
1373: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1374: {
1375: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1376: Mat_SeqSBAIJ *b;
1377: Mat B;
1379: PetscTruth perm_identity;
1380: PetscReal fill = info->fill;
1381: PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1382: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1383: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1384: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1385: PetscBT lnkbt;
1386: IS iperm;
1389: /* check whether perm is the identity mapping */
1390: ISIdentity(perm,&perm_identity);
1391: ISGetIndices(perm,&rip);
1393: if (!perm_identity){
1394: /* check if perm is symmetric! */
1395: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1396: ISGetIndices(iperm,&riip);
1397: for (i=0; i<am; i++) {
1398: if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1399: }
1400: ISRestoreIndices(iperm,&riip);
1401: ISDestroy(iperm);
1402: }
1404: /* initialization */
1405: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1406: ui[0] = 0;
1408: /* jl: linked list for storing indices of the pivot rows
1409: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1410: PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1411: il = jl + am;
1412: cols = il + am;
1413: ui_ptr = (PetscInt**)(cols + am);
1414: for (i=0; i<am; i++){
1415: jl[i] = am; il[i] = 0;
1416: }
1418: /* create and initialize a linked list for storing column indices of the active row k */
1419: nlnk = am + 1;
1420: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
1422: /* initial FreeSpace size is fill*(ai[am]+1) */
1423: GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);
1424: current_space = free_space;
1426: for (k=0; k<am; k++){ /* for each active row k */
1427: /* initialize lnk by the column indices of row rip[k] of A */
1428: nzk = 0;
1429: ncols = ai[rip[k]+1] - ai[rip[k]];
1430: ncols_upper = 0;
1431: for (j=0; j<ncols; j++){
1432: i = rip[*(aj + ai[rip[k]] + j)];
1433: if (i >= k){ /* only take upper triangular entry */
1434: cols[ncols_upper] = i;
1435: ncols_upper++;
1436: }
1437: }
1438: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1439: nzk += nlnk;
1441: /* update lnk by computing fill-in for each pivot row to be merged in */
1442: prow = jl[k]; /* 1st pivot row */
1443:
1444: while (prow < k){
1445: nextprow = jl[prow];
1446: /* merge prow into k-th row */
1447: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1448: jmax = ui[prow+1];
1449: ncols = jmax-jmin;
1450: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1451: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1452: nzk += nlnk;
1454: /* update il and jl for prow */
1455: if (jmin < jmax){
1456: il[prow] = jmin;
1457: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1458: }
1459: prow = nextprow;
1460: }
1462: /* if free space is not available, make more free space */
1463: if (current_space->local_remaining<nzk) {
1464: i = am - k + 1; /* num of unfactored rows */
1465: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1466: GetMoreSpace(i,¤t_space);
1467: reallocs++;
1468: }
1470: /* copy data into free space, then initialize lnk */
1471: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
1473: /* add the k-th row into il and jl */
1474: if (nzk-1 > 0){
1475: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1476: jl[k] = jl[i]; jl[i] = k;
1477: il[k] = ui[k] + 1;
1478: }
1479: ui_ptr[k] = current_space->array;
1480: current_space->array += nzk;
1481: current_space->local_used += nzk;
1482: current_space->local_remaining -= nzk;
1484: ui[k+1] = ui[k] + nzk;
1485: }
1487: #if defined(PETSC_USE_DEBUG)
1488: if (ai[am] != 0) {
1489: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1490: PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));
1491: PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));
1492: PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));
1493: } else {
1494: PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));
1495: }
1496: #endif
1498: ISRestoreIndices(perm,&rip);
1499: PetscFree(jl);
1501: /* destroy list of free space and other temporary array(s) */
1502: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1503: MakeSpaceContiguous(&free_space,uj);
1504: PetscLLDestroy(lnk,lnkbt);
1506: /* put together the new matrix in MATSEQSBAIJ format */
1507: MatCreate(PETSC_COMM_SELF,fact);
1508: MatSetSizes(*fact,am,am,am,am);
1509: B = *fact;
1510: MatSetType(B,MATSEQSBAIJ);
1511: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1513: b = (Mat_SeqSBAIJ*)B->data;
1514: b->singlemalloc = PETSC_FALSE;
1515: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1516: b->j = uj;
1517: b->i = ui;
1518: b->diag = 0;
1519: b->ilen = 0;
1520: b->imax = 0;
1521: b->row = perm;
1522: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1523: PetscObjectReference((PetscObject)perm);
1524: b->icol = perm;
1525: PetscObjectReference((PetscObject)perm);
1526: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1527: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1528: b->maxnz = b->nz = ui[am];
1529:
1530: B->factor = FACTOR_CHOLESKY;
1531: B->info.factor_mallocs = reallocs;
1532: B->info.fill_ratio_given = fill;
1533: if (ai[am] != 0) {
1534: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1535: } else {
1536: B->info.fill_ratio_needed = 0.0;
1537: }
1538: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1539: if (perm_identity){
1540: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1541: (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1542: }
1543: return(0);
1544: }