Actual source code: aijfact.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/inline/dot.h
5: #include src/inline/spops.h
6: #include petscbt.h
7: #include src/mat/utils/freespace.h
11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12: {
15: SETERRQ(PETSC_ERR_SUP,"Code not written");
16: #if !defined(PETSC_USE_DEBUG)
17: return(0);
18: #endif
19: }
22: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26: #endif
30: /* ------------------------------------------------------------
32: This interface was contribed by Tony Caola
34: This routine is an interface to the pivoting drop-tolerance
35: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36: SPARSEKIT2.
38: The SPARSEKIT2 routines used here are covered by the GNU
39: copyright; see the file gnu in this directory.
41: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42: help in getting this routine ironed out.
44: The major drawback to this routine is that if info->fill is
45: not large enough it fails rather than allocating more space;
46: this can be fixed by hacking/improving the f2c version of
47: Yousef Saad's code.
49: ------------------------------------------------------------
50: */
51: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52: {
53: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
55: SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56: You can obtain the drop tolerance routines by installing PETSc from\n\
57: www.mcs.anl.gov/petsc\n");
58: #else
59: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
60: IS iscolf,isicol,isirow;
61: PetscTruth reorder;
62: PetscErrorCode ierr,sierr;
63: PetscInt *c,*r,*ic,i,n = A->rmap.n;
64: PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65: PetscInt *ordcol,*iwk,*iperm,*jw;
66: PetscInt jmax,lfill,job,*o_i,*o_j;
67: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68: PetscReal af;
72: if (info->dt == PETSC_DEFAULT) info->dt = .005;
73: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
75: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
76: lfill = (PetscInt)(info->dtcount/2.0);
77: jmax = (PetscInt)(info->fill*a->nz);
80: /* ------------------------------------------------------------
81: If reorder=.TRUE., then the original matrix has to be
82: reordered to reflect the user selected ordering scheme, and
83: then de-reordered so it is in it's original format.
84: Because Saad's dperm() is NOT in place, we have to copy
85: the original matrix and allocate more storage. . .
86: ------------------------------------------------------------
87: */
89: /* set reorder to true if either isrow or iscol is not identity */
90: ISIdentity(isrow,&reorder);
91: if (reorder) {ISIdentity(iscol,&reorder);}
92: reorder = PetscNot(reorder);
94:
95: /* storage for ilu factor */
96: PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
97: PetscMalloc(jmax*sizeof(PetscInt),&new_j);
98: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
99: PetscMalloc(n*sizeof(PetscInt),&ordcol);
101: /* ------------------------------------------------------------
102: Make sure that everything is Fortran formatted (1-Based)
103: ------------------------------------------------------------
104: */
105: for (i=old_i[0];i<old_i[n];i++) {
106: old_j[i]++;
107: }
108: for(i=0;i<n+1;i++) {
109: old_i[i]++;
110: };
111:
113: if (reorder) {
114: ISGetIndices(iscol,&c);
115: ISGetIndices(isrow,&r);
116: for(i=0;i<n;i++) {
117: r[i] = r[i]+1;
118: c[i] = c[i]+1;
119: }
120: PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
121: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
122: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
123: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124: for (i=0;i<n;i++) {
125: r[i] = r[i]-1;
126: c[i] = c[i]-1;
127: }
128: ISRestoreIndices(iscol,&c);
129: ISRestoreIndices(isrow,&r);
130: o_a = old_a2;
131: o_j = old_j2;
132: o_i = old_i2;
133: } else {
134: o_a = old_a;
135: o_j = old_j;
136: o_i = old_i;
137: }
139: /* ------------------------------------------------------------
140: Call Saad's ilutp() routine to generate the factorization
141: ------------------------------------------------------------
142: */
144: PetscMalloc(2*n*sizeof(PetscInt),&iperm);
145: PetscMalloc(2*n*sizeof(PetscInt),&jw);
146: PetscMalloc(n*sizeof(PetscScalar),&w);
148: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149: if (sierr) {
150: switch (sierr) {
151: case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152: case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153: case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154: case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155: case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156: default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157: }
158: }
160: PetscFree(w);
161: PetscFree(jw);
163: /* ------------------------------------------------------------
164: Saad's routine gives the result in Modified Sparse Row (msr)
165: Convert to Compressed Sparse Row format (csr)
166: ------------------------------------------------------------
167: */
169: PetscMalloc(n*sizeof(PetscScalar),&wk);
170: PetscMalloc((n+1)*sizeof(PetscInt),&iwk);
172: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
174: PetscFree(iwk);
175: PetscFree(wk);
177: if (reorder) {
178: PetscFree(old_a2);
179: PetscFree(old_j2);
180: PetscFree(old_i2);
181: } else {
182: /* fix permutation of old_j that the factorization introduced */
183: for (i=old_i[0]; i<old_i[n]; i++) {
184: old_j[i-1] = iperm[old_j[i-1]-1];
185: }
186: }
188: /* get rid of the shift to indices starting at 1 */
189: for (i=0; i<n+1; i++) {
190: old_i[i]--;
191: }
192: for (i=old_i[0];i<old_i[n];i++) {
193: old_j[i]--;
194: }
195:
196: /* Make the factored matrix 0-based */
197: for (i=0; i<n+1; i++) {
198: new_i[i]--;
199: }
200: for (i=new_i[0];i<new_i[n];i++) {
201: new_j[i]--;
202: }
204: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205: /*-- permute the right-hand-side and solution vectors --*/
206: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
207: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
208: ISGetIndices(isicol,&ic);
209: for(i=0; i<n; i++) {
210: ordcol[i] = ic[iperm[i]-1];
211: };
212: ISRestoreIndices(isicol,&ic);
213: ISDestroy(isicol);
215: PetscFree(iperm);
217: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
218: PetscFree(ordcol);
220: /*----- put together the new matrix -----*/
222: MatCreate(((PetscObject)A)->comm,fact);
223: MatSetSizes(*fact,n,n,n,n);
224: MatSetType(*fact,((PetscObject)A)->type_name);
225: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
226: (*fact)->factor = FACTOR_LU;
227: (*fact)->assembled = PETSC_TRUE;
229: b = (Mat_SeqAIJ*)(*fact)->data;
230: b->free_a = PETSC_TRUE;
231: b->free_ij = PETSC_TRUE;
232: b->singlemalloc = PETSC_FALSE;
233: b->a = new_a;
234: b->j = new_j;
235: b->i = new_i;
236: b->ilen = 0;
237: b->imax = 0;
238: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239: b->row = isirow;
240: b->col = iscolf;
241: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
242: b->maxnz = b->nz = new_i[n];
243: MatMarkDiagonal_SeqAIJ(*fact);
244: (*fact)->info.factor_mallocs = 0;
246: af = ((double)b->nz)/((double)a->nz) + .001;
247: PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);
248: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
249: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
250: PetscInfo(A,"for best performance.\n");
252: MatILUDTFactor_Inode(A,isrow,iscol,info,fact);
254: return(0);
255: #endif
256: }
260: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
261: {
262: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
263: IS isicol;
264: PetscErrorCode ierr;
265: PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
266: PetscInt *bi,*bj,*ajtmp;
267: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
268: PetscReal f;
269: PetscInt nlnk,*lnk,k,**bi_ptr;
270: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
271: PetscBT lnkbt;
274: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
275: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
276: ISGetIndices(isrow,&r);
277: ISGetIndices(isicol,&ic);
279: /* get new row pointers */
280: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
281: bi[0] = 0;
283: /* bdiag is location of diagonal in factor */
284: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
285: bdiag[0] = 0;
287: /* linked list for storing column indices of the active row */
288: nlnk = n + 1;
289: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
291: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
293: /* initial FreeSpace size is f*(ai[n]+1) */
294: f = info->fill;
295: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
296: current_space = free_space;
298: for (i=0; i<n; i++) {
299: /* copy previous fill into linked list */
300: nzi = 0;
301: nnz = ai[r[i]+1] - ai[r[i]];
302: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
303: ajtmp = aj + ai[r[i]];
304: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
305: nzi += nlnk;
307: /* add pivot rows into linked list */
308: row = lnk[n];
309: while (row < i) {
310: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
311: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
312: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
313: nzi += nlnk;
314: row = lnk[row];
315: }
316: bi[i+1] = bi[i] + nzi;
317: im[i] = nzi;
319: /* mark bdiag */
320: nzbd = 0;
321: nnz = nzi;
322: k = lnk[n];
323: while (nnz-- && k < i){
324: nzbd++;
325: k = lnk[k];
326: }
327: bdiag[i] = bi[i] + nzbd;
329: /* if free space is not available, make more free space */
330: if (current_space->local_remaining<nzi) {
331: nnz = (n - i)*nzi; /* estimated and max additional space needed */
332: PetscFreeSpaceGet(nnz,¤t_space);
333: reallocs++;
334: }
336: /* copy data into free space, then initialize lnk */
337: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
338: bi_ptr[i] = current_space->array;
339: current_space->array += nzi;
340: current_space->local_used += nzi;
341: current_space->local_remaining -= nzi;
342: }
343: #if defined(PETSC_USE_INFO)
344: if (ai[n] != 0) {
345: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
346: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
347: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
348: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
349: PetscInfo(A,"for best performance.\n");
350: } else {
351: PetscInfo(A,"Empty matrix\n");
352: }
353: #endif
355: ISRestoreIndices(isrow,&r);
356: ISRestoreIndices(isicol,&ic);
358: /* destroy list of free space and other temporary array(s) */
359: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
360: PetscFreeSpaceContiguous(&free_space,bj);
361: PetscLLDestroy(lnk,lnkbt);
362: PetscFree2(bi_ptr,im);
364: /* put together the new matrix */
365: MatCreate(((PetscObject)A)->comm,B);
366: MatSetSizes(*B,n,n,n,n);
367: MatSetType(*B,((PetscObject)A)->type_name);
368: MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
369: PetscLogObjectParent(*B,isicol);
370: b = (Mat_SeqAIJ*)(*B)->data;
371: b->free_a = PETSC_TRUE;
372: b->free_ij = PETSC_TRUE;
373: b->singlemalloc = PETSC_FALSE;
374: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
375: b->j = bj;
376: b->i = bi;
377: b->diag = bdiag;
378: b->ilen = 0;
379: b->imax = 0;
380: b->row = isrow;
381: b->col = iscol;
382: PetscObjectReference((PetscObject)isrow);
383: PetscObjectReference((PetscObject)iscol);
384: b->icol = isicol;
385: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
387: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
388: PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
389: b->maxnz = b->nz = bi[n] ;
391: (*B)->factor = FACTOR_LU;
392: (*B)->info.factor_mallocs = reallocs;
393: (*B)->info.fill_ratio_given = f;
395: if (ai[n] != 0) {
396: (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
397: } else {
398: (*B)->info.fill_ratio_needed = 0.0;
399: }
400: MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
401: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
402: return(0);
403: }
405: /*
406: Trouble in factorization, should we dump the original matrix?
407: */
410: PetscErrorCode MatFactorDumpMatrix(Mat A)
411: {
413: PetscTruth flg;
416: PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);
417: if (flg) {
418: PetscViewer viewer;
419: char filename[PETSC_MAX_PATH_LEN];
421: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
422: PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);
423: MatView(A,viewer);
424: PetscViewerDestroy(viewer);
425: }
426: return(0);
427: }
429: /* ----------------------------------------------------------- */
432: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
433: {
434: Mat C=*B;
435: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
436: IS isrow = b->row,isicol = b->icol;
438: PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
439: PetscInt *ajtmp,*bjtmp,nz,row,*ics;
440: PetscInt *diag_offset = b->diag,diag,*pj;
441: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
442: PetscScalar d;
443: PetscReal rs;
444: LUShift_Ctx sctx;
445: PetscInt newshift,*ddiag;
448: ISGetIndices(isrow,&r);
449: ISGetIndices(isicol,&ic);
450: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
451: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
452: rtmps = rtmp; ics = ic;
454: sctx.shift_top = 0;
455: sctx.nshift_max = 0;
456: sctx.shift_lo = 0;
457: sctx.shift_hi = 0;
459: /* if both shift schemes are chosen by user, only use info->shiftpd */
460: if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
461: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
462: PetscInt *aai = a->i;
463: ddiag = a->diag;
464: sctx.shift_top = 0;
465: for (i=0; i<n; i++) {
466: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
467: d = (a->a)[ddiag[i]];
468: rs = -PetscAbsScalar(d) - PetscRealPart(d);
469: v = a->a+aai[i];
470: nz = aai[i+1] - aai[i];
471: for (j=0; j<nz; j++)
472: rs += PetscAbsScalar(v[j]);
473: if (rs>sctx.shift_top) sctx.shift_top = rs;
474: }
475: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
476: sctx.shift_top *= 1.1;
477: sctx.nshift_max = 5;
478: sctx.shift_lo = 0.;
479: sctx.shift_hi = 1.;
480: }
482: sctx.shift_amount = 0;
483: sctx.nshift = 0;
484: do {
485: sctx.lushift = PETSC_FALSE;
486: for (i=0; i<n; i++){
487: nz = bi[i+1] - bi[i];
488: bjtmp = bj + bi[i];
489: for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
491: /* load in initial (unfactored row) */
492: nz = a->i[r[i]+1] - a->i[r[i]];
493: ajtmp = a->j + a->i[r[i]];
494: v = a->a + a->i[r[i]];
495: for (j=0; j<nz; j++) {
496: rtmp[ics[ajtmp[j]]] = v[j];
497: }
498: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
500: row = *bjtmp++;
501: while (row < i) {
502: pc = rtmp + row;
503: if (*pc != 0.0) {
504: pv = b->a + diag_offset[row];
505: pj = b->j + diag_offset[row] + 1;
506: multiplier = *pc / *pv++;
507: *pc = multiplier;
508: nz = bi[row+1] - diag_offset[row] - 1;
509: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
510: PetscLogFlops(2*nz);
511: }
512: row = *bjtmp++;
513: }
514: /* finished row so stick it into b->a */
515: pv = b->a + bi[i] ;
516: pj = b->j + bi[i] ;
517: nz = bi[i+1] - bi[i];
518: diag = diag_offset[i] - bi[i];
519: rs = 0.0;
520: for (j=0; j<nz; j++) {
521: pv[j] = rtmps[pj[j]];
522: if (j != diag) rs += PetscAbsScalar(pv[j]);
523: }
525: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
526: sctx.rs = rs;
527: sctx.pv = pv[diag];
528: MatLUCheckShift_inline(info,sctx,i,newshift);
529: if (newshift == 1) break;
530: }
532: if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
533: /*
534: * if no shift in this attempt & shifting & started shifting & can refine,
535: * then try lower shift
536: */
537: sctx.shift_hi = info->shift_fraction;
538: info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
539: sctx.shift_amount = info->shift_fraction * sctx.shift_top;
540: sctx.lushift = PETSC_TRUE;
541: sctx.nshift++;
542: }
543: } while (sctx.lushift);
545: /* invert diagonal entries for simplier triangular solves */
546: for (i=0; i<n; i++) {
547: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
548: }
550: PetscFree(rtmp);
551: ISRestoreIndices(isicol,&ic);
552: ISRestoreIndices(isrow,&r);
553: C->factor = FACTOR_LU;
554: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
555: C->assembled = PETSC_TRUE;
556: PetscLogFlops(C->cmap.n);
557: if (sctx.nshift){
558: if (info->shiftnz) {
559: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
560: } else if (info->shiftpd) {
561: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
562: }
563: }
564: return(0);
565: }
567: /*
568: This routine implements inplace ILU(0) with row or/and column permutations.
569: Input:
570: A - original matrix
571: Output;
572: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
573: a->j (col index) is permuted by the inverse of colperm, then sorted
574: a->a reordered accordingly with a->j
575: a->diag (ptr to diagonal elements) is updated.
576: */
579: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B)
580: {
581: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
582: IS isrow = a->row,isicol = a->icol;
584: PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j;
585: PetscInt *ajtmp,nz,row,*ics;
586: PetscInt *diag = a->diag,nbdiag,*pj;
587: PetscScalar *rtmp,*v,*pc,multiplier,*pv,d;
588: PetscReal rs;
589: LUShift_Ctx sctx;
590: PetscInt newshift;
593: if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
594: ISGetIndices(isrow,&r);
595: ISGetIndices(isicol,&ic);
596: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
597: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
598: ics = ic;
600: sctx.shift_top = 0;
601: sctx.nshift_max = 0;
602: sctx.shift_lo = 0;
603: sctx.shift_hi = 0;
605: /* if both shift schemes are chosen by user, only use info->shiftpd */
606: if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
607: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
608: sctx.shift_top = 0;
609: for (i=0; i<n; i++) {
610: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
611: d = (a->a)[diag[i]];
612: rs = -PetscAbsScalar(d) - PetscRealPart(d);
613: v = a->a+ai[i];
614: nz = ai[i+1] - ai[i];
615: for (j=0; j<nz; j++)
616: rs += PetscAbsScalar(v[j]);
617: if (rs>sctx.shift_top) sctx.shift_top = rs;
618: }
619: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
620: sctx.shift_top *= 1.1;
621: sctx.nshift_max = 5;
622: sctx.shift_lo = 0.;
623: sctx.shift_hi = 1.;
624: }
626: sctx.shift_amount = 0;
627: sctx.nshift = 0;
628: do {
629: sctx.lushift = PETSC_FALSE;
630: for (i=0; i<n; i++){
631: /* load in initial unfactored row */
632: nz = ai[r[i]+1] - ai[r[i]];
633: ajtmp = aj + ai[r[i]];
634: v = a->a + ai[r[i]];
635: /* sort permuted ajtmp and values v accordingly */
636: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
637: PetscSortIntWithScalarArray(nz,ajtmp,v);
639: diag[r[i]] = ai[r[i]];
640: for (j=0; j<nz; j++) {
641: rtmp[ajtmp[j]] = v[j];
642: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
643: }
644: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
646: row = *ajtmp++;
647: while (row < i) {
648: pc = rtmp + row;
649: if (*pc != 0.0) {
650: pv = a->a + diag[r[row]];
651: pj = aj + diag[r[row]] + 1;
653: multiplier = *pc / *pv++;
654: *pc = multiplier;
655: nz = ai[r[row]+1] - diag[r[row]] - 1;
656: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
657: PetscLogFlops(2*nz);
658: }
659: row = *ajtmp++;
660: }
661: /* finished row so overwrite it onto a->a */
662: pv = a->a + ai[r[i]] ;
663: pj = aj + ai[r[i]] ;
664: nz = ai[r[i]+1] - ai[r[i]];
665: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
666:
667: rs = 0.0;
668: for (j=0; j<nz; j++) {
669: pv[j] = rtmp[pj[j]];
670: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
671: }
673: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
674: sctx.rs = rs;
675: sctx.pv = pv[nbdiag];
676: MatLUCheckShift_inline(info,sctx,i,newshift);
677: if (newshift == 1) break;
678: }
680: if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
681: /*
682: * if no shift in this attempt & shifting & started shifting & can refine,
683: * then try lower shift
684: */
685: sctx.shift_hi = info->shift_fraction;
686: info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
687: sctx.shift_amount = info->shift_fraction * sctx.shift_top;
688: sctx.lushift = PETSC_TRUE;
689: sctx.nshift++;
690: }
691: } while (sctx.lushift);
693: /* invert diagonal entries for simplier triangular solves */
694: for (i=0; i<n; i++) {
695: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
696: }
698: PetscFree(rtmp);
699: ISRestoreIndices(isicol,&ic);
700: ISRestoreIndices(isrow,&r);
701: A->factor = FACTOR_LU;
702: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
703: A->assembled = PETSC_TRUE;
704: PetscLogFlops(A->cmap.n);
705: if (sctx.nshift){
706: if (info->shiftnz) {
707: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
708: } else if (info->shiftpd) {
709: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
710: }
711: }
712: return(0);
713: }
717: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
718: {
720: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
721: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
722: return(0);
723: }
726: /* ----------------------------------------------------------- */
729: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
730: {
732: Mat C;
735: MatLUFactorSymbolic(A,row,col,info,&C);
736: MatLUFactorNumeric(A,info,&C);
737: MatHeaderCopy(A,C);
738: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
739: return(0);
740: }
741: /* ----------------------------------------------------------- */
744: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
745: {
746: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
747: IS iscol = a->col,isrow = a->row;
749: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
750: PetscInt nz,*rout,*cout;
751: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
754: if (!n) return(0);
756: VecGetArray(bb,&b);
757: VecGetArray(xx,&x);
758: tmp = a->solve_work;
760: ISGetIndices(isrow,&rout); r = rout;
761: ISGetIndices(iscol,&cout); c = cout + (n-1);
763: /* forward solve the lower triangular */
764: tmp[0] = b[*r++];
765: tmps = tmp;
766: for (i=1; i<n; i++) {
767: v = aa + ai[i] ;
768: vi = aj + ai[i] ;
769: nz = a->diag[i] - ai[i];
770: sum = b[*r++];
771: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
772: tmp[i] = sum;
773: }
775: /* backward solve the upper triangular */
776: for (i=n-1; i>=0; i--){
777: v = aa + a->diag[i] + 1;
778: vi = aj + a->diag[i] + 1;
779: nz = ai[i+1] - a->diag[i] - 1;
780: sum = tmp[i];
781: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
782: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
783: }
785: ISRestoreIndices(isrow,&rout);
786: ISRestoreIndices(iscol,&cout);
787: VecRestoreArray(bb,&b);
788: VecRestoreArray(xx,&x);
789: PetscLogFlops(2*a->nz - A->cmap.n);
790: return(0);
791: }
795: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
796: {
797: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
798: IS iscol = a->col,isrow = a->row;
800: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
801: PetscInt nz,*rout,*cout,neq;
802: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
805: if (!n) return(0);
807: MatGetArray(B,&b);
808: MatGetArray(X,&x);
809:
810: tmp = a->solve_work;
811: ISGetIndices(isrow,&rout); r = rout;
812: ISGetIndices(iscol,&cout); c = cout;
814: for (neq=0; neq<n; neq++){
815: /* forward solve the lower triangular */
816: tmp[0] = b[r[0]];
817: tmps = tmp;
818: for (i=1; i<n; i++) {
819: v = aa + ai[i] ;
820: vi = aj + ai[i] ;
821: nz = a->diag[i] - ai[i];
822: sum = b[r[i]];
823: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
824: tmp[i] = sum;
825: }
826: /* backward solve the upper triangular */
827: for (i=n-1; i>=0; i--){
828: v = aa + a->diag[i] + 1;
829: vi = aj + a->diag[i] + 1;
830: nz = ai[i+1] - a->diag[i] - 1;
831: sum = tmp[i];
832: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
833: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
834: }
836: b += n;
837: x += n;
838: }
839: ISRestoreIndices(isrow,&rout);
840: ISRestoreIndices(iscol,&cout);
841: MatRestoreArray(B,&b);
842: MatRestoreArray(X,&x);
843: PetscLogFlops(n*(2*a->nz - n));
844: return(0);
845: }
849: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
850: {
851: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
852: IS iscol = a->col,isrow = a->row;
854: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
855: PetscInt nz,*rout,*cout,row;
856: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
859: if (!n) return(0);
861: VecGetArray(bb,&b);
862: VecGetArray(xx,&x);
863: tmp = a->solve_work;
865: ISGetIndices(isrow,&rout); r = rout;
866: ISGetIndices(iscol,&cout); c = cout + (n-1);
868: /* forward solve the lower triangular */
869: tmp[0] = b[*r++];
870: tmps = tmp;
871: for (row=1; row<n; row++) {
872: i = rout[row]; /* permuted row */
873: v = aa + ai[i] ;
874: vi = aj + ai[i] ;
875: nz = a->diag[i] - ai[i];
876: sum = b[*r++];
877: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
878: tmp[row] = sum;
879: }
881: /* backward solve the upper triangular */
882: for (row=n-1; row>=0; row--){
883: i = rout[row]; /* permuted row */
884: v = aa + a->diag[i] + 1;
885: vi = aj + a->diag[i] + 1;
886: nz = ai[i+1] - a->diag[i] - 1;
887: sum = tmp[row];
888: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
889: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
890: }
892: ISRestoreIndices(isrow,&rout);
893: ISRestoreIndices(iscol,&cout);
894: VecRestoreArray(bb,&b);
895: VecRestoreArray(xx,&x);
896: PetscLogFlops(2*a->nz - A->cmap.n);
897: return(0);
898: }
900: /* ----------------------------------------------------------- */
903: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
904: {
905: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
907: PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
908: PetscScalar *x,*b,*aa = a->a;
909: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
910: PetscInt adiag_i,i,*vi,nz,ai_i;
911: PetscScalar *v,sum;
912: #endif
915: if (!n) return(0);
917: VecGetArray(bb,&b);
918: VecGetArray(xx,&x);
920: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
921: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
922: #else
923: /* forward solve the lower triangular */
924: x[0] = b[0];
925: for (i=1; i<n; i++) {
926: ai_i = ai[i];
927: v = aa + ai_i;
928: vi = aj + ai_i;
929: nz = adiag[i] - ai_i;
930: sum = b[i];
931: while (nz--) sum -= *v++ * x[*vi++];
932: x[i] = sum;
933: }
935: /* backward solve the upper triangular */
936: for (i=n-1; i>=0; i--){
937: adiag_i = adiag[i];
938: v = aa + adiag_i + 1;
939: vi = aj + adiag_i + 1;
940: nz = ai[i+1] - adiag_i - 1;
941: sum = x[i];
942: while (nz--) sum -= *v++ * x[*vi++];
943: x[i] = sum*aa[adiag_i];
944: }
945: #endif
946: PetscLogFlops(2*a->nz - A->cmap.n);
947: VecRestoreArray(bb,&b);
948: VecRestoreArray(xx,&x);
949: return(0);
950: }
954: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
955: {
956: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
957: IS iscol = a->col,isrow = a->row;
959: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
960: PetscInt nz,*rout,*cout;
961: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
964: if (yy != xx) {VecCopy(yy,xx);}
966: VecGetArray(bb,&b);
967: VecGetArray(xx,&x);
968: tmp = a->solve_work;
970: ISGetIndices(isrow,&rout); r = rout;
971: ISGetIndices(iscol,&cout); c = cout + (n-1);
973: /* forward solve the lower triangular */
974: tmp[0] = b[*r++];
975: for (i=1; i<n; i++) {
976: v = aa + ai[i] ;
977: vi = aj + ai[i] ;
978: nz = a->diag[i] - ai[i];
979: sum = b[*r++];
980: while (nz--) sum -= *v++ * tmp[*vi++ ];
981: tmp[i] = sum;
982: }
984: /* backward solve the upper triangular */
985: for (i=n-1; i>=0; i--){
986: v = aa + a->diag[i] + 1;
987: vi = aj + a->diag[i] + 1;
988: nz = ai[i+1] - a->diag[i] - 1;
989: sum = tmp[i];
990: while (nz--) sum -= *v++ * tmp[*vi++ ];
991: tmp[i] = sum*aa[a->diag[i]];
992: x[*c--] += tmp[i];
993: }
995: ISRestoreIndices(isrow,&rout);
996: ISRestoreIndices(iscol,&cout);
997: VecRestoreArray(bb,&b);
998: VecRestoreArray(xx,&x);
999: PetscLogFlops(2*a->nz);
1001: return(0);
1002: }
1003: /* -------------------------------------------------------------------*/
1006: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1007: {
1008: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1009: IS iscol = a->col,isrow = a->row;
1011: PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1012: PetscInt nz,*rout,*cout,*diag = a->diag;
1013: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
1016: VecGetArray(bb,&b);
1017: VecGetArray(xx,&x);
1018: tmp = a->solve_work;
1020: ISGetIndices(isrow,&rout); r = rout;
1021: ISGetIndices(iscol,&cout); c = cout;
1023: /* copy the b into temp work space according to permutation */
1024: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1026: /* forward solve the U^T */
1027: for (i=0; i<n; i++) {
1028: v = aa + diag[i] ;
1029: vi = aj + diag[i] + 1;
1030: nz = ai[i+1] - diag[i] - 1;
1031: s1 = tmp[i];
1032: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1033: while (nz--) {
1034: tmp[*vi++ ] -= (*v++)*s1;
1035: }
1036: tmp[i] = s1;
1037: }
1039: /* backward solve the L^T */
1040: for (i=n-1; i>=0; i--){
1041: v = aa + diag[i] - 1 ;
1042: vi = aj + diag[i] - 1 ;
1043: nz = diag[i] - ai[i];
1044: s1 = tmp[i];
1045: while (nz--) {
1046: tmp[*vi-- ] -= (*v--)*s1;
1047: }
1048: }
1050: /* copy tmp into x according to permutation */
1051: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1053: ISRestoreIndices(isrow,&rout);
1054: ISRestoreIndices(iscol,&cout);
1055: VecRestoreArray(bb,&b);
1056: VecRestoreArray(xx,&x);
1058: PetscLogFlops(2*a->nz-A->cmap.n);
1059: return(0);
1060: }
1064: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1065: {
1066: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1067: IS iscol = a->col,isrow = a->row;
1069: PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1070: PetscInt nz,*rout,*cout,*diag = a->diag;
1071: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
1074: if (zz != xx) {VecCopy(zz,xx);}
1076: VecGetArray(bb,&b);
1077: VecGetArray(xx,&x);
1078: tmp = a->solve_work;
1080: ISGetIndices(isrow,&rout); r = rout;
1081: ISGetIndices(iscol,&cout); c = cout;
1083: /* copy the b into temp work space according to permutation */
1084: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1086: /* forward solve the U^T */
1087: for (i=0; i<n; i++) {
1088: v = aa + diag[i] ;
1089: vi = aj + diag[i] + 1;
1090: nz = ai[i+1] - diag[i] - 1;
1091: tmp[i] *= *v++;
1092: while (nz--) {
1093: tmp[*vi++ ] -= (*v++)*tmp[i];
1094: }
1095: }
1097: /* backward solve the L^T */
1098: for (i=n-1; i>=0; i--){
1099: v = aa + diag[i] - 1 ;
1100: vi = aj + diag[i] - 1 ;
1101: nz = diag[i] - ai[i];
1102: while (nz--) {
1103: tmp[*vi-- ] -= (*v--)*tmp[i];
1104: }
1105: }
1107: /* copy tmp into x according to permutation */
1108: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1110: ISRestoreIndices(isrow,&rout);
1111: ISRestoreIndices(iscol,&cout);
1112: VecRestoreArray(bb,&b);
1113: VecRestoreArray(xx,&x);
1115: PetscLogFlops(2*a->nz);
1116: return(0);
1117: }
1118: /* ----------------------------------------------------------------*/
1122: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
1123: {
1124: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1125: IS isicol;
1126: PetscErrorCode ierr;
1127: PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d;
1128: PetscInt *bi,*cols,nnz,*cols_lvl;
1129: PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1130: PetscInt i,levels,diagonal_fill;
1131: PetscTruth col_identity,row_identity;
1132: PetscReal f;
1133: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1134: PetscBT lnkbt;
1135: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1136: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1137: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1138: PetscTruth missing;
1141: f = info->fill;
1142: levels = (PetscInt)info->levels;
1143: diagonal_fill = (PetscInt)info->diagonal_fill;
1144: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1146: /* special case that simply copies fill pattern */
1147: ISIdentity(isrow,&row_identity);
1148: ISIdentity(iscol,&col_identity);
1149: if (!levels && row_identity && col_identity) {
1150: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
1151: (*fact)->factor = FACTOR_LU;
1152: (*fact)->info.factor_mallocs = 0;
1153: (*fact)->info.fill_ratio_given = info->fill;
1154: (*fact)->info.fill_ratio_needed = 1.0;
1155: b = (Mat_SeqAIJ*)(*fact)->data;
1156: MatMissingDiagonal(A,&missing,&d);
1157: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1158: b->row = isrow;
1159: b->col = iscol;
1160: b->icol = isicol;
1161: PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);
1162: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
1163: PetscObjectReference((PetscObject)isrow);
1164: PetscObjectReference((PetscObject)iscol);
1165: return(0);
1166: }
1168: ISGetIndices(isrow,&r);
1169: ISGetIndices(isicol,&ic);
1171: /* get new row pointers */
1172: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1173: bi[0] = 0;
1174: /* bdiag is location of diagonal in factor */
1175: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1176: bdiag[0] = 0;
1178: PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
1179: bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1181: /* create a linked list for storing column indices of the active row */
1182: nlnk = n + 1;
1183: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1185: /* initial FreeSpace size is f*(ai[n]+1) */
1186: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1187: current_space = free_space;
1188: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1189: current_space_lvl = free_space_lvl;
1190:
1191: for (i=0; i<n; i++) {
1192: nzi = 0;
1193: /* copy current row into linked list */
1194: nnz = ai[r[i]+1] - ai[r[i]];
1195: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1196: cols = aj + ai[r[i]];
1197: lnk[i] = -1; /* marker to indicate if diagonal exists */
1198: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1199: nzi += nlnk;
1201: /* make sure diagonal entry is included */
1202: if (diagonal_fill && lnk[i] == -1) {
1203: fm = n;
1204: while (lnk[fm] < i) fm = lnk[fm];
1205: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1206: lnk[fm] = i;
1207: lnk_lvl[i] = 0;
1208: nzi++; dcount++;
1209: }
1211: /* add pivot rows into the active row */
1212: nzbd = 0;
1213: prow = lnk[n];
1214: while (prow < i) {
1215: nnz = bdiag[prow];
1216: cols = bj_ptr[prow] + nnz + 1;
1217: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1218: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1219: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1220: nzi += nlnk;
1221: prow = lnk[prow];
1222: nzbd++;
1223: }
1224: bdiag[i] = nzbd;
1225: bi[i+1] = bi[i] + nzi;
1227: /* if free space is not available, make more free space */
1228: if (current_space->local_remaining<nzi) {
1229: nnz = nzi*(n - i); /* estimated and max additional space needed */
1230: PetscFreeSpaceGet(nnz,¤t_space);
1231: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1232: reallocs++;
1233: }
1235: /* copy data into free_space and free_space_lvl, then initialize lnk */
1236: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1237: bj_ptr[i] = current_space->array;
1238: bjlvl_ptr[i] = current_space_lvl->array;
1240: /* make sure the active row i has diagonal entry */
1241: if (*(bj_ptr[i]+bdiag[i]) != i) {
1242: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1243: try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1244: }
1246: current_space->array += nzi;
1247: current_space->local_used += nzi;
1248: current_space->local_remaining -= nzi;
1249: current_space_lvl->array += nzi;
1250: current_space_lvl->local_used += nzi;
1251: current_space_lvl->local_remaining -= nzi;
1252: }
1254: ISRestoreIndices(isrow,&r);
1255: ISRestoreIndices(isicol,&ic);
1257: /* destroy list of free space and other temporary arrays */
1258: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1259: PetscFreeSpaceContiguous(&free_space,bj);
1260: PetscIncompleteLLDestroy(lnk,lnkbt);
1261: PetscFreeSpaceDestroy(free_space_lvl);
1262: PetscFree(bj_ptr);
1264: #if defined(PETSC_USE_INFO)
1265: {
1266: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1267: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1268: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1269: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1270: PetscInfo(A,"for best performance.\n");
1271: if (diagonal_fill) {
1272: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1273: }
1274: }
1275: #endif
1277: /* put together the new matrix */
1278: MatCreate(((PetscObject)A)->comm,fact);
1279: MatSetSizes(*fact,n,n,n,n);
1280: MatSetType(*fact,((PetscObject)A)->type_name);
1281: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1282: PetscLogObjectParent(*fact,isicol);
1283: b = (Mat_SeqAIJ*)(*fact)->data;
1284: b->free_a = PETSC_TRUE;
1285: b->free_ij = PETSC_TRUE;
1286: b->singlemalloc = PETSC_FALSE;
1287: len = (bi[n] )*sizeof(PetscScalar);
1288: PetscMalloc(len+1,&b->a);
1289: b->j = bj;
1290: b->i = bi;
1291: for (i=0; i<n; i++) bdiag[i] += bi[i];
1292: b->diag = bdiag;
1293: b->ilen = 0;
1294: b->imax = 0;
1295: b->row = isrow;
1296: b->col = iscol;
1297: PetscObjectReference((PetscObject)isrow);
1298: PetscObjectReference((PetscObject)iscol);
1299: b->icol = isicol;
1300: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1301: /* In b structure: Free imax, ilen, old a, old j.
1302: Allocate bdiag, solve_work, new a, new j */
1303: PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1304: b->maxnz = b->nz = bi[n] ;
1305: (*fact)->factor = FACTOR_LU;
1306: (*fact)->info.factor_mallocs = reallocs;
1307: (*fact)->info.fill_ratio_given = f;
1308: (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1310: MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1311: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1313: return(0);
1314: }
1316: #include src/mat/impls/sbaij/seq/sbaij.h
1319: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1320: {
1321: Mat C = *B;
1322: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1323: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
1324: IS ip=b->row,iip = b->icol;
1326: PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1327: PetscInt *ai=a->i,*aj=a->j;
1328: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1329: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1330: PetscReal zeropivot,rs,shiftnz;
1331: PetscReal shiftpd;
1332: ChShift_Ctx sctx;
1333: PetscInt newshift;
1337: shiftnz = info->shiftnz;
1338: shiftpd = info->shiftpd;
1339: zeropivot = info->zeropivot;
1341: ISGetIndices(ip,&rip);
1342: ISGetIndices(iip,&riip);
1343:
1344: /* initialization */
1345: nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1346: PetscMalloc(nz,&il);
1347: jl = il + mbs;
1348: rtmp = (MatScalar*)(jl + mbs);
1350: sctx.shift_amount = 0;
1351: sctx.nshift = 0;
1352: do {
1353: sctx.chshift = PETSC_FALSE;
1354: for (i=0; i<mbs; i++) {
1355: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1356: }
1357:
1358: for (k = 0; k<mbs; k++){
1359: bval = ba + bi[k];
1360: /* initialize k-th row by the perm[k]-th row of A */
1361: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1362: for (j = jmin; j < jmax; j++){
1363: col = riip[aj[j]];
1364: if (col >= k){ /* only take upper triangular entry */
1365: rtmp[col] = aa[j];
1366: *bval++ = 0.0; /* for in-place factorization */
1367: }
1368: }
1369: /* shift the diagonal of the matrix */
1370: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1372: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1373: dk = rtmp[k];
1374: i = jl[k]; /* first row to be added to k_th row */
1376: while (i < k){
1377: nexti = jl[i]; /* next row to be added to k_th row */
1379: /* compute multiplier, update diag(k) and U(i,k) */
1380: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1381: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
1382: dk += uikdi*ba[ili];
1383: ba[ili] = uikdi; /* -U(i,k) */
1385: /* add multiple of row i to k-th row */
1386: jmin = ili + 1; jmax = bi[i+1];
1387: if (jmin < jmax){
1388: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1389: /* update il and jl for row i */
1390: il[i] = jmin;
1391: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1392: }
1393: i = nexti;
1394: }
1396: /* shift the diagonals when zero pivot is detected */
1397: /* compute rs=sum of abs(off-diagonal) */
1398: rs = 0.0;
1399: jmin = bi[k]+1;
1400: nz = bi[k+1] - jmin;
1401: bcol = bj + jmin;
1402: while (nz--){
1403: rs += PetscAbsScalar(rtmp[*bcol]);
1404: bcol++;
1405: }
1407: sctx.rs = rs;
1408: sctx.pv = dk;
1409: MatCholeskyCheckShift_inline(info,sctx,k,newshift);
1411: if (newshift == 1) {
1412: if (!sctx.shift_amount) {
1413: sctx.shift_amount = 1e-5;
1414: }
1415: break;
1416: }
1417:
1418: /* copy data into U(k,:) */
1419: ba[bi[k]] = 1.0/dk; /* U(k,k) */
1420: jmin = bi[k]+1; jmax = bi[k+1];
1421: if (jmin < jmax) {
1422: for (j=jmin; j<jmax; j++){
1423: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1424: }
1425: /* add the k-th row into il and jl */
1426: il[k] = jmin;
1427: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1428: }
1429: }
1430: } while (sctx.chshift);
1431: PetscFree(il);
1433: ISRestoreIndices(ip,&rip);
1434: ISRestoreIndices(iip,&riip);
1435: C->factor = FACTOR_CHOLESKY;
1436: C->assembled = PETSC_TRUE;
1437: C->preallocated = PETSC_TRUE;
1438: PetscLogFlops(C->rmap.n);
1439: if (sctx.nshift){
1440: if (shiftnz) {
1441: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1442: } else if (shiftpd) {
1443: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1444: }
1445: }
1446: return(0);
1447: }
1451: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1452: {
1453: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1454: Mat_SeqSBAIJ *b;
1455: Mat B;
1456: PetscErrorCode ierr;
1457: PetscTruth perm_identity,missing;
1458: PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1459: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1460: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1461: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1462: PetscReal fill=info->fill,levels=info->levels;
1463: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1464: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1465: PetscBT lnkbt;
1466: IS iperm;
1467:
1469: MatMissingDiagonal(A,&missing,&d);
1470: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1471: ISIdentity(perm,&perm_identity);
1472: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1474: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1475: ui[0] = 0;
1477: /* ICC(0) without matrix ordering: simply copies fill pattern */
1478: if (!levels && perm_identity) {
1480: for (i=0; i<am; i++) {
1481: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1482: }
1483: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1484: cols = uj;
1485: for (i=0; i<am; i++) {
1486: aj = a->j + a->diag[i];
1487: ncols = ui[i+1] - ui[i];
1488: for (j=0; j<ncols; j++) *cols++ = *aj++;
1489: }
1490: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1491: ISGetIndices(iperm,&riip);
1492: ISGetIndices(perm,&rip);
1494: /* initialization */
1495: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
1497: /* jl: linked list for storing indices of the pivot rows
1498: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1499: PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1500: il = jl + am;
1501: uj_ptr = (PetscInt**)(il + am);
1502: uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1503: for (i=0; i<am; i++){
1504: jl[i] = am; il[i] = 0;
1505: }
1507: /* create and initialize a linked list for storing column indices of the active row k */
1508: nlnk = am + 1;
1509: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
1511: /* initial FreeSpace size is fill*(ai[am]+1) */
1512: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1513: current_space = free_space;
1514: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1515: current_space_lvl = free_space_lvl;
1517: for (k=0; k<am; k++){ /* for each active row k */
1518: /* initialize lnk by the column indices of row rip[k] of A */
1519: nzk = 0;
1520: ncols = ai[rip[k]+1] - ai[rip[k]];
1521: if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1522: ncols_upper = 0;
1523: for (j=0; j<ncols; j++){
1524: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1525: if (riip[i] >= k){ /* only take upper triangular entry */
1526: ajtmp[ncols_upper] = i;
1527: ncols_upper++;
1528: }
1529: }
1530: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
1531: nzk += nlnk;
1533: /* update lnk by computing fill-in for each pivot row to be merged in */
1534: prow = jl[k]; /* 1st pivot row */
1535:
1536: while (prow < k){
1537: nextprow = jl[prow];
1538:
1539: /* merge prow into k-th row */
1540: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1541: jmax = ui[prow+1];
1542: ncols = jmax-jmin;
1543: i = jmin - ui[prow];
1544: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1545: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
1546: j = *(uj - 1);
1547: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1548: nzk += nlnk;
1550: /* update il and jl for prow */
1551: if (jmin < jmax){
1552: il[prow] = jmin;
1553: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1554: }
1555: prow = nextprow;
1556: }
1558: /* if free space is not available, make more free space */
1559: if (current_space->local_remaining<nzk) {
1560: i = am - k + 1; /* num of unfactored rows */
1561: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1562: PetscFreeSpaceGet(i,¤t_space);
1563: PetscFreeSpaceGet(i,¤t_space_lvl);
1564: reallocs++;
1565: }
1567: /* copy data into free_space and free_space_lvl, then initialize lnk */
1568: if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1569: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1571: /* add the k-th row into il and jl */
1572: if (nzk > 1){
1573: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1574: jl[k] = jl[i]; jl[i] = k;
1575: il[k] = ui[k] + 1;
1576: }
1577: uj_ptr[k] = current_space->array;
1578: uj_lvl_ptr[k] = current_space_lvl->array;
1580: current_space->array += nzk;
1581: current_space->local_used += nzk;
1582: current_space->local_remaining -= nzk;
1584: current_space_lvl->array += nzk;
1585: current_space_lvl->local_used += nzk;
1586: current_space_lvl->local_remaining -= nzk;
1588: ui[k+1] = ui[k] + nzk;
1589: }
1591: #if defined(PETSC_USE_INFO)
1592: if (ai[am] != 0) {
1593: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1594: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1595: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1596: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1597: } else {
1598: PetscInfo(A,"Empty matrix.\n");
1599: }
1600: #endif
1602: ISRestoreIndices(perm,&rip);
1603: ISRestoreIndices(iperm,&riip);
1604: PetscFree(jl);
1605: PetscFree(ajtmp);
1607: /* destroy list of free space and other temporary array(s) */
1608: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1609: PetscFreeSpaceContiguous(&free_space,uj);
1610: PetscIncompleteLLDestroy(lnk,lnkbt);
1611: PetscFreeSpaceDestroy(free_space_lvl);
1613: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1615: /* put together the new matrix in MATSEQSBAIJ format */
1616: MatCreate(PETSC_COMM_SELF,fact);
1617: MatSetSizes(*fact,am,am,am,am);
1618: B = *fact;
1619: MatSetType(B,MATSEQSBAIJ);
1620: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1622: b = (Mat_SeqSBAIJ*)B->data;
1623: b->singlemalloc = PETSC_FALSE;
1624: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1625: b->j = uj;
1626: b->i = ui;
1627: b->diag = 0;
1628: b->ilen = 0;
1629: b->imax = 0;
1630: b->row = perm;
1631: b->col = perm;
1632: PetscObjectReference((PetscObject)perm);
1633: PetscObjectReference((PetscObject)perm);
1634: b->icol = iperm;
1635: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1636: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1637: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1638: b->maxnz = b->nz = ui[am];
1639: b->free_a = PETSC_TRUE;
1640: b->free_ij = PETSC_TRUE;
1641:
1642: B->factor = FACTOR_CHOLESKY;
1643: B->info.factor_mallocs = reallocs;
1644: B->info.fill_ratio_given = fill;
1645: if (ai[am] != 0) {
1646: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1647: } else {
1648: B->info.fill_ratio_needed = 0.0;
1649: }
1650: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1651: if (perm_identity){
1652: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1653: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1654: }
1655: return(0);
1656: }
1660: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1661: {
1662: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1663: Mat_SeqSBAIJ *b;
1664: Mat B;
1665: PetscErrorCode ierr;
1666: PetscTruth perm_identity;
1667: PetscReal fill = info->fill;
1668: PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1669: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1670: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1671: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1672: PetscBT lnkbt;
1673: IS iperm;
1676: /* check whether perm is the identity mapping */
1677: ISIdentity(perm,&perm_identity);
1678: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1679: ISGetIndices(iperm,&riip);
1680: ISGetIndices(perm,&rip);
1682: /* initialization */
1683: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1684: ui[0] = 0;
1686: /* jl: linked list for storing indices of the pivot rows
1687: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1688: PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1689: il = jl + am;
1690: cols = il + am;
1691: ui_ptr = (PetscInt**)(cols + am);
1692: for (i=0; i<am; i++){
1693: jl[i] = am; il[i] = 0;
1694: }
1696: /* create and initialize a linked list for storing column indices of the active row k */
1697: nlnk = am + 1;
1698: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
1700: /* initial FreeSpace size is fill*(ai[am]+1) */
1701: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1702: current_space = free_space;
1704: for (k=0; k<am; k++){ /* for each active row k */
1705: /* initialize lnk by the column indices of row rip[k] of A */
1706: nzk = 0;
1707: ncols = ai[rip[k]+1] - ai[rip[k]];
1708: if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1709: ncols_upper = 0;
1710: for (j=0; j<ncols; j++){
1711: i = riip[*(aj + ai[rip[k]] + j)];
1712: if (i >= k){ /* only take upper triangular entry */
1713: cols[ncols_upper] = i;
1714: ncols_upper++;
1715: }
1716: }
1717: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1718: nzk += nlnk;
1720: /* update lnk by computing fill-in for each pivot row to be merged in */
1721: prow = jl[k]; /* 1st pivot row */
1722:
1723: while (prow < k){
1724: nextprow = jl[prow];
1725: /* merge prow into k-th row */
1726: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1727: jmax = ui[prow+1];
1728: ncols = jmax-jmin;
1729: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1730: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1731: nzk += nlnk;
1733: /* update il and jl for prow */
1734: if (jmin < jmax){
1735: il[prow] = jmin;
1736: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1737: }
1738: prow = nextprow;
1739: }
1741: /* if free space is not available, make more free space */
1742: if (current_space->local_remaining<nzk) {
1743: i = am - k + 1; /* num of unfactored rows */
1744: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1745: PetscFreeSpaceGet(i,¤t_space);
1746: reallocs++;
1747: }
1749: /* copy data into free space, then initialize lnk */
1750: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
1752: /* add the k-th row into il and jl */
1753: if (nzk-1 > 0){
1754: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1755: jl[k] = jl[i]; jl[i] = k;
1756: il[k] = ui[k] + 1;
1757: }
1758: ui_ptr[k] = current_space->array;
1759: current_space->array += nzk;
1760: current_space->local_used += nzk;
1761: current_space->local_remaining -= nzk;
1763: ui[k+1] = ui[k] + nzk;
1764: }
1766: #if defined(PETSC_USE_INFO)
1767: if (ai[am] != 0) {
1768: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1769: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1770: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1771: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1772: } else {
1773: PetscInfo(A,"Empty matrix.\n");
1774: }
1775: #endif
1777: ISRestoreIndices(perm,&rip);
1778: ISRestoreIndices(iperm,&riip);
1779: PetscFree(jl);
1781: /* destroy list of free space and other temporary array(s) */
1782: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1783: PetscFreeSpaceContiguous(&free_space,uj);
1784: PetscLLDestroy(lnk,lnkbt);
1786: /* put together the new matrix in MATSEQSBAIJ format */
1787: MatCreate(PETSC_COMM_SELF,fact);
1788: MatSetSizes(*fact,am,am,am,am);
1789: B = *fact;
1790: MatSetType(B,MATSEQSBAIJ);
1791: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1793: b = (Mat_SeqSBAIJ*)B->data;
1794: b->singlemalloc = PETSC_FALSE;
1795: b->free_a = PETSC_TRUE;
1796: b->free_ij = PETSC_TRUE;
1797: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1798: b->j = uj;
1799: b->i = ui;
1800: b->diag = 0;
1801: b->ilen = 0;
1802: b->imax = 0;
1803: b->row = perm;
1804: b->col = perm;
1805: PetscObjectReference((PetscObject)perm);
1806: PetscObjectReference((PetscObject)perm);
1807: b->icol = iperm;
1808: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1809: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1810: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1811: b->maxnz = b->nz = ui[am];
1812:
1813: B->factor = FACTOR_CHOLESKY;
1814: B->info.factor_mallocs = reallocs;
1815: B->info.fill_ratio_given = fill;
1816: if (ai[am] != 0) {
1817: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1818: } else {
1819: B->info.fill_ratio_needed = 0.0;
1820: }
1821: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1822: if (perm_identity){
1823: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1824: (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1825: (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1826: (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1827: } else {
1828: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1;
1829: (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1830: (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
1831: (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
1832: }
1833: return(0);
1834: }