Actual source code: aijfact.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/inline/dot.h
5: #include src/inline/spops.h
6: #include petscbt.h
7: #include src/mat/utils/freespace.h
11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12: {
15: SETERRQ(PETSC_ERR_SUP,"Code not written");
16: #if !defined(PETSC_USE_DEBUG)
17: return(0);
18: #endif
19: }
22: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26: #endif
30: /* ------------------------------------------------------------
32: This interface was contribed by Tony Caola
34: This routine is an interface to the pivoting drop-tolerance
35: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36: SPARSEKIT2.
38: The SPARSEKIT2 routines used here are covered by the GNU
39: copyright; see the file gnu in this directory.
41: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42: help in getting this routine ironed out.
44: The major drawback to this routine is that if info->fill is
45: not large enough it fails rather than allocating more space;
46: this can be fixed by hacking/improving the f2c version of
47: Yousef Saad's code.
49: ------------------------------------------------------------
50: */
51: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52: {
53: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
55: SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56: You can obtain the drop tolerance routines by installing PETSc from\n\
57: www.mcs.anl.gov/petsc\n");
58: #else
59: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
60: IS iscolf,isicol,isirow;
61: PetscTruth reorder;
62: PetscErrorCode ierr,sierr;
63: PetscInt *c,*r,*ic,i,n = A->rmap.n;
64: PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65: PetscInt *ordcol,*iwk,*iperm,*jw;
66: PetscInt jmax,lfill,job,*o_i,*o_j;
67: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68: PetscReal af;
72: if (info->dt == PETSC_DEFAULT) info->dt = .005;
73: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
75: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
76: lfill = (PetscInt)(info->dtcount/2.0);
77: jmax = (PetscInt)(info->fill*a->nz);
80: /* ------------------------------------------------------------
81: If reorder=.TRUE., then the original matrix has to be
82: reordered to reflect the user selected ordering scheme, and
83: then de-reordered so it is in it's original format.
84: Because Saad's dperm() is NOT in place, we have to copy
85: the original matrix and allocate more storage. . .
86: ------------------------------------------------------------
87: */
89: /* set reorder to true if either isrow or iscol is not identity */
90: ISIdentity(isrow,&reorder);
91: if (reorder) {ISIdentity(iscol,&reorder);}
92: reorder = PetscNot(reorder);
94:
95: /* storage for ilu factor */
96: PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
97: PetscMalloc(jmax*sizeof(PetscInt),&new_j);
98: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
99: PetscMalloc(n*sizeof(PetscInt),&ordcol);
101: /* ------------------------------------------------------------
102: Make sure that everything is Fortran formatted (1-Based)
103: ------------------------------------------------------------
104: */
105: for (i=old_i[0];i<old_i[n];i++) {
106: old_j[i]++;
107: }
108: for(i=0;i<n+1;i++) {
109: old_i[i]++;
110: };
111:
113: if (reorder) {
114: ISGetIndices(iscol,&c);
115: ISGetIndices(isrow,&r);
116: for(i=0;i<n;i++) {
117: r[i] = r[i]+1;
118: c[i] = c[i]+1;
119: }
120: PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
121: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
122: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
123: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124: for (i=0;i<n;i++) {
125: r[i] = r[i]-1;
126: c[i] = c[i]-1;
127: }
128: ISRestoreIndices(iscol,&c);
129: ISRestoreIndices(isrow,&r);
130: o_a = old_a2;
131: o_j = old_j2;
132: o_i = old_i2;
133: } else {
134: o_a = old_a;
135: o_j = old_j;
136: o_i = old_i;
137: }
139: /* ------------------------------------------------------------
140: Call Saad's ilutp() routine to generate the factorization
141: ------------------------------------------------------------
142: */
144: PetscMalloc(2*n*sizeof(PetscInt),&iperm);
145: PetscMalloc(2*n*sizeof(PetscInt),&jw);
146: PetscMalloc(n*sizeof(PetscScalar),&w);
148: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149: if (sierr) {
150: switch (sierr) {
151: case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152: case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153: case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154: case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155: case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156: default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157: }
158: }
160: PetscFree(w);
161: PetscFree(jw);
163: /* ------------------------------------------------------------
164: Saad's routine gives the result in Modified Sparse Row (msr)
165: Convert to Compressed Sparse Row format (csr)
166: ------------------------------------------------------------
167: */
169: PetscMalloc(n*sizeof(PetscScalar),&wk);
170: PetscMalloc((n+1)*sizeof(PetscInt),&iwk);
172: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
174: PetscFree(iwk);
175: PetscFree(wk);
177: if (reorder) {
178: PetscFree(old_a2);
179: PetscFree(old_j2);
180: PetscFree(old_i2);
181: } else {
182: /* fix permutation of old_j that the factorization introduced */
183: for (i=old_i[0]; i<old_i[n]; i++) {
184: old_j[i-1] = iperm[old_j[i-1]-1];
185: }
186: }
188: /* get rid of the shift to indices starting at 1 */
189: for (i=0; i<n+1; i++) {
190: old_i[i]--;
191: }
192: for (i=old_i[0];i<old_i[n];i++) {
193: old_j[i]--;
194: }
195:
196: /* Make the factored matrix 0-based */
197: for (i=0; i<n+1; i++) {
198: new_i[i]--;
199: }
200: for (i=new_i[0];i<new_i[n];i++) {
201: new_j[i]--;
202: }
204: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205: /*-- permute the right-hand-side and solution vectors --*/
206: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
207: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
208: ISGetIndices(isicol,&ic);
209: for(i=0; i<n; i++) {
210: ordcol[i] = ic[iperm[i]-1];
211: };
212: ISRestoreIndices(isicol,&ic);
213: ISDestroy(isicol);
215: PetscFree(iperm);
217: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
218: PetscFree(ordcol);
220: /*----- put together the new matrix -----*/
222: MatCreate(((PetscObject)A)->comm,fact);
223: MatSetSizes(*fact,n,n,n,n);
224: MatSetType(*fact,((PetscObject)A)->type_name);
225: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
226: (*fact)->factor = FACTOR_LU;
227: (*fact)->assembled = PETSC_TRUE;
229: b = (Mat_SeqAIJ*)(*fact)->data;
230: b->free_a = PETSC_TRUE;
231: b->free_ij = PETSC_TRUE;
232: b->singlemalloc = PETSC_FALSE;
233: b->a = new_a;
234: b->j = new_j;
235: b->i = new_i;
236: b->ilen = 0;
237: b->imax = 0;
238: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
239: b->row = isirow;
240: b->col = iscolf;
241: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
242: b->maxnz = b->nz = new_i[n];
243: MatMarkDiagonal_SeqAIJ(*fact);
244: (*fact)->info.factor_mallocs = 0;
246: af = ((double)b->nz)/((double)a->nz) + .001;
247: PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);
248: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
249: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
250: PetscInfo(A,"for best performance.\n");
252: MatILUDTFactor_Inode(A,isrow,iscol,info,fact);
254: return(0);
255: #endif
256: }
260: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
261: {
262: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
263: IS isicol;
264: PetscErrorCode ierr;
265: PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
266: PetscInt *bi,*bj,*ajtmp;
267: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
268: PetscReal f;
269: PetscInt nlnk,*lnk,k,**bi_ptr;
270: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
271: PetscBT lnkbt;
274: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
275: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
276: ISGetIndices(isrow,&r);
277: ISGetIndices(isicol,&ic);
279: /* get new row pointers */
280: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
281: bi[0] = 0;
283: /* bdiag is location of diagonal in factor */
284: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
285: bdiag[0] = 0;
287: /* linked list for storing column indices of the active row */
288: nlnk = n + 1;
289: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
291: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
293: /* initial FreeSpace size is f*(ai[n]+1) */
294: f = info->fill;
295: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
296: current_space = free_space;
298: for (i=0; i<n; i++) {
299: /* copy previous fill into linked list */
300: nzi = 0;
301: nnz = ai[r[i]+1] - ai[r[i]];
302: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
303: ajtmp = aj + ai[r[i]];
304: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
305: nzi += nlnk;
307: /* add pivot rows into linked list */
308: row = lnk[n];
309: while (row < i) {
310: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
311: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
312: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
313: nzi += nlnk;
314: row = lnk[row];
315: }
316: bi[i+1] = bi[i] + nzi;
317: im[i] = nzi;
319: /* mark bdiag */
320: nzbd = 0;
321: nnz = nzi;
322: k = lnk[n];
323: while (nnz-- && k < i){
324: nzbd++;
325: k = lnk[k];
326: }
327: bdiag[i] = bi[i] + nzbd;
329: /* if free space is not available, make more free space */
330: if (current_space->local_remaining<nzi) {
331: nnz = (n - i)*nzi; /* estimated and max additional space needed */
332: PetscFreeSpaceGet(nnz,¤t_space);
333: reallocs++;
334: }
336: /* copy data into free space, then initialize lnk */
337: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
338: bi_ptr[i] = current_space->array;
339: current_space->array += nzi;
340: current_space->local_used += nzi;
341: current_space->local_remaining -= nzi;
342: }
343: #if defined(PETSC_USE_INFO)
344: if (ai[n] != 0) {
345: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
346: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
347: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
348: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
349: PetscInfo(A,"for best performance.\n");
350: } else {
351: PetscInfo(A,"Empty matrix\n");
352: }
353: #endif
355: ISRestoreIndices(isrow,&r);
356: ISRestoreIndices(isicol,&ic);
358: /* destroy list of free space and other temporary array(s) */
359: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
360: PetscFreeSpaceContiguous(&free_space,bj);
361: PetscLLDestroy(lnk,lnkbt);
362: PetscFree2(bi_ptr,im);
364: /* put together the new matrix */
365: MatCreate(((PetscObject)A)->comm,B);
366: MatSetSizes(*B,n,n,n,n);
367: MatSetType(*B,((PetscObject)A)->type_name);
368: MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
369: PetscLogObjectParent(*B,isicol);
370: b = (Mat_SeqAIJ*)(*B)->data;
371: b->free_a = PETSC_TRUE;
372: b->free_ij = PETSC_TRUE;
373: b->singlemalloc = PETSC_FALSE;
374: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
375: b->j = bj;
376: b->i = bi;
377: b->diag = bdiag;
378: b->ilen = 0;
379: b->imax = 0;
380: b->row = isrow;
381: b->col = iscol;
382: PetscObjectReference((PetscObject)isrow);
383: PetscObjectReference((PetscObject)iscol);
384: b->icol = isicol;
385: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
387: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
388: PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
389: b->maxnz = b->nz = bi[n] ;
391: (*B)->factor = FACTOR_LU;
392: (*B)->info.factor_mallocs = reallocs;
393: (*B)->info.fill_ratio_given = f;
395: if (ai[n] != 0) {
396: (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
397: } else {
398: (*B)->info.fill_ratio_needed = 0.0;
399: }
400: MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
401: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
402: return(0);
403: }
405: /*
406: Trouble in factorization, should we dump the original matrix?
407: */
410: PetscErrorCode MatFactorDumpMatrix(Mat A)
411: {
413: PetscTruth flg;
416: PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);
417: if (flg) {
418: PetscViewer viewer;
419: char filename[PETSC_MAX_PATH_LEN];
421: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
422: PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);
423: MatView(A,viewer);
424: PetscViewerDestroy(viewer);
425: }
426: return(0);
427: }
429: /* ----------------------------------------------------------- */
432: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
433: {
434: Mat C=*B;
435: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
436: IS isrow = b->row,isicol = b->icol;
438: PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
439: PetscInt *ajtmp,*bjtmp,nz,row,*ics;
440: PetscInt *diag_offset = b->diag,diag,*pj;
441: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
442: PetscScalar d;
443: PetscReal rs;
444: LUShift_Ctx sctx;
445: PetscInt newshift,*ddiag;
448: ISGetIndices(isrow,&r);
449: ISGetIndices(isicol,&ic);
450: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
451: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
452: rtmps = rtmp; ics = ic;
454: sctx.shift_top = 0;
455: sctx.nshift_max = 0;
456: sctx.shift_lo = 0;
457: sctx.shift_hi = 0;
459: /* if both shift schemes are chosen by user, only use info->shiftpd */
460: if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
461: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
462: PetscInt *aai = a->i;
463: ddiag = a->diag;
464: sctx.shift_top = 0;
465: for (i=0; i<n; i++) {
466: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
467: d = (a->a)[ddiag[i]];
468: rs = -PetscAbsScalar(d) - PetscRealPart(d);
469: v = a->a+aai[i];
470: nz = aai[i+1] - aai[i];
471: for (j=0; j<nz; j++)
472: rs += PetscAbsScalar(v[j]);
473: if (rs>sctx.shift_top) sctx.shift_top = rs;
474: }
475: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
476: sctx.shift_top *= 1.1;
477: sctx.nshift_max = 5;
478: sctx.shift_lo = 0.;
479: sctx.shift_hi = 1.;
480: }
482: sctx.shift_amount = 0;
483: sctx.nshift = 0;
484: do {
485: sctx.lushift = PETSC_FALSE;
486: for (i=0; i<n; i++){
487: nz = bi[i+1] - bi[i];
488: bjtmp = bj + bi[i];
489: for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
491: /* load in initial (unfactored row) */
492: nz = a->i[r[i]+1] - a->i[r[i]];
493: ajtmp = a->j + a->i[r[i]];
494: v = a->a + a->i[r[i]];
495: for (j=0; j<nz; j++) {
496: rtmp[ics[ajtmp[j]]] = v[j];
497: }
498: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
500: row = *bjtmp++;
501: while (row < i) {
502: pc = rtmp + row;
503: if (*pc != 0.0) {
504: pv = b->a + diag_offset[row];
505: pj = b->j + diag_offset[row] + 1;
506: multiplier = *pc / *pv++;
507: *pc = multiplier;
508: nz = bi[row+1] - diag_offset[row] - 1;
509: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
510: PetscLogFlops(2*nz);
511: }
512: row = *bjtmp++;
513: }
514: /* finished row so stick it into b->a */
515: pv = b->a + bi[i] ;
516: pj = b->j + bi[i] ;
517: nz = bi[i+1] - bi[i];
518: diag = diag_offset[i] - bi[i];
519: rs = 0.0;
520: for (j=0; j<nz; j++) {
521: pv[j] = rtmps[pj[j]];
522: if (j != diag) rs += PetscAbsScalar(pv[j]);
523: }
525: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
526: sctx.rs = rs;
527: sctx.pv = pv[diag];
528: MatLUCheckShift_inline(info,sctx,i,newshift);
529: if (newshift == 1) break;
530: }
532: if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
533: /*
534: * if no shift in this attempt & shifting & started shifting & can refine,
535: * then try lower shift
536: */
537: sctx.shift_hi = info->shift_fraction;
538: info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
539: sctx.shift_amount = info->shift_fraction * sctx.shift_top;
540: sctx.lushift = PETSC_TRUE;
541: sctx.nshift++;
542: }
543: } while (sctx.lushift);
545: /* invert diagonal entries for simplier triangular solves */
546: for (i=0; i<n; i++) {
547: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
548: }
550: PetscFree(rtmp);
551: ISRestoreIndices(isicol,&ic);
552: ISRestoreIndices(isrow,&r);
553: C->factor = FACTOR_LU;
554: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
555: C->assembled = PETSC_TRUE;
556: PetscLogFlops(C->cmap.n);
557: if (sctx.nshift){
558: if (info->shiftnz) {
559: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
560: } else if (info->shiftpd) {
561: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
562: }
563: }
564: return(0);
565: }
567: /*
568: This routine implements inplace ILU(0) with row or/and column permutations.
569: Input:
570: A - original matrix
571: Output;
572: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
573: a->j (col index) is permuted by the inverse of colperm, then sorted
574: a->a reordered accordingly with a->j
575: a->diag (ptr to diagonal elements) is updated.
576: */
579: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B)
580: {
581: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
582: IS isrow = a->row,isicol = a->icol;
584: PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j;
585: PetscInt *ajtmp,nz,row,*ics;
586: PetscInt *diag = a->diag,nbdiag,*pj;
587: PetscScalar *rtmp,*v,*pc,multiplier,*pv,d;
588: PetscReal rs;
589: LUShift_Ctx sctx;
590: PetscInt newshift;
593: if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
594: ISGetIndices(isrow,&r);
595: ISGetIndices(isicol,&ic);
596: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
597: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
598: ics = ic;
600: sctx.shift_top = 0;
601: sctx.nshift_max = 0;
602: sctx.shift_lo = 0;
603: sctx.shift_hi = 0;
605: /* if both shift schemes are chosen by user, only use info->shiftpd */
606: if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
607: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
608: sctx.shift_top = 0;
609: for (i=0; i<n; i++) {
610: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
611: d = (a->a)[diag[i]];
612: rs = -PetscAbsScalar(d) - PetscRealPart(d);
613: v = a->a+ai[i];
614: nz = ai[i+1] - ai[i];
615: for (j=0; j<nz; j++)
616: rs += PetscAbsScalar(v[j]);
617: if (rs>sctx.shift_top) sctx.shift_top = rs;
618: }
619: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
620: sctx.shift_top *= 1.1;
621: sctx.nshift_max = 5;
622: sctx.shift_lo = 0.;
623: sctx.shift_hi = 1.;
624: }
626: sctx.shift_amount = 0;
627: sctx.nshift = 0;
628: do {
629: sctx.lushift = PETSC_FALSE;
630: for (i=0; i<n; i++){
631: /* load in initial unfactored row */
632: nz = ai[r[i]+1] - ai[r[i]];
633: ajtmp = aj + ai[r[i]];
634: v = a->a + ai[r[i]];
635: /* sort permuted ajtmp and values v accordingly */
636: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
637: PetscSortIntWithScalarArray(nz,ajtmp,v);
639: diag[r[i]] = ai[r[i]];
640: for (j=0; j<nz; j++) {
641: rtmp[ajtmp[j]] = v[j];
642: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
643: }
644: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
646: row = *ajtmp++;
647: while (row < i) {
648: pc = rtmp + row;
649: if (*pc != 0.0) {
650: pv = a->a + diag[r[row]];
651: pj = aj + diag[r[row]] + 1;
653: multiplier = *pc / *pv++;
654: *pc = multiplier;
655: nz = ai[r[row]+1] - diag[r[row]] - 1;
656: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
657: PetscLogFlops(2*nz);
658: }
659: row = *ajtmp++;
660: }
661: /* finished row so overwrite it onto a->a */
662: pv = a->a + ai[r[i]] ;
663: pj = aj + ai[r[i]] ;
664: nz = ai[r[i]+1] - ai[r[i]];
665: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
666:
667: rs = 0.0;
668: for (j=0; j<nz; j++) {
669: pv[j] = rtmp[pj[j]];
670: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
671: }
673: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
674: sctx.rs = rs;
675: sctx.pv = pv[nbdiag];
676: MatLUCheckShift_inline(info,sctx,i,newshift);
677: if (newshift == 1) break;
678: }
680: if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
681: /*
682: * if no shift in this attempt & shifting & started shifting & can refine,
683: * then try lower shift
684: */
685: sctx.shift_hi = info->shift_fraction;
686: info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
687: sctx.shift_amount = info->shift_fraction * sctx.shift_top;
688: sctx.lushift = PETSC_TRUE;
689: sctx.nshift++;
690: }
691: } while (sctx.lushift);
693: /* invert diagonal entries for simplier triangular solves */
694: for (i=0; i<n; i++) {
695: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
696: }
698: PetscFree(rtmp);
699: ISRestoreIndices(isicol,&ic);
700: ISRestoreIndices(isrow,&r);
701: A->factor = FACTOR_LU;
702: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
703: A->assembled = PETSC_TRUE;
704: PetscLogFlops(A->cmap.n);
705: if (sctx.nshift){
706: if (info->shiftnz) {
707: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
708: } else if (info->shiftpd) {
709: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
710: }
711: }
712: return(0);
713: }
717: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
718: {
720: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
721: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
722: return(0);
723: }
726: /* ----------------------------------------------------------- */
729: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
730: {
732: Mat C;
735: MatLUFactorSymbolic(A,row,col,info,&C);
736: MatLUFactorNumeric(A,info,&C);
737: MatHeaderCopy(A,C);
738: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
739: return(0);
740: }
741: /* ----------------------------------------------------------- */
744: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
745: {
746: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
747: IS iscol = a->col,isrow = a->row;
748: PetscErrorCode ierr;
749: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
750: PetscInt nz,*rout,*cout;
751: PetscScalar *x,*tmp,*tmps,*aa = a->a,sum,*v;
752: const PetscScalar *b;
755: if (!n) return(0);
757: VecGetArray(bb,(PetscScalar**)&b);
758: VecGetArray(xx,&x);
759: tmp = a->solve_work;
761: ISGetIndices(isrow,&rout); r = rout;
762: ISGetIndices(iscol,&cout); c = cout + (n-1);
764: /* forward solve the lower triangular */
765: tmp[0] = b[*r++];
766: tmps = tmp;
767: for (i=1; i<n; i++) {
768: v = aa + ai[i] ;
769: vi = aj + ai[i] ;
770: nz = a->diag[i] - ai[i];
771: sum = b[*r++];
772: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
773: tmp[i] = sum;
774: }
776: /* backward solve the upper triangular */
777: for (i=n-1; i>=0; i--){
778: v = aa + a->diag[i] + 1;
779: vi = aj + a->diag[i] + 1;
780: nz = ai[i+1] - a->diag[i] - 1;
781: sum = tmp[i];
782: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
783: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
784: }
786: ISRestoreIndices(isrow,&rout);
787: ISRestoreIndices(iscol,&cout);
788: VecRestoreArray(bb,(PetscScalar**)&b);
789: VecRestoreArray(xx,&x);
790: PetscLogFlops(2*a->nz - A->cmap.n);
791: return(0);
792: }
796: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
797: {
798: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
799: IS iscol = a->col,isrow = a->row;
801: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
802: PetscInt nz,*rout,*cout,neq;
803: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
806: if (!n) return(0);
808: MatGetArray(B,&b);
809: MatGetArray(X,&x);
810:
811: tmp = a->solve_work;
812: ISGetIndices(isrow,&rout); r = rout;
813: ISGetIndices(iscol,&cout); c = cout;
815: for (neq=0; neq<n; neq++){
816: /* forward solve the lower triangular */
817: tmp[0] = b[r[0]];
818: tmps = tmp;
819: for (i=1; i<n; i++) {
820: v = aa + ai[i] ;
821: vi = aj + ai[i] ;
822: nz = a->diag[i] - ai[i];
823: sum = b[r[i]];
824: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
825: tmp[i] = sum;
826: }
827: /* backward solve the upper triangular */
828: for (i=n-1; i>=0; i--){
829: v = aa + a->diag[i] + 1;
830: vi = aj + a->diag[i] + 1;
831: nz = ai[i+1] - a->diag[i] - 1;
832: sum = tmp[i];
833: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
834: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
835: }
837: b += n;
838: x += n;
839: }
840: ISRestoreIndices(isrow,&rout);
841: ISRestoreIndices(iscol,&cout);
842: MatRestoreArray(B,&b);
843: MatRestoreArray(X,&x);
844: PetscLogFlops(n*(2*a->nz - n));
845: return(0);
846: }
850: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
851: {
852: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
853: IS iscol = a->col,isrow = a->row;
855: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
856: PetscInt nz,*rout,*cout,row;
857: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
860: if (!n) return(0);
862: VecGetArray(bb,&b);
863: VecGetArray(xx,&x);
864: tmp = a->solve_work;
866: ISGetIndices(isrow,&rout); r = rout;
867: ISGetIndices(iscol,&cout); c = cout + (n-1);
869: /* forward solve the lower triangular */
870: tmp[0] = b[*r++];
871: tmps = tmp;
872: for (row=1; row<n; row++) {
873: i = rout[row]; /* permuted row */
874: v = aa + ai[i] ;
875: vi = aj + ai[i] ;
876: nz = a->diag[i] - ai[i];
877: sum = b[*r++];
878: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
879: tmp[row] = sum;
880: }
882: /* backward solve the upper triangular */
883: for (row=n-1; row>=0; row--){
884: i = rout[row]; /* permuted row */
885: v = aa + a->diag[i] + 1;
886: vi = aj + a->diag[i] + 1;
887: nz = ai[i+1] - a->diag[i] - 1;
888: sum = tmp[row];
889: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
890: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
891: }
893: ISRestoreIndices(isrow,&rout);
894: ISRestoreIndices(iscol,&cout);
895: VecRestoreArray(bb,&b);
896: VecRestoreArray(xx,&x);
897: PetscLogFlops(2*a->nz - A->cmap.n);
898: return(0);
899: }
901: /* ----------------------------------------------------------- */
904: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
905: {
906: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
908: PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
909: PetscScalar *x,*b,*aa = a->a;
910: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
911: PetscInt adiag_i,i,*vi,nz,ai_i;
912: PetscScalar *v,sum;
913: #endif
916: if (!n) return(0);
918: VecGetArray(bb,&b);
919: VecGetArray(xx,&x);
921: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
922: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
923: #else
924: /* forward solve the lower triangular */
925: x[0] = b[0];
926: for (i=1; i<n; i++) {
927: ai_i = ai[i];
928: v = aa + ai_i;
929: vi = aj + ai_i;
930: nz = adiag[i] - ai_i;
931: sum = b[i];
932: while (nz--) sum -= *v++ * x[*vi++];
933: x[i] = sum;
934: }
936: /* backward solve the upper triangular */
937: for (i=n-1; i>=0; i--){
938: adiag_i = adiag[i];
939: v = aa + adiag_i + 1;
940: vi = aj + adiag_i + 1;
941: nz = ai[i+1] - adiag_i - 1;
942: sum = x[i];
943: while (nz--) sum -= *v++ * x[*vi++];
944: x[i] = sum*aa[adiag_i];
945: }
946: #endif
947: PetscLogFlops(2*a->nz - A->cmap.n);
948: VecRestoreArray(bb,&b);
949: VecRestoreArray(xx,&x);
950: return(0);
951: }
955: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
956: {
957: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
958: IS iscol = a->col,isrow = a->row;
960: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
961: PetscInt nz,*rout,*cout;
962: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
965: if (yy != xx) {VecCopy(yy,xx);}
967: VecGetArray(bb,&b);
968: VecGetArray(xx,&x);
969: tmp = a->solve_work;
971: ISGetIndices(isrow,&rout); r = rout;
972: ISGetIndices(iscol,&cout); c = cout + (n-1);
974: /* forward solve the lower triangular */
975: tmp[0] = b[*r++];
976: for (i=1; i<n; i++) {
977: v = aa + ai[i] ;
978: vi = aj + ai[i] ;
979: nz = a->diag[i] - ai[i];
980: sum = b[*r++];
981: while (nz--) sum -= *v++ * tmp[*vi++ ];
982: tmp[i] = sum;
983: }
985: /* backward solve the upper triangular */
986: for (i=n-1; i>=0; i--){
987: v = aa + a->diag[i] + 1;
988: vi = aj + a->diag[i] + 1;
989: nz = ai[i+1] - a->diag[i] - 1;
990: sum = tmp[i];
991: while (nz--) sum -= *v++ * tmp[*vi++ ];
992: tmp[i] = sum*aa[a->diag[i]];
993: x[*c--] += tmp[i];
994: }
996: ISRestoreIndices(isrow,&rout);
997: ISRestoreIndices(iscol,&cout);
998: VecRestoreArray(bb,&b);
999: VecRestoreArray(xx,&x);
1000: PetscLogFlops(2*a->nz);
1002: return(0);
1003: }
1004: /* -------------------------------------------------------------------*/
1007: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1008: {
1009: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1010: IS iscol = a->col,isrow = a->row;
1012: PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1013: PetscInt nz,*rout,*cout,*diag = a->diag;
1014: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
1017: VecGetArray(bb,&b);
1018: VecGetArray(xx,&x);
1019: tmp = a->solve_work;
1021: ISGetIndices(isrow,&rout); r = rout;
1022: ISGetIndices(iscol,&cout); c = cout;
1024: /* copy the b into temp work space according to permutation */
1025: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1027: /* forward solve the U^T */
1028: for (i=0; i<n; i++) {
1029: v = aa + diag[i] ;
1030: vi = aj + diag[i] + 1;
1031: nz = ai[i+1] - diag[i] - 1;
1032: s1 = tmp[i];
1033: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1034: while (nz--) {
1035: tmp[*vi++ ] -= (*v++)*s1;
1036: }
1037: tmp[i] = s1;
1038: }
1040: /* backward solve the L^T */
1041: for (i=n-1; i>=0; i--){
1042: v = aa + diag[i] - 1 ;
1043: vi = aj + diag[i] - 1 ;
1044: nz = diag[i] - ai[i];
1045: s1 = tmp[i];
1046: while (nz--) {
1047: tmp[*vi-- ] -= (*v--)*s1;
1048: }
1049: }
1051: /* copy tmp into x according to permutation */
1052: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1054: ISRestoreIndices(isrow,&rout);
1055: ISRestoreIndices(iscol,&cout);
1056: VecRestoreArray(bb,&b);
1057: VecRestoreArray(xx,&x);
1059: PetscLogFlops(2*a->nz-A->cmap.n);
1060: return(0);
1061: }
1065: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1066: {
1067: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1068: IS iscol = a->col,isrow = a->row;
1070: PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1071: PetscInt nz,*rout,*cout,*diag = a->diag;
1072: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
1075: if (zz != xx) {VecCopy(zz,xx);}
1077: VecGetArray(bb,&b);
1078: VecGetArray(xx,&x);
1079: tmp = a->solve_work;
1081: ISGetIndices(isrow,&rout); r = rout;
1082: ISGetIndices(iscol,&cout); c = cout;
1084: /* copy the b into temp work space according to permutation */
1085: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1087: /* forward solve the U^T */
1088: for (i=0; i<n; i++) {
1089: v = aa + diag[i] ;
1090: vi = aj + diag[i] + 1;
1091: nz = ai[i+1] - diag[i] - 1;
1092: tmp[i] *= *v++;
1093: while (nz--) {
1094: tmp[*vi++ ] -= (*v++)*tmp[i];
1095: }
1096: }
1098: /* backward solve the L^T */
1099: for (i=n-1; i>=0; i--){
1100: v = aa + diag[i] - 1 ;
1101: vi = aj + diag[i] - 1 ;
1102: nz = diag[i] - ai[i];
1103: while (nz--) {
1104: tmp[*vi-- ] -= (*v--)*tmp[i];
1105: }
1106: }
1108: /* copy tmp into x according to permutation */
1109: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1111: ISRestoreIndices(isrow,&rout);
1112: ISRestoreIndices(iscol,&cout);
1113: VecRestoreArray(bb,&b);
1114: VecRestoreArray(xx,&x);
1116: PetscLogFlops(2*a->nz);
1117: return(0);
1118: }
1119: /* ----------------------------------------------------------------*/
1120: EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth);
1124: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
1125: {
1126: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1127: IS isicol;
1128: PetscErrorCode ierr;
1129: PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d;
1130: PetscInt *bi,*cols,nnz,*cols_lvl;
1131: PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1132: PetscInt i,levels,diagonal_fill;
1133: PetscTruth col_identity,row_identity;
1134: PetscReal f;
1135: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1136: PetscBT lnkbt;
1137: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1138: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1139: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1140: PetscTruth missing;
1143: f = info->fill;
1144: levels = (PetscInt)info->levels;
1145: diagonal_fill = (PetscInt)info->diagonal_fill;
1146: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1148: /* special case that simply copies fill pattern */
1149: ISIdentity(isrow,&row_identity);
1150: ISIdentity(iscol,&col_identity);
1151: if (!levels && row_identity && col_identity) {
1152: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
1153: (*fact)->factor = FACTOR_LU;
1154: (*fact)->info.factor_mallocs = 0;
1155: (*fact)->info.fill_ratio_given = info->fill;
1156: (*fact)->info.fill_ratio_needed = 1.0;
1157: b = (Mat_SeqAIJ*)(*fact)->data;
1158: MatMissingDiagonal(A,&missing,&d);
1159: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1160: b->row = isrow;
1161: b->col = iscol;
1162: b->icol = isicol;
1163: PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);
1164: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
1165: PetscObjectReference((PetscObject)isrow);
1166: PetscObjectReference((PetscObject)iscol);
1167: Mat_CheckInode(*fact,PETSC_FALSE);
1168: return(0);
1169: }
1171: ISGetIndices(isrow,&r);
1172: ISGetIndices(isicol,&ic);
1174: /* get new row pointers */
1175: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1176: bi[0] = 0;
1177: /* bdiag is location of diagonal in factor */
1178: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1179: bdiag[0] = 0;
1181: PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
1182: bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1184: /* create a linked list for storing column indices of the active row */
1185: nlnk = n + 1;
1186: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1188: /* initial FreeSpace size is f*(ai[n]+1) */
1189: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1190: current_space = free_space;
1191: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1192: current_space_lvl = free_space_lvl;
1193:
1194: for (i=0; i<n; i++) {
1195: nzi = 0;
1196: /* copy current row into linked list */
1197: nnz = ai[r[i]+1] - ai[r[i]];
1198: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1199: cols = aj + ai[r[i]];
1200: lnk[i] = -1; /* marker to indicate if diagonal exists */
1201: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1202: nzi += nlnk;
1204: /* make sure diagonal entry is included */
1205: if (diagonal_fill && lnk[i] == -1) {
1206: fm = n;
1207: while (lnk[fm] < i) fm = lnk[fm];
1208: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1209: lnk[fm] = i;
1210: lnk_lvl[i] = 0;
1211: nzi++; dcount++;
1212: }
1214: /* add pivot rows into the active row */
1215: nzbd = 0;
1216: prow = lnk[n];
1217: while (prow < i) {
1218: nnz = bdiag[prow];
1219: cols = bj_ptr[prow] + nnz + 1;
1220: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1221: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1222: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1223: nzi += nlnk;
1224: prow = lnk[prow];
1225: nzbd++;
1226: }
1227: bdiag[i] = nzbd;
1228: bi[i+1] = bi[i] + nzi;
1230: /* if free space is not available, make more free space */
1231: if (current_space->local_remaining<nzi) {
1232: nnz = nzi*(n - i); /* estimated and max additional space needed */
1233: PetscFreeSpaceGet(nnz,¤t_space);
1234: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1235: reallocs++;
1236: }
1238: /* copy data into free_space and free_space_lvl, then initialize lnk */
1239: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1240: bj_ptr[i] = current_space->array;
1241: bjlvl_ptr[i] = current_space_lvl->array;
1243: /* make sure the active row i has diagonal entry */
1244: if (*(bj_ptr[i]+bdiag[i]) != i) {
1245: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1246: try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1247: }
1249: current_space->array += nzi;
1250: current_space->local_used += nzi;
1251: current_space->local_remaining -= nzi;
1252: current_space_lvl->array += nzi;
1253: current_space_lvl->local_used += nzi;
1254: current_space_lvl->local_remaining -= nzi;
1255: }
1257: ISRestoreIndices(isrow,&r);
1258: ISRestoreIndices(isicol,&ic);
1260: /* destroy list of free space and other temporary arrays */
1261: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1262: PetscFreeSpaceContiguous(&free_space,bj);
1263: PetscIncompleteLLDestroy(lnk,lnkbt);
1264: PetscFreeSpaceDestroy(free_space_lvl);
1265: PetscFree(bj_ptr);
1267: #if defined(PETSC_USE_INFO)
1268: {
1269: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1270: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1271: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1272: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1273: PetscInfo(A,"for best performance.\n");
1274: if (diagonal_fill) {
1275: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1276: }
1277: }
1278: #endif
1280: /* put together the new matrix */
1281: MatCreate(((PetscObject)A)->comm,fact);
1282: MatSetSizes(*fact,n,n,n,n);
1283: MatSetType(*fact,((PetscObject)A)->type_name);
1284: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1285: PetscLogObjectParent(*fact,isicol);
1286: b = (Mat_SeqAIJ*)(*fact)->data;
1287: b->free_a = PETSC_TRUE;
1288: b->free_ij = PETSC_TRUE;
1289: b->singlemalloc = PETSC_FALSE;
1290: len = (bi[n] )*sizeof(PetscScalar);
1291: PetscMalloc(len+1,&b->a);
1292: b->j = bj;
1293: b->i = bi;
1294: for (i=0; i<n; i++) bdiag[i] += bi[i];
1295: b->diag = bdiag;
1296: b->ilen = 0;
1297: b->imax = 0;
1298: b->row = isrow;
1299: b->col = iscol;
1300: PetscObjectReference((PetscObject)isrow);
1301: PetscObjectReference((PetscObject)iscol);
1302: b->icol = isicol;
1303: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1304: /* In b structure: Free imax, ilen, old a, old j.
1305: Allocate bdiag, solve_work, new a, new j */
1306: PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1307: b->maxnz = b->nz = bi[n] ;
1308: (*fact)->factor = FACTOR_LU;
1309: (*fact)->info.factor_mallocs = reallocs;
1310: (*fact)->info.fill_ratio_given = f;
1311: (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1313: MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1314: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1316: return(0);
1317: }
1319: #include src/mat/impls/sbaij/seq/sbaij.h
1322: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1323: {
1324: Mat C = *B;
1325: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1326: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
1327: IS ip=b->row,iip = b->icol;
1329: PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1330: PetscInt *ai=a->i,*aj=a->j;
1331: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1332: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1333: PetscReal zeropivot,rs,shiftnz;
1334: PetscReal shiftpd;
1335: ChShift_Ctx sctx;
1336: PetscInt newshift;
1340: shiftnz = info->shiftnz;
1341: shiftpd = info->shiftpd;
1342: zeropivot = info->zeropivot;
1344: ISGetIndices(ip,&rip);
1345: ISGetIndices(iip,&riip);
1346:
1347: /* initialization */
1348: nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1349: PetscMalloc(nz,&il);
1350: jl = il + mbs;
1351: rtmp = (MatScalar*)(jl + mbs);
1353: sctx.shift_amount = 0;
1354: sctx.nshift = 0;
1355: do {
1356: sctx.chshift = PETSC_FALSE;
1357: for (i=0; i<mbs; i++) {
1358: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1359: }
1360:
1361: for (k = 0; k<mbs; k++){
1362: bval = ba + bi[k];
1363: /* initialize k-th row by the perm[k]-th row of A */
1364: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1365: for (j = jmin; j < jmax; j++){
1366: col = riip[aj[j]];
1367: if (col >= k){ /* only take upper triangular entry */
1368: rtmp[col] = aa[j];
1369: *bval++ = 0.0; /* for in-place factorization */
1370: }
1371: }
1372: /* shift the diagonal of the matrix */
1373: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1375: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1376: dk = rtmp[k];
1377: i = jl[k]; /* first row to be added to k_th row */
1379: while (i < k){
1380: nexti = jl[i]; /* next row to be added to k_th row */
1382: /* compute multiplier, update diag(k) and U(i,k) */
1383: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1384: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
1385: dk += uikdi*ba[ili];
1386: ba[ili] = uikdi; /* -U(i,k) */
1388: /* add multiple of row i to k-th row */
1389: jmin = ili + 1; jmax = bi[i+1];
1390: if (jmin < jmax){
1391: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1392: /* update il and jl for row i */
1393: il[i] = jmin;
1394: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1395: }
1396: i = nexti;
1397: }
1399: /* shift the diagonals when zero pivot is detected */
1400: /* compute rs=sum of abs(off-diagonal) */
1401: rs = 0.0;
1402: jmin = bi[k]+1;
1403: nz = bi[k+1] - jmin;
1404: bcol = bj + jmin;
1405: while (nz--){
1406: rs += PetscAbsScalar(rtmp[*bcol]);
1407: bcol++;
1408: }
1410: sctx.rs = rs;
1411: sctx.pv = dk;
1412: MatCholeskyCheckShift_inline(info,sctx,k,newshift);
1414: if (newshift == 1) {
1415: if (!sctx.shift_amount) {
1416: sctx.shift_amount = 1e-5;
1417: }
1418: break;
1419: }
1420:
1421: /* copy data into U(k,:) */
1422: ba[bi[k]] = 1.0/dk; /* U(k,k) */
1423: jmin = bi[k]+1; jmax = bi[k+1];
1424: if (jmin < jmax) {
1425: for (j=jmin; j<jmax; j++){
1426: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1427: }
1428: /* add the k-th row into il and jl */
1429: il[k] = jmin;
1430: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1431: }
1432: }
1433: } while (sctx.chshift);
1434: PetscFree(il);
1436: ISRestoreIndices(ip,&rip);
1437: ISRestoreIndices(iip,&riip);
1438: C->factor = FACTOR_CHOLESKY;
1439: C->assembled = PETSC_TRUE;
1440: C->preallocated = PETSC_TRUE;
1441: PetscLogFlops(C->rmap.n);
1442: if (sctx.nshift){
1443: if (shiftnz) {
1444: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1445: } else if (shiftpd) {
1446: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1447: }
1448: }
1449: return(0);
1450: }
1454: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1455: {
1456: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1457: Mat_SeqSBAIJ *b;
1458: Mat B;
1459: PetscErrorCode ierr;
1460: PetscTruth perm_identity,missing;
1461: PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1462: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1463: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
1464: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1465: PetscReal fill=info->fill,levels=info->levels;
1466: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1467: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1468: PetscBT lnkbt;
1469: IS iperm;
1470:
1472: MatMissingDiagonal(A,&missing,&d);
1473: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1474: ISIdentity(perm,&perm_identity);
1475: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1477: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1478: ui[0] = 0;
1480: /* ICC(0) without matrix ordering: simply copies fill pattern */
1481: if (!levels && perm_identity) {
1483: for (i=0; i<am; i++) {
1484: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1485: }
1486: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1487: cols = uj;
1488: for (i=0; i<am; i++) {
1489: aj = a->j + a->diag[i];
1490: ncols = ui[i+1] - ui[i];
1491: for (j=0; j<ncols; j++) *cols++ = *aj++;
1492: }
1493: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1494: ISGetIndices(iperm,&riip);
1495: ISGetIndices(perm,&rip);
1497: /* initialization */
1498: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
1500: /* jl: linked list for storing indices of the pivot rows
1501: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1502: PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1503: il = jl + am;
1504: uj_ptr = (PetscInt**)(il + am);
1505: uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1506: for (i=0; i<am; i++){
1507: jl[i] = am; il[i] = 0;
1508: }
1510: /* create and initialize a linked list for storing column indices of the active row k */
1511: nlnk = am + 1;
1512: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
1514: /* initial FreeSpace size is fill*(ai[am]+1) */
1515: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1516: current_space = free_space;
1517: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1518: current_space_lvl = free_space_lvl;
1520: for (k=0; k<am; k++){ /* for each active row k */
1521: /* initialize lnk by the column indices of row rip[k] of A */
1522: nzk = 0;
1523: ncols = ai[rip[k]+1] - ai[rip[k]];
1524: if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1525: ncols_upper = 0;
1526: for (j=0; j<ncols; j++){
1527: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1528: if (riip[i] >= k){ /* only take upper triangular entry */
1529: ajtmp[ncols_upper] = i;
1530: ncols_upper++;
1531: }
1532: }
1533: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
1534: nzk += nlnk;
1536: /* update lnk by computing fill-in for each pivot row to be merged in */
1537: prow = jl[k]; /* 1st pivot row */
1538:
1539: while (prow < k){
1540: nextprow = jl[prow];
1541:
1542: /* merge prow into k-th row */
1543: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1544: jmax = ui[prow+1];
1545: ncols = jmax-jmin;
1546: i = jmin - ui[prow];
1547: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1548: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
1549: j = *(uj - 1);
1550: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1551: nzk += nlnk;
1553: /* update il and jl for prow */
1554: if (jmin < jmax){
1555: il[prow] = jmin;
1556: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1557: }
1558: prow = nextprow;
1559: }
1561: /* if free space is not available, make more free space */
1562: if (current_space->local_remaining<nzk) {
1563: i = am - k + 1; /* num of unfactored rows */
1564: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1565: PetscFreeSpaceGet(i,¤t_space);
1566: PetscFreeSpaceGet(i,¤t_space_lvl);
1567: reallocs++;
1568: }
1570: /* copy data into free_space and free_space_lvl, then initialize lnk */
1571: if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1572: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1574: /* add the k-th row into il and jl */
1575: if (nzk > 1){
1576: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1577: jl[k] = jl[i]; jl[i] = k;
1578: il[k] = ui[k] + 1;
1579: }
1580: uj_ptr[k] = current_space->array;
1581: uj_lvl_ptr[k] = current_space_lvl->array;
1583: current_space->array += nzk;
1584: current_space->local_used += nzk;
1585: current_space->local_remaining -= nzk;
1587: current_space_lvl->array += nzk;
1588: current_space_lvl->local_used += nzk;
1589: current_space_lvl->local_remaining -= nzk;
1591: ui[k+1] = ui[k] + nzk;
1592: }
1594: #if defined(PETSC_USE_INFO)
1595: if (ai[am] != 0) {
1596: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1597: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1598: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1599: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1600: } else {
1601: PetscInfo(A,"Empty matrix.\n");
1602: }
1603: #endif
1605: ISRestoreIndices(perm,&rip);
1606: ISRestoreIndices(iperm,&riip);
1607: PetscFree(jl);
1608: PetscFree(ajtmp);
1610: /* destroy list of free space and other temporary array(s) */
1611: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1612: PetscFreeSpaceContiguous(&free_space,uj);
1613: PetscIncompleteLLDestroy(lnk,lnkbt);
1614: PetscFreeSpaceDestroy(free_space_lvl);
1616: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1618: /* put together the new matrix in MATSEQSBAIJ format */
1619: MatCreate(PETSC_COMM_SELF,fact);
1620: MatSetSizes(*fact,am,am,am,am);
1621: B = *fact;
1622: MatSetType(B,MATSEQSBAIJ);
1623: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1625: b = (Mat_SeqSBAIJ*)B->data;
1626: b->singlemalloc = PETSC_FALSE;
1627: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1628: b->j = uj;
1629: b->i = ui;
1630: b->diag = 0;
1631: b->ilen = 0;
1632: b->imax = 0;
1633: b->row = perm;
1634: b->col = perm;
1635: PetscObjectReference((PetscObject)perm);
1636: PetscObjectReference((PetscObject)perm);
1637: b->icol = iperm;
1638: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1639: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1640: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1641: b->maxnz = b->nz = ui[am];
1642: b->free_a = PETSC_TRUE;
1643: b->free_ij = PETSC_TRUE;
1644:
1645: B->factor = FACTOR_CHOLESKY;
1646: B->info.factor_mallocs = reallocs;
1647: B->info.fill_ratio_given = fill;
1648: if (ai[am] != 0) {
1649: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1650: } else {
1651: B->info.fill_ratio_needed = 0.0;
1652: }
1653: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1654: if (perm_identity){
1655: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1656: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1657: }
1658: return(0);
1659: }
1663: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1664: {
1665: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1666: Mat_SeqSBAIJ *b;
1667: Mat B;
1668: PetscErrorCode ierr;
1669: PetscTruth perm_identity;
1670: PetscReal fill = info->fill;
1671: PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1672: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1673: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1674: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1675: PetscBT lnkbt;
1676: IS iperm;
1679: /* check whether perm is the identity mapping */
1680: ISIdentity(perm,&perm_identity);
1681: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1682: ISGetIndices(iperm,&riip);
1683: ISGetIndices(perm,&rip);
1685: /* initialization */
1686: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1687: ui[0] = 0;
1689: /* jl: linked list for storing indices of the pivot rows
1690: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1691: PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1692: il = jl + am;
1693: cols = il + am;
1694: ui_ptr = (PetscInt**)(cols + am);
1695: for (i=0; i<am; i++){
1696: jl[i] = am; il[i] = 0;
1697: }
1699: /* create and initialize a linked list for storing column indices of the active row k */
1700: nlnk = am + 1;
1701: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
1703: /* initial FreeSpace size is fill*(ai[am]+1) */
1704: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1705: current_space = free_space;
1707: for (k=0; k<am; k++){ /* for each active row k */
1708: /* initialize lnk by the column indices of row rip[k] of A */
1709: nzk = 0;
1710: ncols = ai[rip[k]+1] - ai[rip[k]];
1711: if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1712: ncols_upper = 0;
1713: for (j=0; j<ncols; j++){
1714: i = riip[*(aj + ai[rip[k]] + j)];
1715: if (i >= k){ /* only take upper triangular entry */
1716: cols[ncols_upper] = i;
1717: ncols_upper++;
1718: }
1719: }
1720: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1721: nzk += nlnk;
1723: /* update lnk by computing fill-in for each pivot row to be merged in */
1724: prow = jl[k]; /* 1st pivot row */
1725:
1726: while (prow < k){
1727: nextprow = jl[prow];
1728: /* merge prow into k-th row */
1729: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1730: jmax = ui[prow+1];
1731: ncols = jmax-jmin;
1732: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1733: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1734: nzk += nlnk;
1736: /* update il and jl for prow */
1737: if (jmin < jmax){
1738: il[prow] = jmin;
1739: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1740: }
1741: prow = nextprow;
1742: }
1744: /* if free space is not available, make more free space */
1745: if (current_space->local_remaining<nzk) {
1746: i = am - k + 1; /* num of unfactored rows */
1747: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1748: PetscFreeSpaceGet(i,¤t_space);
1749: reallocs++;
1750: }
1752: /* copy data into free space, then initialize lnk */
1753: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
1755: /* add the k-th row into il and jl */
1756: if (nzk-1 > 0){
1757: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1758: jl[k] = jl[i]; jl[i] = k;
1759: il[k] = ui[k] + 1;
1760: }
1761: ui_ptr[k] = current_space->array;
1762: current_space->array += nzk;
1763: current_space->local_used += nzk;
1764: current_space->local_remaining -= nzk;
1766: ui[k+1] = ui[k] + nzk;
1767: }
1769: #if defined(PETSC_USE_INFO)
1770: if (ai[am] != 0) {
1771: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1772: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1773: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1774: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1775: } else {
1776: PetscInfo(A,"Empty matrix.\n");
1777: }
1778: #endif
1780: ISRestoreIndices(perm,&rip);
1781: ISRestoreIndices(iperm,&riip);
1782: PetscFree(jl);
1784: /* destroy list of free space and other temporary array(s) */
1785: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1786: PetscFreeSpaceContiguous(&free_space,uj);
1787: PetscLLDestroy(lnk,lnkbt);
1789: /* put together the new matrix in MATSEQSBAIJ format */
1790: MatCreate(PETSC_COMM_SELF,fact);
1791: MatSetSizes(*fact,am,am,am,am);
1792: B = *fact;
1793: MatSetType(B,MATSEQSBAIJ);
1794: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1796: b = (Mat_SeqSBAIJ*)B->data;
1797: b->singlemalloc = PETSC_FALSE;
1798: b->free_a = PETSC_TRUE;
1799: b->free_ij = PETSC_TRUE;
1800: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1801: b->j = uj;
1802: b->i = ui;
1803: b->diag = 0;
1804: b->ilen = 0;
1805: b->imax = 0;
1806: b->row = perm;
1807: b->col = perm;
1808: PetscObjectReference((PetscObject)perm);
1809: PetscObjectReference((PetscObject)perm);
1810: b->icol = iperm;
1811: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1812: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1813: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1814: b->maxnz = b->nz = ui[am];
1815:
1816: B->factor = FACTOR_CHOLESKY;
1817: B->info.factor_mallocs = reallocs;
1818: B->info.fill_ratio_given = fill;
1819: if (ai[am] != 0) {
1820: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1821: } else {
1822: B->info.fill_ratio_needed = 0.0;
1823: }
1824: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1825: if (perm_identity){
1826: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1827: (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1828: (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1829: (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1830: } else {
1831: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1;
1832: (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1833: (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
1834: (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
1835: }
1836: return(0);
1837: }