Actual source code: aijfact.c
petsc-dev 2014-02-02
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
9: /*
10: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
13: */
14: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
15: {
16: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
17: PetscErrorCode ierr;
18: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
19: const PetscInt *ai = a->i, *aj = a->j;
20: const PetscScalar *aa = a->a;
21: PetscBool *done;
22: PetscReal best,past = 0,future;
25: /* pick initial row */
26: best = -1;
27: for (i=0; i<n; i++) {
28: future = 0.0;
29: for (j=ai[i]; j<ai[i+1]; j++) {
30: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
31: else past = PetscAbsScalar(aa[j]);
32: }
33: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34: if (past/future > best) {
35: best = past/future;
36: current = i;
37: }
38: }
40: PetscMalloc1(n,&done);
41: PetscMemzero(done,n*sizeof(PetscBool));
42: PetscMalloc1(n,&order);
43: order[0] = current;
44: for (i=0; i<n-1; i++) {
45: done[current] = PETSC_TRUE;
46: best = -1;
47: /* loop over all neighbors of current pivot */
48: for (j=ai[current]; j<ai[current+1]; j++) {
49: jj = aj[j];
50: if (done[jj]) continue;
51: /* loop over columns of potential next row computing weights for below and above diagonal */
52: past = future = 0.0;
53: for (k=ai[jj]; k<ai[jj+1]; k++) {
54: kk = aj[k];
55: if (done[kk]) past += PetscAbsScalar(aa[k]);
56: else if (kk != jj) future += PetscAbsScalar(aa[k]);
57: }
58: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59: if (past/future > best) {
60: best = past/future;
61: newcurrent = jj;
62: }
63: }
64: if (best == -1) { /* no neighbors to select from so select best of all that remain */
65: best = -1;
66: for (k=0; k<n; k++) {
67: if (done[k]) continue;
68: future = 0.0;
69: past = 0.0;
70: for (j=ai[k]; j<ai[k+1]; j++) {
71: kk = aj[j];
72: if (done[kk]) past += PetscAbsScalar(aa[j]);
73: else if (kk != k) future += PetscAbsScalar(aa[j]);
74: }
75: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76: if (past/future > best) {
77: best = past/future;
78: newcurrent = k;
79: }
80: }
81: }
82: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83: current = newcurrent;
84: order[i+1] = current;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
87: *icol = *irow;
88: PetscObjectReference((PetscObject)*irow);
89: PetscFree(done);
90: PetscFree(order);
91: return(0);
92: }
96: PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg)
97: {
99: *flg = PETSC_TRUE;
100: return(0);
101: }
105: PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
106: {
107: PetscInt n = A->rmap->n;
111: #if defined(PETSC_USE_COMPLEX)
112: if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
113: #endif
114: MatCreate(PetscObjectComm((PetscObject)A),B);
115: MatSetSizes(*B,n,n,n,n);
116: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
117: MatSetType(*B,MATSEQAIJ);
119: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
122: MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);
123: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
124: MatSetType(*B,MATSEQSBAIJ);
125: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
127: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
128: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
129: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
130: (*B)->factortype = ftype;
131: return(0);
132: }
136: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
137: {
138: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
139: IS isicol;
140: PetscErrorCode ierr;
141: const PetscInt *r,*ic;
142: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
143: PetscInt *bi,*bj,*ajtmp;
144: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
145: PetscReal f;
146: PetscInt nlnk,*lnk,k,**bi_ptr;
147: PetscFreeSpaceList free_space=NULL,current_space=NULL;
148: PetscBT lnkbt;
151: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
152: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
153: ISGetIndices(isrow,&r);
154: ISGetIndices(isicol,&ic);
156: /* get new row pointers */
157: PetscMalloc1((n+1),&bi);
158: bi[0] = 0;
160: /* bdiag is location of diagonal in factor */
161: PetscMalloc1((n+1),&bdiag);
162: bdiag[0] = 0;
164: /* linked list for storing column indices of the active row */
165: nlnk = n + 1;
166: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
168: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
170: /* initial FreeSpace size is f*(ai[n]+1) */
171: f = info->fill;
172: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
173: current_space = free_space;
175: for (i=0; i<n; i++) {
176: /* copy previous fill into linked list */
177: nzi = 0;
178: nnz = ai[r[i]+1] - ai[r[i]];
179: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
180: ajtmp = aj + ai[r[i]];
181: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
182: nzi += nlnk;
184: /* add pivot rows into linked list */
185: row = lnk[n];
186: while (row < i) {
187: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
188: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
189: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
190: nzi += nlnk;
191: row = lnk[row];
192: }
193: bi[i+1] = bi[i] + nzi;
194: im[i] = nzi;
196: /* mark bdiag */
197: nzbd = 0;
198: nnz = nzi;
199: k = lnk[n];
200: while (nnz-- && k < i) {
201: nzbd++;
202: k = lnk[k];
203: }
204: bdiag[i] = bi[i] + nzbd;
206: /* if free space is not available, make more free space */
207: if (current_space->local_remaining<nzi) {
208: nnz = (n - i)*nzi; /* estimated and max additional space needed */
209: PetscFreeSpaceGet(nnz,¤t_space);
210: reallocs++;
211: }
213: /* copy data into free space, then initialize lnk */
214: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
216: bi_ptr[i] = current_space->array;
217: current_space->array += nzi;
218: current_space->local_used += nzi;
219: current_space->local_remaining -= nzi;
220: }
221: #if defined(PETSC_USE_INFO)
222: if (ai[n] != 0) {
223: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
224: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
225: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
226: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
227: PetscInfo(A,"for best performance.\n");
228: } else {
229: PetscInfo(A,"Empty matrix\n");
230: }
231: #endif
233: ISRestoreIndices(isrow,&r);
234: ISRestoreIndices(isicol,&ic);
236: /* destroy list of free space and other temporary array(s) */
237: PetscMalloc1((bi[n]+1),&bj);
238: PetscFreeSpaceContiguous(&free_space,bj);
239: PetscLLDestroy(lnk,lnkbt);
240: PetscFree2(bi_ptr,im);
242: /* put together the new matrix */
243: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
244: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
245: b = (Mat_SeqAIJ*)(B)->data;
247: b->free_a = PETSC_TRUE;
248: b->free_ij = PETSC_TRUE;
249: b->singlemalloc = PETSC_FALSE;
251: PetscMalloc1((bi[n]+1),&b->a);
252: b->j = bj;
253: b->i = bi;
254: b->diag = bdiag;
255: b->ilen = 0;
256: b->imax = 0;
257: b->row = isrow;
258: b->col = iscol;
259: PetscObjectReference((PetscObject)isrow);
260: PetscObjectReference((PetscObject)iscol);
261: b->icol = isicol;
262: PetscMalloc1((n+1),&b->solve_work);
264: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
265: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
266: b->maxnz = b->nz = bi[n];
268: (B)->factortype = MAT_FACTOR_LU;
269: (B)->info.factor_mallocs = reallocs;
270: (B)->info.fill_ratio_given = f;
272: if (ai[n]) {
273: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
274: } else {
275: (B)->info.fill_ratio_needed = 0.0;
276: }
277: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
278: if (a->inode.size) {
279: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
280: }
281: return(0);
282: }
286: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
287: {
288: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
289: IS isicol;
290: PetscErrorCode ierr;
291: const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
292: PetscInt i,n=A->rmap->n;
293: PetscInt *bi,*bj;
294: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
295: PetscReal f;
296: PetscInt nlnk,*lnk,k,**bi_ptr;
297: PetscFreeSpaceList free_space=NULL,current_space=NULL;
298: PetscBT lnkbt;
301: /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
302: /*
303: PetscBool olddatastruct=PETSC_FALSE;
304: PetscOptionsGetBool(NULL,"-lu_old",&olddatastruct,NULL);
305: if (olddatastruct) {
306: MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);
307: return(0);
308: }
309: */
310: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
311: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
312: ISGetIndices(isrow,&r);
313: ISGetIndices(isicol,&ic);
315: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
316: PetscMalloc1((n+1),&bi);
317: PetscMalloc1((n+1),&bdiag);
318: bi[0] = bdiag[0] = 0;
320: /* linked list for storing column indices of the active row */
321: nlnk = n + 1;
322: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
324: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
326: /* initial FreeSpace size is f*(ai[n]+1) */
327: f = info->fill;
328: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
329: current_space = free_space;
331: for (i=0; i<n; i++) {
332: /* copy previous fill into linked list */
333: nzi = 0;
334: nnz = ai[r[i]+1] - ai[r[i]];
335: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
336: ajtmp = aj + ai[r[i]];
337: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
338: nzi += nlnk;
340: /* add pivot rows into linked list */
341: row = lnk[n];
342: while (row < i) {
343: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
344: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
345: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
346: nzi += nlnk;
347: row = lnk[row];
348: }
349: bi[i+1] = bi[i] + nzi;
350: im[i] = nzi;
352: /* mark bdiag */
353: nzbd = 0;
354: nnz = nzi;
355: k = lnk[n];
356: while (nnz-- && k < i) {
357: nzbd++;
358: k = lnk[k];
359: }
360: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
362: /* if free space is not available, make more free space */
363: if (current_space->local_remaining<nzi) {
364: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
365: PetscFreeSpaceGet(nnz,¤t_space);
366: reallocs++;
367: }
369: /* copy data into free space, then initialize lnk */
370: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
372: bi_ptr[i] = current_space->array;
373: current_space->array += nzi;
374: current_space->local_used += nzi;
375: current_space->local_remaining -= nzi;
376: }
378: ISRestoreIndices(isrow,&r);
379: ISRestoreIndices(isicol,&ic);
381: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
382: PetscMalloc1((bi[n]+1),&bj);
383: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
384: PetscLLDestroy(lnk,lnkbt);
385: PetscFree2(bi_ptr,im);
387: /* put together the new matrix */
388: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
389: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
390: b = (Mat_SeqAIJ*)(B)->data;
392: b->free_a = PETSC_TRUE;
393: b->free_ij = PETSC_TRUE;
394: b->singlemalloc = PETSC_FALSE;
396: PetscMalloc1((bdiag[0]+1),&b->a);
398: b->j = bj;
399: b->i = bi;
400: b->diag = bdiag;
401: b->ilen = 0;
402: b->imax = 0;
403: b->row = isrow;
404: b->col = iscol;
405: PetscObjectReference((PetscObject)isrow);
406: PetscObjectReference((PetscObject)iscol);
407: b->icol = isicol;
408: PetscMalloc1((n+1),&b->solve_work);
410: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
411: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
412: b->maxnz = b->nz = bdiag[0]+1;
414: B->factortype = MAT_FACTOR_LU;
415: B->info.factor_mallocs = reallocs;
416: B->info.fill_ratio_given = f;
418: if (ai[n]) {
419: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
420: } else {
421: B->info.fill_ratio_needed = 0.0;
422: }
423: #if defined(PETSC_USE_INFO)
424: if (ai[n] != 0) {
425: PetscReal af = B->info.fill_ratio_needed;
426: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
427: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
428: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
429: PetscInfo(A,"for best performance.\n");
430: } else {
431: PetscInfo(A,"Empty matrix\n");
432: }
433: #endif
434: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
435: if (a->inode.size) {
436: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
437: }
438: Mat_CheckInode_FactorLU(B);
439: return(0);
440: }
442: /*
443: Trouble in factorization, should we dump the original matrix?
444: */
447: PetscErrorCode MatFactorDumpMatrix(Mat A)
448: {
450: PetscBool flg = PETSC_FALSE;
453: PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);
454: if (flg) {
455: PetscViewer viewer;
456: char filename[PETSC_MAX_PATH_LEN];
458: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
459: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
460: MatView(A,viewer);
461: PetscViewerDestroy(&viewer);
462: }
463: return(0);
464: }
468: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
469: {
470: Mat C =B;
471: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
472: IS isrow = b->row,isicol = b->icol;
473: PetscErrorCode ierr;
474: const PetscInt *r,*ic,*ics;
475: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
476: PetscInt i,j,k,nz,nzL,row,*pj;
477: const PetscInt *ajtmp,*bjtmp;
478: MatScalar *rtmp,*pc,multiplier,*pv;
479: const MatScalar *aa=a->a,*v;
480: PetscBool row_identity,col_identity;
481: FactorShiftCtx sctx;
482: const PetscInt *ddiag;
483: PetscReal rs;
484: MatScalar d;
487: /* MatPivotSetUp(): initialize shift context sctx */
488: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
490: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
491: ddiag = a->diag;
492: sctx.shift_top = info->zeropivot;
493: for (i=0; i<n; i++) {
494: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
495: d = (aa)[ddiag[i]];
496: rs = -PetscAbsScalar(d) - PetscRealPart(d);
497: v = aa+ai[i];
498: nz = ai[i+1] - ai[i];
499: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
500: if (rs>sctx.shift_top) sctx.shift_top = rs;
501: }
502: sctx.shift_top *= 1.1;
503: sctx.nshift_max = 5;
504: sctx.shift_lo = 0.;
505: sctx.shift_hi = 1.;
506: }
508: ISGetIndices(isrow,&r);
509: ISGetIndices(isicol,&ic);
510: PetscMalloc1((n+1),&rtmp);
511: ics = ic;
513: do {
514: sctx.newshift = PETSC_FALSE;
515: for (i=0; i<n; i++) {
516: /* zero rtmp */
517: /* L part */
518: nz = bi[i+1] - bi[i];
519: bjtmp = bj + bi[i];
520: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
522: /* U part */
523: nz = bdiag[i]-bdiag[i+1];
524: bjtmp = bj + bdiag[i+1]+1;
525: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
527: /* load in initial (unfactored row) */
528: nz = ai[r[i]+1] - ai[r[i]];
529: ajtmp = aj + ai[r[i]];
530: v = aa + ai[r[i]];
531: for (j=0; j<nz; j++) {
532: rtmp[ics[ajtmp[j]]] = v[j];
533: }
534: /* ZeropivotApply() */
535: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
537: /* elimination */
538: bjtmp = bj + bi[i];
539: row = *bjtmp++;
540: nzL = bi[i+1] - bi[i];
541: for (k=0; k < nzL; k++) {
542: pc = rtmp + row;
543: if (*pc != 0.0) {
544: pv = b->a + bdiag[row];
545: multiplier = *pc * (*pv);
546: *pc = multiplier;
548: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
549: pv = b->a + bdiag[row+1]+1;
550: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
552: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
553: PetscLogFlops(1+2*nz);
554: }
555: row = *bjtmp++;
556: }
558: /* finished row so stick it into b->a */
559: rs = 0.0;
560: /* L part */
561: pv = b->a + bi[i];
562: pj = b->j + bi[i];
563: nz = bi[i+1] - bi[i];
564: for (j=0; j<nz; j++) {
565: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
566: }
568: /* U part */
569: pv = b->a + bdiag[i+1]+1;
570: pj = b->j + bdiag[i+1]+1;
571: nz = bdiag[i] - bdiag[i+1]-1;
572: for (j=0; j<nz; j++) {
573: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
574: }
576: sctx.rs = rs;
577: sctx.pv = rtmp[i];
578: MatPivotCheck(A,info,&sctx,i);
579: if (sctx.newshift) break; /* break for-loop */
580: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
582: /* Mark diagonal and invert diagonal for simplier triangular solves */
583: pv = b->a + bdiag[i];
584: *pv = 1.0/rtmp[i];
586: } /* endof for (i=0; i<n; i++) { */
588: /* MatPivotRefine() */
589: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
590: /*
591: * if no shift in this attempt & shifting & started shifting & can refine,
592: * then try lower shift
593: */
594: sctx.shift_hi = sctx.shift_fraction;
595: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
596: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
597: sctx.newshift = PETSC_TRUE;
598: sctx.nshift++;
599: }
600: } while (sctx.newshift);
602: PetscFree(rtmp);
603: ISRestoreIndices(isicol,&ic);
604: ISRestoreIndices(isrow,&r);
606: ISIdentity(isrow,&row_identity);
607: ISIdentity(isicol,&col_identity);
608: if (b->inode.size) {
609: C->ops->solve = MatSolve_SeqAIJ_Inode;
610: } else if (row_identity && col_identity) {
611: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
612: } else {
613: C->ops->solve = MatSolve_SeqAIJ;
614: }
615: C->ops->solveadd = MatSolveAdd_SeqAIJ;
616: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
617: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
618: C->ops->matsolve = MatMatSolve_SeqAIJ;
619: C->assembled = PETSC_TRUE;
620: C->preallocated = PETSC_TRUE;
622: PetscLogFlops(C->cmap->n);
624: /* MatShiftView(A,info,&sctx) */
625: if (sctx.nshift) {
626: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
627: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
628: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
629: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
630: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
631: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
632: }
633: }
634: return(0);
635: }
639: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
640: {
641: Mat C =B;
642: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
643: IS isrow = b->row,isicol = b->icol;
644: PetscErrorCode ierr;
645: const PetscInt *r,*ic,*ics;
646: PetscInt nz,row,i,j,n=A->rmap->n,diag;
647: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
648: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
649: MatScalar *pv,*rtmp,*pc,multiplier,d;
650: const MatScalar *v,*aa=a->a;
651: PetscReal rs=0.0;
652: FactorShiftCtx sctx;
653: const PetscInt *ddiag;
654: PetscBool row_identity, col_identity;
657: /* MatPivotSetUp(): initialize shift context sctx */
658: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
660: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
661: ddiag = a->diag;
662: sctx.shift_top = info->zeropivot;
663: for (i=0; i<n; i++) {
664: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
665: d = (aa)[ddiag[i]];
666: rs = -PetscAbsScalar(d) - PetscRealPart(d);
667: v = aa+ai[i];
668: nz = ai[i+1] - ai[i];
669: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
670: if (rs>sctx.shift_top) sctx.shift_top = rs;
671: }
672: sctx.shift_top *= 1.1;
673: sctx.nshift_max = 5;
674: sctx.shift_lo = 0.;
675: sctx.shift_hi = 1.;
676: }
678: ISGetIndices(isrow,&r);
679: ISGetIndices(isicol,&ic);
680: PetscMalloc1((n+1),&rtmp);
681: ics = ic;
683: do {
684: sctx.newshift = PETSC_FALSE;
685: for (i=0; i<n; i++) {
686: nz = bi[i+1] - bi[i];
687: bjtmp = bj + bi[i];
688: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
690: /* load in initial (unfactored row) */
691: nz = ai[r[i]+1] - ai[r[i]];
692: ajtmp = aj + ai[r[i]];
693: v = aa + ai[r[i]];
694: for (j=0; j<nz; j++) {
695: rtmp[ics[ajtmp[j]]] = v[j];
696: }
697: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
699: row = *bjtmp++;
700: while (row < i) {
701: pc = rtmp + row;
702: if (*pc != 0.0) {
703: pv = b->a + diag_offset[row];
704: pj = b->j + diag_offset[row] + 1;
705: multiplier = *pc / *pv++;
706: *pc = multiplier;
707: nz = bi[row+1] - diag_offset[row] - 1;
708: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
709: PetscLogFlops(1+2*nz);
710: }
711: row = *bjtmp++;
712: }
713: /* finished row so stick it into b->a */
714: pv = b->a + bi[i];
715: pj = b->j + bi[i];
716: nz = bi[i+1] - bi[i];
717: diag = diag_offset[i] - bi[i];
718: rs = 0.0;
719: for (j=0; j<nz; j++) {
720: pv[j] = rtmp[pj[j]];
721: rs += PetscAbsScalar(pv[j]);
722: }
723: rs -= PetscAbsScalar(pv[diag]);
725: sctx.rs = rs;
726: sctx.pv = pv[diag];
727: MatPivotCheck(A,info,&sctx,i);
728: if (sctx.newshift) break;
729: pv[diag] = sctx.pv;
730: }
732: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
733: /*
734: * if no shift in this attempt & shifting & started shifting & can refine,
735: * then try lower shift
736: */
737: sctx.shift_hi = sctx.shift_fraction;
738: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
739: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
740: sctx.newshift = PETSC_TRUE;
741: sctx.nshift++;
742: }
743: } while (sctx.newshift);
745: /* invert diagonal entries for simplier triangular solves */
746: for (i=0; i<n; i++) {
747: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
748: }
749: PetscFree(rtmp);
750: ISRestoreIndices(isicol,&ic);
751: ISRestoreIndices(isrow,&r);
753: ISIdentity(isrow,&row_identity);
754: ISIdentity(isicol,&col_identity);
755: if (row_identity && col_identity) {
756: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
757: } else {
758: C->ops->solve = MatSolve_SeqAIJ_inplace;
759: }
760: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
761: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
762: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
763: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
765: C->assembled = PETSC_TRUE;
766: C->preallocated = PETSC_TRUE;
768: PetscLogFlops(C->cmap->n);
769: if (sctx.nshift) {
770: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
771: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
772: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
773: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
774: }
775: }
776: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
777: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
779: Mat_CheckInode(C,PETSC_FALSE);
780: return(0);
781: }
783: /*
784: This routine implements inplace ILU(0) with row or/and column permutations.
785: Input:
786: A - original matrix
787: Output;
788: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
789: a->j (col index) is permuted by the inverse of colperm, then sorted
790: a->a reordered accordingly with a->j
791: a->diag (ptr to diagonal elements) is updated.
792: */
795: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
796: {
797: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
798: IS isrow = a->row,isicol = a->icol;
799: PetscErrorCode ierr;
800: const PetscInt *r,*ic,*ics;
801: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
802: PetscInt *ajtmp,nz,row;
803: PetscInt *diag = a->diag,nbdiag,*pj;
804: PetscScalar *rtmp,*pc,multiplier,d;
805: MatScalar *pv,*v;
806: PetscReal rs;
807: FactorShiftCtx sctx;
808: const MatScalar *aa=a->a,*vtmp;
811: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
813: /* MatPivotSetUp(): initialize shift context sctx */
814: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
816: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
817: const PetscInt *ddiag = a->diag;
818: sctx.shift_top = info->zeropivot;
819: for (i=0; i<n; i++) {
820: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
821: d = (aa)[ddiag[i]];
822: rs = -PetscAbsScalar(d) - PetscRealPart(d);
823: vtmp = aa+ai[i];
824: nz = ai[i+1] - ai[i];
825: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
826: if (rs>sctx.shift_top) sctx.shift_top = rs;
827: }
828: sctx.shift_top *= 1.1;
829: sctx.nshift_max = 5;
830: sctx.shift_lo = 0.;
831: sctx.shift_hi = 1.;
832: }
834: ISGetIndices(isrow,&r);
835: ISGetIndices(isicol,&ic);
836: PetscMalloc1((n+1),&rtmp);
837: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
838: ics = ic;
840: #if defined(MV)
841: sctx.shift_top = 0.;
842: sctx.nshift_max = 0;
843: sctx.shift_lo = 0.;
844: sctx.shift_hi = 0.;
845: sctx.shift_fraction = 0.;
847: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
848: sctx.shift_top = 0.;
849: for (i=0; i<n; i++) {
850: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
851: d = (a->a)[diag[i]];
852: rs = -PetscAbsScalar(d) - PetscRealPart(d);
853: v = a->a+ai[i];
854: nz = ai[i+1] - ai[i];
855: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
856: if (rs>sctx.shift_top) sctx.shift_top = rs;
857: }
858: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
859: sctx.shift_top *= 1.1;
860: sctx.nshift_max = 5;
861: sctx.shift_lo = 0.;
862: sctx.shift_hi = 1.;
863: }
865: sctx.shift_amount = 0.;
866: sctx.nshift = 0;
867: #endif
869: do {
870: sctx.newshift = PETSC_FALSE;
871: for (i=0; i<n; i++) {
872: /* load in initial unfactored row */
873: nz = ai[r[i]+1] - ai[r[i]];
874: ajtmp = aj + ai[r[i]];
875: v = a->a + ai[r[i]];
876: /* sort permuted ajtmp and values v accordingly */
877: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
878: PetscSortIntWithScalarArray(nz,ajtmp,v);
880: diag[r[i]] = ai[r[i]];
881: for (j=0; j<nz; j++) {
882: rtmp[ajtmp[j]] = v[j];
883: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
884: }
885: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
887: row = *ajtmp++;
888: while (row < i) {
889: pc = rtmp + row;
890: if (*pc != 0.0) {
891: pv = a->a + diag[r[row]];
892: pj = aj + diag[r[row]] + 1;
894: multiplier = *pc / *pv++;
895: *pc = multiplier;
896: nz = ai[r[row]+1] - diag[r[row]] - 1;
897: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
898: PetscLogFlops(1+2*nz);
899: }
900: row = *ajtmp++;
901: }
902: /* finished row so overwrite it onto a->a */
903: pv = a->a + ai[r[i]];
904: pj = aj + ai[r[i]];
905: nz = ai[r[i]+1] - ai[r[i]];
906: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
908: rs = 0.0;
909: for (j=0; j<nz; j++) {
910: pv[j] = rtmp[pj[j]];
911: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
912: }
914: sctx.rs = rs;
915: sctx.pv = pv[nbdiag];
916: MatPivotCheck(A,info,&sctx,i);
917: if (sctx.newshift) break;
918: pv[nbdiag] = sctx.pv;
919: }
921: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
922: /*
923: * if no shift in this attempt & shifting & started shifting & can refine,
924: * then try lower shift
925: */
926: sctx.shift_hi = sctx.shift_fraction;
927: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
928: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
929: sctx.newshift = PETSC_TRUE;
930: sctx.nshift++;
931: }
932: } while (sctx.newshift);
934: /* invert diagonal entries for simplier triangular solves */
935: for (i=0; i<n; i++) {
936: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
937: }
939: PetscFree(rtmp);
940: ISRestoreIndices(isicol,&ic);
941: ISRestoreIndices(isrow,&r);
943: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
944: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
945: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
946: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
948: A->assembled = PETSC_TRUE;
949: A->preallocated = PETSC_TRUE;
951: PetscLogFlops(A->cmap->n);
952: if (sctx.nshift) {
953: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
954: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
955: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
956: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
957: }
958: }
959: return(0);
960: }
962: /* ----------------------------------------------------------- */
965: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
966: {
968: Mat C;
971: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
972: MatLUFactorSymbolic(C,A,row,col,info);
973: MatLUFactorNumeric(C,A,info);
975: A->ops->solve = C->ops->solve;
976: A->ops->solvetranspose = C->ops->solvetranspose;
978: MatHeaderMerge(A,C);
979: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
980: return(0);
981: }
982: /* ----------------------------------------------------------- */
987: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
988: {
989: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
990: IS iscol = a->col,isrow = a->row;
991: PetscErrorCode ierr;
992: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
993: PetscInt nz;
994: const PetscInt *rout,*cout,*r,*c;
995: PetscScalar *x,*tmp,*tmps,sum;
996: const PetscScalar *b;
997: const MatScalar *aa = a->a,*v;
1000: if (!n) return(0);
1002: VecGetArrayRead(bb,&b);
1003: VecGetArray(xx,&x);
1004: tmp = a->solve_work;
1006: ISGetIndices(isrow,&rout); r = rout;
1007: ISGetIndices(iscol,&cout); c = cout + (n-1);
1009: /* forward solve the lower triangular */
1010: tmp[0] = b[*r++];
1011: tmps = tmp;
1012: for (i=1; i<n; i++) {
1013: v = aa + ai[i];
1014: vi = aj + ai[i];
1015: nz = a->diag[i] - ai[i];
1016: sum = b[*r++];
1017: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1018: tmp[i] = sum;
1019: }
1021: /* backward solve the upper triangular */
1022: for (i=n-1; i>=0; i--) {
1023: v = aa + a->diag[i] + 1;
1024: vi = aj + a->diag[i] + 1;
1025: nz = ai[i+1] - a->diag[i] - 1;
1026: sum = tmp[i];
1027: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1028: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1029: }
1031: ISRestoreIndices(isrow,&rout);
1032: ISRestoreIndices(iscol,&cout);
1033: VecRestoreArrayRead(bb,&b);
1034: VecRestoreArray(xx,&x);
1035: PetscLogFlops(2.0*a->nz - A->cmap->n);
1036: return(0);
1037: }
1041: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1042: {
1043: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1044: IS iscol = a->col,isrow = a->row;
1045: PetscErrorCode ierr;
1046: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1047: PetscInt nz,neq;
1048: const PetscInt *rout,*cout,*r,*c;
1049: PetscScalar *x,*b,*tmp,*tmps,sum;
1050: const MatScalar *aa = a->a,*v;
1051: PetscBool bisdense,xisdense;
1054: if (!n) return(0);
1056: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1057: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1058: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1059: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1061: MatDenseGetArray(B,&b);
1062: MatDenseGetArray(X,&x);
1064: tmp = a->solve_work;
1065: ISGetIndices(isrow,&rout); r = rout;
1066: ISGetIndices(iscol,&cout); c = cout;
1068: for (neq=0; neq<B->cmap->n; neq++) {
1069: /* forward solve the lower triangular */
1070: tmp[0] = b[r[0]];
1071: tmps = tmp;
1072: for (i=1; i<n; i++) {
1073: v = aa + ai[i];
1074: vi = aj + ai[i];
1075: nz = a->diag[i] - ai[i];
1076: sum = b[r[i]];
1077: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1078: tmp[i] = sum;
1079: }
1080: /* backward solve the upper triangular */
1081: for (i=n-1; i>=0; i--) {
1082: v = aa + a->diag[i] + 1;
1083: vi = aj + a->diag[i] + 1;
1084: nz = ai[i+1] - a->diag[i] - 1;
1085: sum = tmp[i];
1086: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1087: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1088: }
1090: b += n;
1091: x += n;
1092: }
1093: ISRestoreIndices(isrow,&rout);
1094: ISRestoreIndices(iscol,&cout);
1095: MatDenseRestoreArray(B,&b);
1096: MatDenseRestoreArray(X,&x);
1097: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1098: return(0);
1099: }
1103: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1104: {
1105: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1106: IS iscol = a->col,isrow = a->row;
1107: PetscErrorCode ierr;
1108: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1109: PetscInt nz,neq;
1110: const PetscInt *rout,*cout,*r,*c;
1111: PetscScalar *x,*b,*tmp,sum;
1112: const MatScalar *aa = a->a,*v;
1113: PetscBool bisdense,xisdense;
1116: if (!n) return(0);
1118: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1119: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1120: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1121: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1123: MatDenseGetArray(B,&b);
1124: MatDenseGetArray(X,&x);
1126: tmp = a->solve_work;
1127: ISGetIndices(isrow,&rout); r = rout;
1128: ISGetIndices(iscol,&cout); c = cout;
1130: for (neq=0; neq<B->cmap->n; neq++) {
1131: /* forward solve the lower triangular */
1132: tmp[0] = b[r[0]];
1133: v = aa;
1134: vi = aj;
1135: for (i=1; i<n; i++) {
1136: nz = ai[i+1] - ai[i];
1137: sum = b[r[i]];
1138: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1139: tmp[i] = sum;
1140: v += nz; vi += nz;
1141: }
1143: /* backward solve the upper triangular */
1144: for (i=n-1; i>=0; i--) {
1145: v = aa + adiag[i+1]+1;
1146: vi = aj + adiag[i+1]+1;
1147: nz = adiag[i]-adiag[i+1]-1;
1148: sum = tmp[i];
1149: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1150: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1151: }
1153: b += n;
1154: x += n;
1155: }
1156: ISRestoreIndices(isrow,&rout);
1157: ISRestoreIndices(iscol,&cout);
1158: MatDenseRestoreArray(B,&b);
1159: MatDenseRestoreArray(X,&x);
1160: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1161: return(0);
1162: }
1166: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1167: {
1168: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1169: IS iscol = a->col,isrow = a->row;
1170: PetscErrorCode ierr;
1171: const PetscInt *r,*c,*rout,*cout;
1172: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1173: PetscInt nz,row;
1174: PetscScalar *x,*b,*tmp,*tmps,sum;
1175: const MatScalar *aa = a->a,*v;
1178: if (!n) return(0);
1180: VecGetArray(bb,&b);
1181: VecGetArray(xx,&x);
1182: tmp = a->solve_work;
1184: ISGetIndices(isrow,&rout); r = rout;
1185: ISGetIndices(iscol,&cout); c = cout + (n-1);
1187: /* forward solve the lower triangular */
1188: tmp[0] = b[*r++];
1189: tmps = tmp;
1190: for (row=1; row<n; row++) {
1191: i = rout[row]; /* permuted row */
1192: v = aa + ai[i];
1193: vi = aj + ai[i];
1194: nz = a->diag[i] - ai[i];
1195: sum = b[*r++];
1196: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1197: tmp[row] = sum;
1198: }
1200: /* backward solve the upper triangular */
1201: for (row=n-1; row>=0; row--) {
1202: i = rout[row]; /* permuted row */
1203: v = aa + a->diag[i] + 1;
1204: vi = aj + a->diag[i] + 1;
1205: nz = ai[i+1] - a->diag[i] - 1;
1206: sum = tmp[row];
1207: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1208: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1209: }
1211: ISRestoreIndices(isrow,&rout);
1212: ISRestoreIndices(iscol,&cout);
1213: VecRestoreArray(bb,&b);
1214: VecRestoreArray(xx,&x);
1215: PetscLogFlops(2.0*a->nz - A->cmap->n);
1216: return(0);
1217: }
1219: /* ----------------------------------------------------------- */
1220: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1223: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1224: {
1225: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1226: PetscErrorCode ierr;
1227: PetscInt n = A->rmap->n;
1228: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1229: PetscScalar *x;
1230: const PetscScalar *b;
1231: const MatScalar *aa = a->a;
1232: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1233: PetscInt adiag_i,i,nz,ai_i;
1234: const PetscInt *vi;
1235: const MatScalar *v;
1236: PetscScalar sum;
1237: #endif
1240: if (!n) return(0);
1242: VecGetArrayRead(bb,&b);
1243: VecGetArray(xx,&x);
1245: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1246: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1247: #else
1248: /* forward solve the lower triangular */
1249: x[0] = b[0];
1250: for (i=1; i<n; i++) {
1251: ai_i = ai[i];
1252: v = aa + ai_i;
1253: vi = aj + ai_i;
1254: nz = adiag[i] - ai_i;
1255: sum = b[i];
1256: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1257: x[i] = sum;
1258: }
1260: /* backward solve the upper triangular */
1261: for (i=n-1; i>=0; i--) {
1262: adiag_i = adiag[i];
1263: v = aa + adiag_i + 1;
1264: vi = aj + adiag_i + 1;
1265: nz = ai[i+1] - adiag_i - 1;
1266: sum = x[i];
1267: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1268: x[i] = sum*aa[adiag_i];
1269: }
1270: #endif
1271: PetscLogFlops(2.0*a->nz - A->cmap->n);
1272: VecRestoreArrayRead(bb,&b);
1273: VecRestoreArray(xx,&x);
1274: return(0);
1275: }
1279: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1280: {
1281: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1282: IS iscol = a->col,isrow = a->row;
1283: PetscErrorCode ierr;
1284: PetscInt i, n = A->rmap->n,j;
1285: PetscInt nz;
1286: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1287: PetscScalar *x,*tmp,sum;
1288: const PetscScalar *b;
1289: const MatScalar *aa = a->a,*v;
1292: if (yy != xx) {VecCopy(yy,xx);}
1294: VecGetArrayRead(bb,&b);
1295: VecGetArray(xx,&x);
1296: tmp = a->solve_work;
1298: ISGetIndices(isrow,&rout); r = rout;
1299: ISGetIndices(iscol,&cout); c = cout + (n-1);
1301: /* forward solve the lower triangular */
1302: tmp[0] = b[*r++];
1303: for (i=1; i<n; i++) {
1304: v = aa + ai[i];
1305: vi = aj + ai[i];
1306: nz = a->diag[i] - ai[i];
1307: sum = b[*r++];
1308: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1309: tmp[i] = sum;
1310: }
1312: /* backward solve the upper triangular */
1313: for (i=n-1; i>=0; i--) {
1314: v = aa + a->diag[i] + 1;
1315: vi = aj + a->diag[i] + 1;
1316: nz = ai[i+1] - a->diag[i] - 1;
1317: sum = tmp[i];
1318: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1319: tmp[i] = sum*aa[a->diag[i]];
1320: x[*c--] += tmp[i];
1321: }
1323: ISRestoreIndices(isrow,&rout);
1324: ISRestoreIndices(iscol,&cout);
1325: VecRestoreArrayRead(bb,&b);
1326: VecRestoreArray(xx,&x);
1327: PetscLogFlops(2.0*a->nz);
1328: return(0);
1329: }
1333: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1334: {
1335: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1336: IS iscol = a->col,isrow = a->row;
1337: PetscErrorCode ierr;
1338: PetscInt i, n = A->rmap->n,j;
1339: PetscInt nz;
1340: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1341: PetscScalar *x,*tmp,sum;
1342: const PetscScalar *b;
1343: const MatScalar *aa = a->a,*v;
1346: if (yy != xx) {VecCopy(yy,xx);}
1348: VecGetArrayRead(bb,&b);
1349: VecGetArray(xx,&x);
1350: tmp = a->solve_work;
1352: ISGetIndices(isrow,&rout); r = rout;
1353: ISGetIndices(iscol,&cout); c = cout;
1355: /* forward solve the lower triangular */
1356: tmp[0] = b[r[0]];
1357: v = aa;
1358: vi = aj;
1359: for (i=1; i<n; i++) {
1360: nz = ai[i+1] - ai[i];
1361: sum = b[r[i]];
1362: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1363: tmp[i] = sum;
1364: v += nz;
1365: vi += nz;
1366: }
1368: /* backward solve the upper triangular */
1369: v = aa + adiag[n-1];
1370: vi = aj + adiag[n-1];
1371: for (i=n-1; i>=0; i--) {
1372: nz = adiag[i] - adiag[i+1] - 1;
1373: sum = tmp[i];
1374: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1375: tmp[i] = sum*v[nz];
1376: x[c[i]] += tmp[i];
1377: v += nz+1; vi += nz+1;
1378: }
1380: ISRestoreIndices(isrow,&rout);
1381: ISRestoreIndices(iscol,&cout);
1382: VecRestoreArrayRead(bb,&b);
1383: VecRestoreArray(xx,&x);
1384: PetscLogFlops(2.0*a->nz);
1385: return(0);
1386: }
1390: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1391: {
1392: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1393: IS iscol = a->col,isrow = a->row;
1394: PetscErrorCode ierr;
1395: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1396: PetscInt i,n = A->rmap->n,j;
1397: PetscInt nz;
1398: PetscScalar *x,*tmp,s1;
1399: const MatScalar *aa = a->a,*v;
1400: const PetscScalar *b;
1403: VecGetArrayRead(bb,&b);
1404: VecGetArray(xx,&x);
1405: tmp = a->solve_work;
1407: ISGetIndices(isrow,&rout); r = rout;
1408: ISGetIndices(iscol,&cout); c = cout;
1410: /* copy the b into temp work space according to permutation */
1411: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1413: /* forward solve the U^T */
1414: for (i=0; i<n; i++) {
1415: v = aa + diag[i];
1416: vi = aj + diag[i] + 1;
1417: nz = ai[i+1] - diag[i] - 1;
1418: s1 = tmp[i];
1419: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1420: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1421: tmp[i] = s1;
1422: }
1424: /* backward solve the L^T */
1425: for (i=n-1; i>=0; i--) {
1426: v = aa + diag[i] - 1;
1427: vi = aj + diag[i] - 1;
1428: nz = diag[i] - ai[i];
1429: s1 = tmp[i];
1430: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1431: }
1433: /* copy tmp into x according to permutation */
1434: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1436: ISRestoreIndices(isrow,&rout);
1437: ISRestoreIndices(iscol,&cout);
1438: VecRestoreArrayRead(bb,&b);
1439: VecRestoreArray(xx,&x);
1441: PetscLogFlops(2.0*a->nz-A->cmap->n);
1442: return(0);
1443: }
1447: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1448: {
1449: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1450: IS iscol = a->col,isrow = a->row;
1451: PetscErrorCode ierr;
1452: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1453: PetscInt i,n = A->rmap->n,j;
1454: PetscInt nz;
1455: PetscScalar *x,*tmp,s1;
1456: const MatScalar *aa = a->a,*v;
1457: const PetscScalar *b;
1460: VecGetArrayRead(bb,&b);
1461: VecGetArray(xx,&x);
1462: tmp = a->solve_work;
1464: ISGetIndices(isrow,&rout); r = rout;
1465: ISGetIndices(iscol,&cout); c = cout;
1467: /* copy the b into temp work space according to permutation */
1468: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1470: /* forward solve the U^T */
1471: for (i=0; i<n; i++) {
1472: v = aa + adiag[i+1] + 1;
1473: vi = aj + adiag[i+1] + 1;
1474: nz = adiag[i] - adiag[i+1] - 1;
1475: s1 = tmp[i];
1476: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1477: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1478: tmp[i] = s1;
1479: }
1481: /* backward solve the L^T */
1482: for (i=n-1; i>=0; i--) {
1483: v = aa + ai[i];
1484: vi = aj + ai[i];
1485: nz = ai[i+1] - ai[i];
1486: s1 = tmp[i];
1487: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1488: }
1490: /* copy tmp into x according to permutation */
1491: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1493: ISRestoreIndices(isrow,&rout);
1494: ISRestoreIndices(iscol,&cout);
1495: VecRestoreArrayRead(bb,&b);
1496: VecRestoreArray(xx,&x);
1498: PetscLogFlops(2.0*a->nz-A->cmap->n);
1499: return(0);
1500: }
1504: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1505: {
1506: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1507: IS iscol = a->col,isrow = a->row;
1508: PetscErrorCode ierr;
1509: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1510: PetscInt i,n = A->rmap->n,j;
1511: PetscInt nz;
1512: PetscScalar *x,*tmp,s1;
1513: const MatScalar *aa = a->a,*v;
1514: const PetscScalar *b;
1517: if (zz != xx) {VecCopy(zz,xx);}
1518: VecGetArrayRead(bb,&b);
1519: VecGetArray(xx,&x);
1520: tmp = a->solve_work;
1522: ISGetIndices(isrow,&rout); r = rout;
1523: ISGetIndices(iscol,&cout); c = cout;
1525: /* copy the b into temp work space according to permutation */
1526: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1528: /* forward solve the U^T */
1529: for (i=0; i<n; i++) {
1530: v = aa + diag[i];
1531: vi = aj + diag[i] + 1;
1532: nz = ai[i+1] - diag[i] - 1;
1533: s1 = tmp[i];
1534: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1535: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1536: tmp[i] = s1;
1537: }
1539: /* backward solve the L^T */
1540: for (i=n-1; i>=0; i--) {
1541: v = aa + diag[i] - 1;
1542: vi = aj + diag[i] - 1;
1543: nz = diag[i] - ai[i];
1544: s1 = tmp[i];
1545: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1546: }
1548: /* copy tmp into x according to permutation */
1549: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1551: ISRestoreIndices(isrow,&rout);
1552: ISRestoreIndices(iscol,&cout);
1553: VecRestoreArrayRead(bb,&b);
1554: VecRestoreArray(xx,&x);
1556: PetscLogFlops(2.0*a->nz-A->cmap->n);
1557: return(0);
1558: }
1562: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1563: {
1564: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1565: IS iscol = a->col,isrow = a->row;
1566: PetscErrorCode ierr;
1567: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1568: PetscInt i,n = A->rmap->n,j;
1569: PetscInt nz;
1570: PetscScalar *x,*tmp,s1;
1571: const MatScalar *aa = a->a,*v;
1572: const PetscScalar *b;
1575: if (zz != xx) {VecCopy(zz,xx);}
1576: VecGetArrayRead(bb,&b);
1577: VecGetArray(xx,&x);
1578: tmp = a->solve_work;
1580: ISGetIndices(isrow,&rout); r = rout;
1581: ISGetIndices(iscol,&cout); c = cout;
1583: /* copy the b into temp work space according to permutation */
1584: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1586: /* forward solve the U^T */
1587: for (i=0; i<n; i++) {
1588: v = aa + adiag[i+1] + 1;
1589: vi = aj + adiag[i+1] + 1;
1590: nz = adiag[i] - adiag[i+1] - 1;
1591: s1 = tmp[i];
1592: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1593: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1594: tmp[i] = s1;
1595: }
1598: /* backward solve the L^T */
1599: for (i=n-1; i>=0; i--) {
1600: v = aa + ai[i];
1601: vi = aj + ai[i];
1602: nz = ai[i+1] - ai[i];
1603: s1 = tmp[i];
1604: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1605: }
1607: /* copy tmp into x according to permutation */
1608: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1610: ISRestoreIndices(isrow,&rout);
1611: ISRestoreIndices(iscol,&cout);
1612: VecRestoreArrayRead(bb,&b);
1613: VecRestoreArray(xx,&x);
1615: PetscLogFlops(2.0*a->nz-A->cmap->n);
1616: return(0);
1617: }
1619: /* ----------------------------------------------------------------*/
1621: extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1623: /*
1624: ilu() under revised new data structure.
1625: Factored arrays bj and ba are stored as
1626: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1628: bi=fact->i is an array of size n+1, in which
1629: bi+
1630: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1631: bi[n]: points to L(n-1,n-1)+1
1633: bdiag=fact->diag is an array of size n+1,in which
1634: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1635: bdiag[n]: points to entry of U(n-1,0)-1
1637: U(i,:) contains bdiag[i] as its last entry, i.e.,
1638: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1639: */
1642: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1643: {
1645: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1647: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1648: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1649: PetscBool missing;
1650: IS isicol;
1653: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1654: MatMissingDiagonal(A,&missing,&i);
1655: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1656: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1657: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1658: b = (Mat_SeqAIJ*)(fact)->data;
1660: /* allocate matrix arrays for new data structure */
1661: PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1662: PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1664: b->singlemalloc = PETSC_TRUE;
1665: if (!b->diag) {
1666: PetscMalloc1((n+1),&b->diag);
1667: PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1668: }
1669: bdiag = b->diag;
1671: if (n > 0) {
1672: PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1673: }
1675: /* set bi and bj with new data structure */
1676: bi = b->i;
1677: bj = b->j;
1679: /* L part */
1680: bi[0] = 0;
1681: for (i=0; i<n; i++) {
1682: nz = adiag[i] - ai[i];
1683: bi[i+1] = bi[i] + nz;
1684: aj = a->j + ai[i];
1685: for (j=0; j<nz; j++) {
1686: /* *bj = aj[j]; bj++; */
1687: bj[k++] = aj[j];
1688: }
1689: }
1691: /* U part */
1692: bdiag[n] = bi[n]-1;
1693: for (i=n-1; i>=0; i--) {
1694: nz = ai[i+1] - adiag[i] - 1;
1695: aj = a->j + adiag[i] + 1;
1696: for (j=0; j<nz; j++) {
1697: /* *bj = aj[j]; bj++; */
1698: bj[k++] = aj[j];
1699: }
1700: /* diag[i] */
1701: /* *bj = i; bj++; */
1702: bj[k++] = i;
1703: bdiag[i] = bdiag[i+1] + nz + 1;
1704: }
1706: fact->factortype = MAT_FACTOR_ILU;
1707: fact->info.factor_mallocs = 0;
1708: fact->info.fill_ratio_given = info->fill;
1709: fact->info.fill_ratio_needed = 1.0;
1710: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1711: Mat_CheckInode_FactorLU(fact);
1713: b = (Mat_SeqAIJ*)(fact)->data;
1714: b->row = isrow;
1715: b->col = iscol;
1716: b->icol = isicol;
1717: PetscMalloc1((fact->rmap->n+1),&b->solve_work);
1718: PetscObjectReference((PetscObject)isrow);
1719: PetscObjectReference((PetscObject)iscol);
1720: return(0);
1721: }
1725: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1726: {
1727: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1728: IS isicol;
1729: PetscErrorCode ierr;
1730: const PetscInt *r,*ic;
1731: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1732: PetscInt *bi,*cols,nnz,*cols_lvl;
1733: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1734: PetscInt i,levels,diagonal_fill;
1735: PetscBool col_identity,row_identity;
1736: PetscReal f;
1737: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1738: PetscBT lnkbt;
1739: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1740: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1741: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1744: /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1745: /*
1746: PetscBool olddatastruct=PETSC_FALSE;
1747: PetscOptionsGetBool(NULL,"-ilu_old",&olddatastruct,NULL);
1748: if (olddatastruct) {
1749: MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);
1750: return(0);
1751: }
1752: */
1754: levels = (PetscInt)info->levels;
1755: ISIdentity(isrow,&row_identity);
1756: ISIdentity(iscol,&col_identity);
1757: if (!levels && row_identity && col_identity) {
1758: /* special case: ilu(0) with natural ordering */
1759: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1760: if (a->inode.size) {
1761: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1762: }
1763: return(0);
1764: }
1766: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1767: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1768: ISGetIndices(isrow,&r);
1769: ISGetIndices(isicol,&ic);
1771: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1772: PetscMalloc1((n+1),&bi);
1773: PetscMalloc1((n+1),&bdiag);
1774: bi[0] = bdiag[0] = 0;
1775: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1777: /* create a linked list for storing column indices of the active row */
1778: nlnk = n + 1;
1779: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1781: /* initial FreeSpace size is f*(ai[n]+1) */
1782: f = info->fill;
1783: diagonal_fill = (PetscInt)info->diagonal_fill;
1784: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1785: current_space = free_space;
1786: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1787: current_space_lvl = free_space_lvl;
1788: for (i=0; i<n; i++) {
1789: nzi = 0;
1790: /* copy current row into linked list */
1791: nnz = ai[r[i]+1] - ai[r[i]];
1792: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1793: cols = aj + ai[r[i]];
1794: lnk[i] = -1; /* marker to indicate if diagonal exists */
1795: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1796: nzi += nlnk;
1798: /* make sure diagonal entry is included */
1799: if (diagonal_fill && lnk[i] == -1) {
1800: fm = n;
1801: while (lnk[fm] < i) fm = lnk[fm];
1802: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1803: lnk[fm] = i;
1804: lnk_lvl[i] = 0;
1805: nzi++; dcount++;
1806: }
1808: /* add pivot rows into the active row */
1809: nzbd = 0;
1810: prow = lnk[n];
1811: while (prow < i) {
1812: nnz = bdiag[prow];
1813: cols = bj_ptr[prow] + nnz + 1;
1814: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1815: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1816: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1817: nzi += nlnk;
1818: prow = lnk[prow];
1819: nzbd++;
1820: }
1821: bdiag[i] = nzbd;
1822: bi[i+1] = bi[i] + nzi;
1823: /* if free space is not available, make more free space */
1824: if (current_space->local_remaining<nzi) {
1825: nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1826: PetscFreeSpaceGet(nnz,¤t_space);
1827: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1828: reallocs++;
1829: }
1831: /* copy data into free_space and free_space_lvl, then initialize lnk */
1832: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1833: bj_ptr[i] = current_space->array;
1834: bjlvl_ptr[i] = current_space_lvl->array;
1836: /* make sure the active row i has diagonal entry */
1837: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1839: current_space->array += nzi;
1840: current_space->local_used += nzi;
1841: current_space->local_remaining -= nzi;
1842: current_space_lvl->array += nzi;
1843: current_space_lvl->local_used += nzi;
1844: current_space_lvl->local_remaining -= nzi;
1845: }
1847: ISRestoreIndices(isrow,&r);
1848: ISRestoreIndices(isicol,&ic);
1849: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1850: PetscMalloc1((bi[n]+1),&bj);
1851: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1853: PetscIncompleteLLDestroy(lnk,lnkbt);
1854: PetscFreeSpaceDestroy(free_space_lvl);
1855: PetscFree2(bj_ptr,bjlvl_ptr);
1857: #if defined(PETSC_USE_INFO)
1858: {
1859: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1860: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1861: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1862: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1863: PetscInfo(A,"for best performance.\n");
1864: if (diagonal_fill) {
1865: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1866: }
1867: }
1868: #endif
1869: /* put together the new matrix */
1870: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1871: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1872: b = (Mat_SeqAIJ*)(fact)->data;
1874: b->free_a = PETSC_TRUE;
1875: b->free_ij = PETSC_TRUE;
1876: b->singlemalloc = PETSC_FALSE;
1878: PetscMalloc1((bdiag[0]+1),&b->a);
1880: b->j = bj;
1881: b->i = bi;
1882: b->diag = bdiag;
1883: b->ilen = 0;
1884: b->imax = 0;
1885: b->row = isrow;
1886: b->col = iscol;
1887: PetscObjectReference((PetscObject)isrow);
1888: PetscObjectReference((PetscObject)iscol);
1889: b->icol = isicol;
1891: PetscMalloc1((n+1),&b->solve_work);
1892: /* In b structure: Free imax, ilen, old a, old j.
1893: Allocate bdiag, solve_work, new a, new j */
1894: PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1895: b->maxnz = b->nz = bdiag[0]+1;
1897: (fact)->info.factor_mallocs = reallocs;
1898: (fact)->info.fill_ratio_given = f;
1899: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1900: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1901: if (a->inode.size) {
1902: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1903: }
1904: Mat_CheckInode_FactorLU(fact);
1905: return(0);
1906: }
1910: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1911: {
1912: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1913: IS isicol;
1914: PetscErrorCode ierr;
1915: const PetscInt *r,*ic;
1916: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1917: PetscInt *bi,*cols,nnz,*cols_lvl;
1918: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1919: PetscInt i,levels,diagonal_fill;
1920: PetscBool col_identity,row_identity;
1921: PetscReal f;
1922: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1923: PetscBT lnkbt;
1924: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1925: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1926: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1927: PetscBool missing;
1930: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1931: f = info->fill;
1932: levels = (PetscInt)info->levels;
1933: diagonal_fill = (PetscInt)info->diagonal_fill;
1935: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1937: ISIdentity(isrow,&row_identity);
1938: ISIdentity(iscol,&col_identity);
1939: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1940: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1942: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1943: if (a->inode.size) {
1944: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1945: }
1946: fact->factortype = MAT_FACTOR_ILU;
1947: (fact)->info.factor_mallocs = 0;
1948: (fact)->info.fill_ratio_given = info->fill;
1949: (fact)->info.fill_ratio_needed = 1.0;
1951: b = (Mat_SeqAIJ*)(fact)->data;
1952: MatMissingDiagonal(A,&missing,&d);
1953: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1954: b->row = isrow;
1955: b->col = iscol;
1956: b->icol = isicol;
1957: PetscMalloc1(((fact)->rmap->n+1),&b->solve_work);
1958: PetscObjectReference((PetscObject)isrow);
1959: PetscObjectReference((PetscObject)iscol);
1960: return(0);
1961: }
1963: ISGetIndices(isrow,&r);
1964: ISGetIndices(isicol,&ic);
1966: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1967: PetscMalloc1((n+1),&bi);
1968: PetscMalloc1((n+1),&bdiag);
1969: bi[0] = bdiag[0] = 0;
1971: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1973: /* create a linked list for storing column indices of the active row */
1974: nlnk = n + 1;
1975: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1977: /* initial FreeSpace size is f*(ai[n]+1) */
1978: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1979: current_space = free_space;
1980: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1981: current_space_lvl = free_space_lvl;
1983: for (i=0; i<n; i++) {
1984: nzi = 0;
1985: /* copy current row into linked list */
1986: nnz = ai[r[i]+1] - ai[r[i]];
1987: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1988: cols = aj + ai[r[i]];
1989: lnk[i] = -1; /* marker to indicate if diagonal exists */
1990: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1991: nzi += nlnk;
1993: /* make sure diagonal entry is included */
1994: if (diagonal_fill && lnk[i] == -1) {
1995: fm = n;
1996: while (lnk[fm] < i) fm = lnk[fm];
1997: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1998: lnk[fm] = i;
1999: lnk_lvl[i] = 0;
2000: nzi++; dcount++;
2001: }
2003: /* add pivot rows into the active row */
2004: nzbd = 0;
2005: prow = lnk[n];
2006: while (prow < i) {
2007: nnz = bdiag[prow];
2008: cols = bj_ptr[prow] + nnz + 1;
2009: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
2010: nnz = bi[prow+1] - bi[prow] - nnz - 1;
2011: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
2012: nzi += nlnk;
2013: prow = lnk[prow];
2014: nzbd++;
2015: }
2016: bdiag[i] = nzbd;
2017: bi[i+1] = bi[i] + nzi;
2019: /* if free space is not available, make more free space */
2020: if (current_space->local_remaining<nzi) {
2021: nnz = nzi*(n - i); /* estimated and max additional space needed */
2022: PetscFreeSpaceGet(nnz,¤t_space);
2023: PetscFreeSpaceGet(nnz,¤t_space_lvl);
2024: reallocs++;
2025: }
2027: /* copy data into free_space and free_space_lvl, then initialize lnk */
2028: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2029: bj_ptr[i] = current_space->array;
2030: bjlvl_ptr[i] = current_space_lvl->array;
2032: /* make sure the active row i has diagonal entry */
2033: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2035: current_space->array += nzi;
2036: current_space->local_used += nzi;
2037: current_space->local_remaining -= nzi;
2038: current_space_lvl->array += nzi;
2039: current_space_lvl->local_used += nzi;
2040: current_space_lvl->local_remaining -= nzi;
2041: }
2043: ISRestoreIndices(isrow,&r);
2044: ISRestoreIndices(isicol,&ic);
2046: /* destroy list of free space and other temporary arrays */
2047: PetscMalloc1((bi[n]+1),&bj);
2048: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2049: PetscIncompleteLLDestroy(lnk,lnkbt);
2050: PetscFreeSpaceDestroy(free_space_lvl);
2051: PetscFree2(bj_ptr,bjlvl_ptr);
2053: #if defined(PETSC_USE_INFO)
2054: {
2055: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2056: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
2057: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
2058: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
2059: PetscInfo(A,"for best performance.\n");
2060: if (diagonal_fill) {
2061: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
2062: }
2063: }
2064: #endif
2066: /* put together the new matrix */
2067: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
2068: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
2069: b = (Mat_SeqAIJ*)(fact)->data;
2071: b->free_a = PETSC_TRUE;
2072: b->free_ij = PETSC_TRUE;
2073: b->singlemalloc = PETSC_FALSE;
2075: PetscMalloc1(bi[n],&b->a);
2076: b->j = bj;
2077: b->i = bi;
2078: for (i=0; i<n; i++) bdiag[i] += bi[i];
2079: b->diag = bdiag;
2080: b->ilen = 0;
2081: b->imax = 0;
2082: b->row = isrow;
2083: b->col = iscol;
2084: PetscObjectReference((PetscObject)isrow);
2085: PetscObjectReference((PetscObject)iscol);
2086: b->icol = isicol;
2087: PetscMalloc1((n+1),&b->solve_work);
2088: /* In b structure: Free imax, ilen, old a, old j.
2089: Allocate bdiag, solve_work, new a, new j */
2090: PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2091: b->maxnz = b->nz = bi[n];
2093: (fact)->info.factor_mallocs = reallocs;
2094: (fact)->info.fill_ratio_given = f;
2095: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2096: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2097: if (a->inode.size) {
2098: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2099: }
2100: return(0);
2101: }
2105: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2106: {
2107: Mat C = B;
2108: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2109: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2110: IS ip=b->row,iip = b->icol;
2112: const PetscInt *rip,*riip;
2113: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2114: PetscInt *ai=a->i,*aj=a->j;
2115: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2116: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2117: PetscBool perm_identity;
2118: FactorShiftCtx sctx;
2119: PetscReal rs;
2120: MatScalar d,*v;
2123: /* MatPivotSetUp(): initialize shift context sctx */
2124: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2126: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2127: sctx.shift_top = info->zeropivot;
2128: for (i=0; i<mbs; i++) {
2129: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2130: d = (aa)[a->diag[i]];
2131: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2132: v = aa+ai[i];
2133: nz = ai[i+1] - ai[i];
2134: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2135: if (rs>sctx.shift_top) sctx.shift_top = rs;
2136: }
2137: sctx.shift_top *= 1.1;
2138: sctx.nshift_max = 5;
2139: sctx.shift_lo = 0.;
2140: sctx.shift_hi = 1.;
2141: }
2143: ISGetIndices(ip,&rip);
2144: ISGetIndices(iip,&riip);
2146: /* allocate working arrays
2147: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2148: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2149: */
2150: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);
2152: do {
2153: sctx.newshift = PETSC_FALSE;
2155: for (i=0; i<mbs; i++) c2r[i] = mbs;
2156: if (mbs) il[0] = 0;
2158: for (k = 0; k<mbs; k++) {
2159: /* zero rtmp */
2160: nz = bi[k+1] - bi[k];
2161: bjtmp = bj + bi[k];
2162: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2164: /* load in initial unfactored row */
2165: bval = ba + bi[k];
2166: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2167: for (j = jmin; j < jmax; j++) {
2168: col = riip[aj[j]];
2169: if (col >= k) { /* only take upper triangular entry */
2170: rtmp[col] = aa[j];
2171: *bval++ = 0.0; /* for in-place factorization */
2172: }
2173: }
2174: /* shift the diagonal of the matrix: ZeropivotApply() */
2175: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2177: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2178: dk = rtmp[k];
2179: i = c2r[k]; /* first row to be added to k_th row */
2181: while (i < k) {
2182: nexti = c2r[i]; /* next row to be added to k_th row */
2184: /* compute multiplier, update diag(k) and U(i,k) */
2185: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2186: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2187: dk += uikdi*ba[ili]; /* update diag[k] */
2188: ba[ili] = uikdi; /* -U(i,k) */
2190: /* add multiple of row i to k-th row */
2191: jmin = ili + 1; jmax = bi[i+1];
2192: if (jmin < jmax) {
2193: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2194: /* update il and c2r for row i */
2195: il[i] = jmin;
2196: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2197: }
2198: i = nexti;
2199: }
2201: /* copy data into U(k,:) */
2202: rs = 0.0;
2203: jmin = bi[k]; jmax = bi[k+1]-1;
2204: if (jmin < jmax) {
2205: for (j=jmin; j<jmax; j++) {
2206: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2207: }
2208: /* add the k-th row into il and c2r */
2209: il[k] = jmin;
2210: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2211: }
2213: /* MatPivotCheck() */
2214: sctx.rs = rs;
2215: sctx.pv = dk;
2216: MatPivotCheck(A,info,&sctx,i);
2217: if (sctx.newshift) break;
2218: dk = sctx.pv;
2220: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2221: }
2222: } while (sctx.newshift);
2224: PetscFree3(rtmp,il,c2r);
2225: ISRestoreIndices(ip,&rip);
2226: ISRestoreIndices(iip,&riip);
2228: ISIdentity(ip,&perm_identity);
2229: if (perm_identity) {
2230: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2231: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2232: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2233: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2234: } else {
2235: B->ops->solve = MatSolve_SeqSBAIJ_1;
2236: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2237: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2238: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2239: }
2241: C->assembled = PETSC_TRUE;
2242: C->preallocated = PETSC_TRUE;
2244: PetscLogFlops(C->rmap->n);
2246: /* MatPivotView() */
2247: if (sctx.nshift) {
2248: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2249: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2250: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2251: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2252: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2253: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2254: }
2255: }
2256: return(0);
2257: }
2261: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2262: {
2263: Mat C = B;
2264: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2265: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2266: IS ip=b->row,iip = b->icol;
2268: const PetscInt *rip,*riip;
2269: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2270: PetscInt *ai=a->i,*aj=a->j;
2271: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2272: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2273: PetscBool perm_identity;
2274: FactorShiftCtx sctx;
2275: PetscReal rs;
2276: MatScalar d,*v;
2279: /* MatPivotSetUp(): initialize shift context sctx */
2280: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2282: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2283: sctx.shift_top = info->zeropivot;
2284: for (i=0; i<mbs; i++) {
2285: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2286: d = (aa)[a->diag[i]];
2287: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2288: v = aa+ai[i];
2289: nz = ai[i+1] - ai[i];
2290: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2291: if (rs>sctx.shift_top) sctx.shift_top = rs;
2292: }
2293: sctx.shift_top *= 1.1;
2294: sctx.nshift_max = 5;
2295: sctx.shift_lo = 0.;
2296: sctx.shift_hi = 1.;
2297: }
2299: ISGetIndices(ip,&rip);
2300: ISGetIndices(iip,&riip);
2302: /* initialization */
2303: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
2305: do {
2306: sctx.newshift = PETSC_FALSE;
2308: for (i=0; i<mbs; i++) jl[i] = mbs;
2309: il[0] = 0;
2311: for (k = 0; k<mbs; k++) {
2312: /* zero rtmp */
2313: nz = bi[k+1] - bi[k];
2314: bjtmp = bj + bi[k];
2315: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2317: bval = ba + bi[k];
2318: /* initialize k-th row by the perm[k]-th row of A */
2319: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2320: for (j = jmin; j < jmax; j++) {
2321: col = riip[aj[j]];
2322: if (col >= k) { /* only take upper triangular entry */
2323: rtmp[col] = aa[j];
2324: *bval++ = 0.0; /* for in-place factorization */
2325: }
2326: }
2327: /* shift the diagonal of the matrix */
2328: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2330: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2331: dk = rtmp[k];
2332: i = jl[k]; /* first row to be added to k_th row */
2334: while (i < k) {
2335: nexti = jl[i]; /* next row to be added to k_th row */
2337: /* compute multiplier, update diag(k) and U(i,k) */
2338: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2339: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2340: dk += uikdi*ba[ili];
2341: ba[ili] = uikdi; /* -U(i,k) */
2343: /* add multiple of row i to k-th row */
2344: jmin = ili + 1; jmax = bi[i+1];
2345: if (jmin < jmax) {
2346: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2347: /* update il and jl for row i */
2348: il[i] = jmin;
2349: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2350: }
2351: i = nexti;
2352: }
2354: /* shift the diagonals when zero pivot is detected */
2355: /* compute rs=sum of abs(off-diagonal) */
2356: rs = 0.0;
2357: jmin = bi[k]+1;
2358: nz = bi[k+1] - jmin;
2359: bcol = bj + jmin;
2360: for (j=0; j<nz; j++) {
2361: rs += PetscAbsScalar(rtmp[bcol[j]]);
2362: }
2364: sctx.rs = rs;
2365: sctx.pv = dk;
2366: MatPivotCheck(A,info,&sctx,k);
2367: if (sctx.newshift) break;
2368: dk = sctx.pv;
2370: /* copy data into U(k,:) */
2371: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2372: jmin = bi[k]+1; jmax = bi[k+1];
2373: if (jmin < jmax) {
2374: for (j=jmin; j<jmax; j++) {
2375: col = bj[j]; ba[j] = rtmp[col];
2376: }
2377: /* add the k-th row into il and jl */
2378: il[k] = jmin;
2379: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2380: }
2381: }
2382: } while (sctx.newshift);
2384: PetscFree3(rtmp,il,jl);
2385: ISRestoreIndices(ip,&rip);
2386: ISRestoreIndices(iip,&riip);
2388: ISIdentity(ip,&perm_identity);
2389: if (perm_identity) {
2390: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2391: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2392: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2393: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2394: } else {
2395: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2396: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2397: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2398: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2399: }
2401: C->assembled = PETSC_TRUE;
2402: C->preallocated = PETSC_TRUE;
2404: PetscLogFlops(C->rmap->n);
2405: if (sctx.nshift) {
2406: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2407: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2408: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2409: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2410: }
2411: }
2412: return(0);
2413: }
2415: /*
2416: icc() under revised new data structure.
2417: Factored arrays bj and ba are stored as
2418: U(0,:),...,U(i,:),U(n-1,:)
2420: ui=fact->i is an array of size n+1, in which
2421: ui+
2422: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2423: ui[n]: points to U(n-1,n-1)+1
2425: udiag=fact->diag is an array of size n,in which
2426: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2428: U(i,:) contains udiag[i] as its last entry, i.e.,
2429: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2430: */
2434: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2435: {
2436: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2437: Mat_SeqSBAIJ *b;
2438: PetscErrorCode ierr;
2439: PetscBool perm_identity,missing;
2440: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2441: const PetscInt *rip,*riip;
2442: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2443: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2444: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2445: PetscReal fill =info->fill,levels=info->levels;
2446: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2447: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2448: PetscBT lnkbt;
2449: IS iperm;
2452: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2453: MatMissingDiagonal(A,&missing,&d);
2454: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2455: ISIdentity(perm,&perm_identity);
2456: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2458: PetscMalloc1((am+1),&ui);
2459: PetscMalloc1((am+1),&udiag);
2460: ui[0] = 0;
2462: /* ICC(0) without matrix ordering: simply rearrange column indices */
2463: if (!levels && perm_identity) {
2464: for (i=0; i<am; i++) {
2465: ncols = ai[i+1] - a->diag[i];
2466: ui[i+1] = ui[i] + ncols;
2467: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2468: }
2469: PetscMalloc1((ui[am]+1),&uj);
2470: cols = uj;
2471: for (i=0; i<am; i++) {
2472: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2473: ncols = ai[i+1] - a->diag[i] -1;
2474: for (j=0; j<ncols; j++) *cols++ = aj[j];
2475: *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2476: }
2477: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2478: ISGetIndices(iperm,&riip);
2479: ISGetIndices(perm,&rip);
2481: /* initialization */
2482: PetscMalloc1((am+1),&ajtmp);
2484: /* jl: linked list for storing indices of the pivot rows
2485: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2486: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2487: for (i=0; i<am; i++) {
2488: jl[i] = am; il[i] = 0;
2489: }
2491: /* create and initialize a linked list for storing column indices of the active row k */
2492: nlnk = am + 1;
2493: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2495: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2496: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2497: current_space = free_space;
2498: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);
2499: current_space_lvl = free_space_lvl;
2501: for (k=0; k<am; k++) { /* for each active row k */
2502: /* initialize lnk by the column indices of row rip[k] of A */
2503: nzk = 0;
2504: ncols = ai[rip[k]+1] - ai[rip[k]];
2505: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2506: ncols_upper = 0;
2507: for (j=0; j<ncols; j++) {
2508: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2509: if (riip[i] >= k) { /* only take upper triangular entry */
2510: ajtmp[ncols_upper] = i;
2511: ncols_upper++;
2512: }
2513: }
2514: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2515: nzk += nlnk;
2517: /* update lnk by computing fill-in for each pivot row to be merged in */
2518: prow = jl[k]; /* 1st pivot row */
2520: while (prow < k) {
2521: nextprow = jl[prow];
2523: /* merge prow into k-th row */
2524: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2525: jmax = ui[prow+1];
2526: ncols = jmax-jmin;
2527: i = jmin - ui[prow];
2528: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2529: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2530: j = *(uj - 1);
2531: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2532: nzk += nlnk;
2534: /* update il and jl for prow */
2535: if (jmin < jmax) {
2536: il[prow] = jmin;
2537: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2538: }
2539: prow = nextprow;
2540: }
2542: /* if free space is not available, make more free space */
2543: if (current_space->local_remaining<nzk) {
2544: i = am - k + 1; /* num of unfactored rows */
2545: i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2546: PetscFreeSpaceGet(i,¤t_space);
2547: PetscFreeSpaceGet(i,¤t_space_lvl);
2548: reallocs++;
2549: }
2551: /* copy data into free_space and free_space_lvl, then initialize lnk */
2552: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2553: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2555: /* add the k-th row into il and jl */
2556: if (nzk > 1) {
2557: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2558: jl[k] = jl[i]; jl[i] = k;
2559: il[k] = ui[k] + 1;
2560: }
2561: uj_ptr[k] = current_space->array;
2562: uj_lvl_ptr[k] = current_space_lvl->array;
2564: current_space->array += nzk;
2565: current_space->local_used += nzk;
2566: current_space->local_remaining -= nzk;
2568: current_space_lvl->array += nzk;
2569: current_space_lvl->local_used += nzk;
2570: current_space_lvl->local_remaining -= nzk;
2572: ui[k+1] = ui[k] + nzk;
2573: }
2575: ISRestoreIndices(perm,&rip);
2576: ISRestoreIndices(iperm,&riip);
2577: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2578: PetscFree(ajtmp);
2580: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2581: PetscMalloc1((ui[am]+1),&uj);
2582: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2583: PetscIncompleteLLDestroy(lnk,lnkbt);
2584: PetscFreeSpaceDestroy(free_space_lvl);
2586: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2588: /* put together the new matrix in MATSEQSBAIJ format */
2589: b = (Mat_SeqSBAIJ*)(fact)->data;
2590: b->singlemalloc = PETSC_FALSE;
2592: PetscMalloc1((ui[am]+1),&b->a);
2594: b->j = uj;
2595: b->i = ui;
2596: b->diag = udiag;
2597: b->free_diag = PETSC_TRUE;
2598: b->ilen = 0;
2599: b->imax = 0;
2600: b->row = perm;
2601: b->col = perm;
2602: PetscObjectReference((PetscObject)perm);
2603: PetscObjectReference((PetscObject)perm);
2604: b->icol = iperm;
2605: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2607: PetscMalloc1((am+1),&b->solve_work);
2608: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2610: b->maxnz = b->nz = ui[am];
2611: b->free_a = PETSC_TRUE;
2612: b->free_ij = PETSC_TRUE;
2614: fact->info.factor_mallocs = reallocs;
2615: fact->info.fill_ratio_given = fill;
2616: if (ai[am] != 0) {
2617: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2618: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2619: } else {
2620: fact->info.fill_ratio_needed = 0.0;
2621: }
2622: #if defined(PETSC_USE_INFO)
2623: if (ai[am] != 0) {
2624: PetscReal af = fact->info.fill_ratio_needed;
2625: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2626: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2627: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2628: } else {
2629: PetscInfo(A,"Empty matrix.\n");
2630: }
2631: #endif
2632: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2633: return(0);
2634: }
2638: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2639: {
2640: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2641: Mat_SeqSBAIJ *b;
2642: PetscErrorCode ierr;
2643: PetscBool perm_identity,missing;
2644: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2645: const PetscInt *rip,*riip;
2646: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2647: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2648: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2649: PetscReal fill =info->fill,levels=info->levels;
2650: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2651: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2652: PetscBT lnkbt;
2653: IS iperm;
2656: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2657: MatMissingDiagonal(A,&missing,&d);
2658: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2659: ISIdentity(perm,&perm_identity);
2660: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2662: PetscMalloc1((am+1),&ui);
2663: PetscMalloc1((am+1),&udiag);
2664: ui[0] = 0;
2666: /* ICC(0) without matrix ordering: simply copies fill pattern */
2667: if (!levels && perm_identity) {
2669: for (i=0; i<am; i++) {
2670: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2671: udiag[i] = ui[i];
2672: }
2673: PetscMalloc1((ui[am]+1),&uj);
2674: cols = uj;
2675: for (i=0; i<am; i++) {
2676: aj = a->j + a->diag[i];
2677: ncols = ui[i+1] - ui[i];
2678: for (j=0; j<ncols; j++) *cols++ = *aj++;
2679: }
2680: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2681: ISGetIndices(iperm,&riip);
2682: ISGetIndices(perm,&rip);
2684: /* initialization */
2685: PetscMalloc1((am+1),&ajtmp);
2687: /* jl: linked list for storing indices of the pivot rows
2688: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2689: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2690: for (i=0; i<am; i++) {
2691: jl[i] = am; il[i] = 0;
2692: }
2694: /* create and initialize a linked list for storing column indices of the active row k */
2695: nlnk = am + 1;
2696: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2698: /* initial FreeSpace size is fill*(ai[am]+1) */
2699: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2700: current_space = free_space;
2701: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2702: current_space_lvl = free_space_lvl;
2704: for (k=0; k<am; k++) { /* for each active row k */
2705: /* initialize lnk by the column indices of row rip[k] of A */
2706: nzk = 0;
2707: ncols = ai[rip[k]+1] - ai[rip[k]];
2708: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2709: ncols_upper = 0;
2710: for (j=0; j<ncols; j++) {
2711: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2712: if (riip[i] >= k) { /* only take upper triangular entry */
2713: ajtmp[ncols_upper] = i;
2714: ncols_upper++;
2715: }
2716: }
2717: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2718: nzk += nlnk;
2720: /* update lnk by computing fill-in for each pivot row to be merged in */
2721: prow = jl[k]; /* 1st pivot row */
2723: while (prow < k) {
2724: nextprow = jl[prow];
2726: /* merge prow into k-th row */
2727: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2728: jmax = ui[prow+1];
2729: ncols = jmax-jmin;
2730: i = jmin - ui[prow];
2731: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2732: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2733: j = *(uj - 1);
2734: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2735: nzk += nlnk;
2737: /* update il and jl for prow */
2738: if (jmin < jmax) {
2739: il[prow] = jmin;
2740: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2741: }
2742: prow = nextprow;
2743: }
2745: /* if free space is not available, make more free space */
2746: if (current_space->local_remaining<nzk) {
2747: i = am - k + 1; /* num of unfactored rows */
2748: i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2749: PetscFreeSpaceGet(i,¤t_space);
2750: PetscFreeSpaceGet(i,¤t_space_lvl);
2751: reallocs++;
2752: }
2754: /* copy data into free_space and free_space_lvl, then initialize lnk */
2755: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2756: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2758: /* add the k-th row into il and jl */
2759: if (nzk > 1) {
2760: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2761: jl[k] = jl[i]; jl[i] = k;
2762: il[k] = ui[k] + 1;
2763: }
2764: uj_ptr[k] = current_space->array;
2765: uj_lvl_ptr[k] = current_space_lvl->array;
2767: current_space->array += nzk;
2768: current_space->local_used += nzk;
2769: current_space->local_remaining -= nzk;
2771: current_space_lvl->array += nzk;
2772: current_space_lvl->local_used += nzk;
2773: current_space_lvl->local_remaining -= nzk;
2775: ui[k+1] = ui[k] + nzk;
2776: }
2778: #if defined(PETSC_USE_INFO)
2779: if (ai[am] != 0) {
2780: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2781: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2782: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2783: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2784: } else {
2785: PetscInfo(A,"Empty matrix.\n");
2786: }
2787: #endif
2789: ISRestoreIndices(perm,&rip);
2790: ISRestoreIndices(iperm,&riip);
2791: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2792: PetscFree(ajtmp);
2794: /* destroy list of free space and other temporary array(s) */
2795: PetscMalloc1((ui[am]+1),&uj);
2796: PetscFreeSpaceContiguous(&free_space,uj);
2797: PetscIncompleteLLDestroy(lnk,lnkbt);
2798: PetscFreeSpaceDestroy(free_space_lvl);
2800: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2802: /* put together the new matrix in MATSEQSBAIJ format */
2804: b = (Mat_SeqSBAIJ*)fact->data;
2805: b->singlemalloc = PETSC_FALSE;
2807: PetscMalloc1((ui[am]+1),&b->a);
2809: b->j = uj;
2810: b->i = ui;
2811: b->diag = udiag;
2812: b->free_diag = PETSC_TRUE;
2813: b->ilen = 0;
2814: b->imax = 0;
2815: b->row = perm;
2816: b->col = perm;
2818: PetscObjectReference((PetscObject)perm);
2819: PetscObjectReference((PetscObject)perm);
2821: b->icol = iperm;
2822: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2823: PetscMalloc1((am+1),&b->solve_work);
2824: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2825: b->maxnz = b->nz = ui[am];
2826: b->free_a = PETSC_TRUE;
2827: b->free_ij = PETSC_TRUE;
2829: fact->info.factor_mallocs = reallocs;
2830: fact->info.fill_ratio_given = fill;
2831: if (ai[am] != 0) {
2832: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2833: } else {
2834: fact->info.fill_ratio_needed = 0.0;
2835: }
2836: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2837: return(0);
2838: }
2842: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2843: {
2844: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2845: Mat_SeqSBAIJ *b;
2846: PetscErrorCode ierr;
2847: PetscBool perm_identity;
2848: PetscReal fill = info->fill;
2849: const PetscInt *rip,*riip;
2850: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2851: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2852: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2853: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2854: PetscBT lnkbt;
2855: IS iperm;
2858: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2859: /* check whether perm is the identity mapping */
2860: ISIdentity(perm,&perm_identity);
2861: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2862: ISGetIndices(iperm,&riip);
2863: ISGetIndices(perm,&rip);
2865: /* initialization */
2866: PetscMalloc1((am+1),&ui);
2867: PetscMalloc1((am+1),&udiag);
2868: ui[0] = 0;
2870: /* jl: linked list for storing indices of the pivot rows
2871: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2872: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2873: for (i=0; i<am; i++) {
2874: jl[i] = am; il[i] = 0;
2875: }
2877: /* create and initialize a linked list for storing column indices of the active row k */
2878: nlnk = am + 1;
2879: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2881: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2882: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2883: current_space = free_space;
2885: for (k=0; k<am; k++) { /* for each active row k */
2886: /* initialize lnk by the column indices of row rip[k] of A */
2887: nzk = 0;
2888: ncols = ai[rip[k]+1] - ai[rip[k]];
2889: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2890: ncols_upper = 0;
2891: for (j=0; j<ncols; j++) {
2892: i = riip[*(aj + ai[rip[k]] + j)];
2893: if (i >= k) { /* only take upper triangular entry */
2894: cols[ncols_upper] = i;
2895: ncols_upper++;
2896: }
2897: }
2898: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2899: nzk += nlnk;
2901: /* update lnk by computing fill-in for each pivot row to be merged in */
2902: prow = jl[k]; /* 1st pivot row */
2904: while (prow < k) {
2905: nextprow = jl[prow];
2906: /* merge prow into k-th row */
2907: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2908: jmax = ui[prow+1];
2909: ncols = jmax-jmin;
2910: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2911: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2912: nzk += nlnk;
2914: /* update il and jl for prow */
2915: if (jmin < jmax) {
2916: il[prow] = jmin;
2917: j = *uj_ptr;
2918: jl[prow] = jl[j];
2919: jl[j] = prow;
2920: }
2921: prow = nextprow;
2922: }
2924: /* if free space is not available, make more free space */
2925: if (current_space->local_remaining<nzk) {
2926: i = am - k + 1; /* num of unfactored rows */
2927: i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2928: PetscFreeSpaceGet(i,¤t_space);
2929: reallocs++;
2930: }
2932: /* copy data into free space, then initialize lnk */
2933: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2935: /* add the k-th row into il and jl */
2936: if (nzk > 1) {
2937: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2938: jl[k] = jl[i]; jl[i] = k;
2939: il[k] = ui[k] + 1;
2940: }
2941: ui_ptr[k] = current_space->array;
2943: current_space->array += nzk;
2944: current_space->local_used += nzk;
2945: current_space->local_remaining -= nzk;
2947: ui[k+1] = ui[k] + nzk;
2948: }
2950: ISRestoreIndices(perm,&rip);
2951: ISRestoreIndices(iperm,&riip);
2952: PetscFree4(ui_ptr,jl,il,cols);
2954: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2955: PetscMalloc1((ui[am]+1),&uj);
2956: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2957: PetscLLDestroy(lnk,lnkbt);
2959: /* put together the new matrix in MATSEQSBAIJ format */
2961: b = (Mat_SeqSBAIJ*)fact->data;
2962: b->singlemalloc = PETSC_FALSE;
2963: b->free_a = PETSC_TRUE;
2964: b->free_ij = PETSC_TRUE;
2966: PetscMalloc1((ui[am]+1),&b->a);
2968: b->j = uj;
2969: b->i = ui;
2970: b->diag = udiag;
2971: b->free_diag = PETSC_TRUE;
2972: b->ilen = 0;
2973: b->imax = 0;
2974: b->row = perm;
2975: b->col = perm;
2977: PetscObjectReference((PetscObject)perm);
2978: PetscObjectReference((PetscObject)perm);
2980: b->icol = iperm;
2981: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2983: PetscMalloc1((am+1),&b->solve_work);
2984: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2986: b->maxnz = b->nz = ui[am];
2988: fact->info.factor_mallocs = reallocs;
2989: fact->info.fill_ratio_given = fill;
2990: if (ai[am] != 0) {
2991: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2992: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2993: } else {
2994: fact->info.fill_ratio_needed = 0.0;
2995: }
2996: #if defined(PETSC_USE_INFO)
2997: if (ai[am] != 0) {
2998: PetscReal af = fact->info.fill_ratio_needed;
2999: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3000: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3001: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3002: } else {
3003: PetscInfo(A,"Empty matrix.\n");
3004: }
3005: #endif
3006: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
3007: return(0);
3008: }
3012: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
3013: {
3014: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3015: Mat_SeqSBAIJ *b;
3016: PetscErrorCode ierr;
3017: PetscBool perm_identity;
3018: PetscReal fill = info->fill;
3019: const PetscInt *rip,*riip;
3020: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3021: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3022: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3023: PetscFreeSpaceList free_space=NULL,current_space=NULL;
3024: PetscBT lnkbt;
3025: IS iperm;
3028: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3029: /* check whether perm is the identity mapping */
3030: ISIdentity(perm,&perm_identity);
3031: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
3032: ISGetIndices(iperm,&riip);
3033: ISGetIndices(perm,&rip);
3035: /* initialization */
3036: PetscMalloc1((am+1),&ui);
3037: ui[0] = 0;
3039: /* jl: linked list for storing indices of the pivot rows
3040: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3041: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
3042: for (i=0; i<am; i++) {
3043: jl[i] = am; il[i] = 0;
3044: }
3046: /* create and initialize a linked list for storing column indices of the active row k */
3047: nlnk = am + 1;
3048: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
3050: /* initial FreeSpace size is fill*(ai[am]+1) */
3051: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
3052: current_space = free_space;
3054: for (k=0; k<am; k++) { /* for each active row k */
3055: /* initialize lnk by the column indices of row rip[k] of A */
3056: nzk = 0;
3057: ncols = ai[rip[k]+1] - ai[rip[k]];
3058: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3059: ncols_upper = 0;
3060: for (j=0; j<ncols; j++) {
3061: i = riip[*(aj + ai[rip[k]] + j)];
3062: if (i >= k) { /* only take upper triangular entry */
3063: cols[ncols_upper] = i;
3064: ncols_upper++;
3065: }
3066: }
3067: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3068: nzk += nlnk;
3070: /* update lnk by computing fill-in for each pivot row to be merged in */
3071: prow = jl[k]; /* 1st pivot row */
3073: while (prow < k) {
3074: nextprow = jl[prow];
3075: /* merge prow into k-th row */
3076: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3077: jmax = ui[prow+1];
3078: ncols = jmax-jmin;
3079: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3080: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3081: nzk += nlnk;
3083: /* update il and jl for prow */
3084: if (jmin < jmax) {
3085: il[prow] = jmin;
3086: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3087: }
3088: prow = nextprow;
3089: }
3091: /* if free space is not available, make more free space */
3092: if (current_space->local_remaining<nzk) {
3093: i = am - k + 1; /* num of unfactored rows */
3094: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3095: PetscFreeSpaceGet(i,¤t_space);
3096: reallocs++;
3097: }
3099: /* copy data into free space, then initialize lnk */
3100: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3102: /* add the k-th row into il and jl */
3103: if (nzk-1 > 0) {
3104: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3105: jl[k] = jl[i]; jl[i] = k;
3106: il[k] = ui[k] + 1;
3107: }
3108: ui_ptr[k] = current_space->array;
3110: current_space->array += nzk;
3111: current_space->local_used += nzk;
3112: current_space->local_remaining -= nzk;
3114: ui[k+1] = ui[k] + nzk;
3115: }
3117: #if defined(PETSC_USE_INFO)
3118: if (ai[am] != 0) {
3119: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3120: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3121: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3122: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3123: } else {
3124: PetscInfo(A,"Empty matrix.\n");
3125: }
3126: #endif
3128: ISRestoreIndices(perm,&rip);
3129: ISRestoreIndices(iperm,&riip);
3130: PetscFree4(ui_ptr,jl,il,cols);
3132: /* destroy list of free space and other temporary array(s) */
3133: PetscMalloc1((ui[am]+1),&uj);
3134: PetscFreeSpaceContiguous(&free_space,uj);
3135: PetscLLDestroy(lnk,lnkbt);
3137: /* put together the new matrix in MATSEQSBAIJ format */
3139: b = (Mat_SeqSBAIJ*)fact->data;
3140: b->singlemalloc = PETSC_FALSE;
3141: b->free_a = PETSC_TRUE;
3142: b->free_ij = PETSC_TRUE;
3144: PetscMalloc1((ui[am]+1),&b->a);
3146: b->j = uj;
3147: b->i = ui;
3148: b->diag = 0;
3149: b->ilen = 0;
3150: b->imax = 0;
3151: b->row = perm;
3152: b->col = perm;
3154: PetscObjectReference((PetscObject)perm);
3155: PetscObjectReference((PetscObject)perm);
3157: b->icol = iperm;
3158: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3160: PetscMalloc1((am+1),&b->solve_work);
3161: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3162: b->maxnz = b->nz = ui[am];
3164: fact->info.factor_mallocs = reallocs;
3165: fact->info.fill_ratio_given = fill;
3166: if (ai[am] != 0) {
3167: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3168: } else {
3169: fact->info.fill_ratio_needed = 0.0;
3170: }
3171: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3172: return(0);
3173: }
3177: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3178: {
3179: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3180: PetscErrorCode ierr;
3181: PetscInt n = A->rmap->n;
3182: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3183: PetscScalar *x,sum;
3184: const PetscScalar *b;
3185: const MatScalar *aa = a->a,*v;
3186: PetscInt i,nz;
3189: if (!n) return(0);
3191: VecGetArrayRead(bb,&b);
3192: VecGetArray(xx,&x);
3194: /* forward solve the lower triangular */
3195: x[0] = b[0];
3196: v = aa;
3197: vi = aj;
3198: for (i=1; i<n; i++) {
3199: nz = ai[i+1] - ai[i];
3200: sum = b[i];
3201: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3202: v += nz;
3203: vi += nz;
3204: x[i] = sum;
3205: }
3207: /* backward solve the upper triangular */
3208: for (i=n-1; i>=0; i--) {
3209: v = aa + adiag[i+1] + 1;
3210: vi = aj + adiag[i+1] + 1;
3211: nz = adiag[i] - adiag[i+1]-1;
3212: sum = x[i];
3213: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3214: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3215: }
3217: PetscLogFlops(2.0*a->nz - A->cmap->n);
3218: VecRestoreArrayRead(bb,&b);
3219: VecRestoreArray(xx,&x);
3220: return(0);
3221: }
3225: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3226: {
3227: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3228: IS iscol = a->col,isrow = a->row;
3229: PetscErrorCode ierr;
3230: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3231: const PetscInt *rout,*cout,*r,*c;
3232: PetscScalar *x,*tmp,sum;
3233: const PetscScalar *b;
3234: const MatScalar *aa = a->a,*v;
3237: if (!n) return(0);
3239: VecGetArrayRead(bb,&b);
3240: VecGetArray(xx,&x);
3241: tmp = a->solve_work;
3243: ISGetIndices(isrow,&rout); r = rout;
3244: ISGetIndices(iscol,&cout); c = cout;
3246: /* forward solve the lower triangular */
3247: tmp[0] = b[r[0]];
3248: v = aa;
3249: vi = aj;
3250: for (i=1; i<n; i++) {
3251: nz = ai[i+1] - ai[i];
3252: sum = b[r[i]];
3253: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3254: tmp[i] = sum;
3255: v += nz; vi += nz;
3256: }
3258: /* backward solve the upper triangular */
3259: for (i=n-1; i>=0; i--) {
3260: v = aa + adiag[i+1]+1;
3261: vi = aj + adiag[i+1]+1;
3262: nz = adiag[i]-adiag[i+1]-1;
3263: sum = tmp[i];
3264: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3265: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3266: }
3268: ISRestoreIndices(isrow,&rout);
3269: ISRestoreIndices(iscol,&cout);
3270: VecRestoreArrayRead(bb,&b);
3271: VecRestoreArray(xx,&x);
3272: PetscLogFlops(2*a->nz - A->cmap->n);
3273: return(0);
3274: }
3278: /*
3279: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3280: */
3281: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3282: {
3283: Mat B = *fact;
3284: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3285: IS isicol;
3287: const PetscInt *r,*ic;
3288: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3289: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3290: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3291: PetscInt nlnk,*lnk;
3292: PetscBT lnkbt;
3293: PetscBool row_identity,icol_identity;
3294: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3295: const PetscInt *ics;
3296: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3297: PetscReal dt =info->dt,shift=info->shiftamount;
3298: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3299: PetscBool missing;
3302: if (dt == PETSC_DEFAULT) dt = 0.005;
3303: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3305: /* ------- symbolic factorization, can be reused ---------*/
3306: MatMissingDiagonal(A,&missing,&i);
3307: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3308: adiag=a->diag;
3310: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3312: /* bdiag is location of diagonal in factor */
3313: PetscMalloc1((n+1),&bdiag); /* becomes b->diag */
3314: PetscMalloc1((n+1),&bdiag_rev); /* temporary */
3316: /* allocate row pointers bi */
3317: PetscMalloc1((2*n+2),&bi);
3319: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3320: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3321: nnz_max = ai[n]+2*n*dtcount+2;
3323: PetscMalloc1((nnz_max+1),&bj);
3324: PetscMalloc1((nnz_max+1),&ba);
3326: /* put together the new matrix */
3327: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3328: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3329: b = (Mat_SeqAIJ*)B->data;
3331: b->free_a = PETSC_TRUE;
3332: b->free_ij = PETSC_TRUE;
3333: b->singlemalloc = PETSC_FALSE;
3335: b->a = ba;
3336: b->j = bj;
3337: b->i = bi;
3338: b->diag = bdiag;
3339: b->ilen = 0;
3340: b->imax = 0;
3341: b->row = isrow;
3342: b->col = iscol;
3343: PetscObjectReference((PetscObject)isrow);
3344: PetscObjectReference((PetscObject)iscol);
3345: b->icol = isicol;
3347: PetscMalloc1((n+1),&b->solve_work);
3348: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3349: b->maxnz = nnz_max;
3351: B->factortype = MAT_FACTOR_ILUDT;
3352: B->info.factor_mallocs = 0;
3353: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3354: /* ------- end of symbolic factorization ---------*/
3356: ISGetIndices(isrow,&r);
3357: ISGetIndices(isicol,&ic);
3358: ics = ic;
3360: /* linked list for storing column indices of the active row */
3361: nlnk = n + 1;
3362: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3364: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3365: PetscMalloc2(n,&im,n,&jtmp);
3366: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3367: PetscMalloc2(n,&rtmp,n,&vtmp);
3368: PetscMemzero(rtmp,n*sizeof(MatScalar));
3370: bi[0] = 0;
3371: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3372: bdiag_rev[n] = bdiag[0];
3373: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3374: for (i=0; i<n; i++) {
3375: /* copy initial fill into linked list */
3376: nzi = ai[r[i]+1] - ai[r[i]];
3377: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3378: nzi_al = adiag[r[i]] - ai[r[i]];
3379: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3380: ajtmp = aj + ai[r[i]];
3381: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3383: /* load in initial (unfactored row) */
3384: aatmp = a->a + ai[r[i]];
3385: for (j=0; j<nzi; j++) {
3386: rtmp[ics[*ajtmp++]] = *aatmp++;
3387: }
3389: /* add pivot rows into linked list */
3390: row = lnk[n];
3391: while (row < i) {
3392: nzi_bl = bi[row+1] - bi[row] + 1;
3393: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3394: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3395: nzi += nlnk;
3396: row = lnk[row];
3397: }
3399: /* copy data from lnk into jtmp, then initialize lnk */
3400: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3402: /* numerical factorization */
3403: bjtmp = jtmp;
3404: row = *bjtmp++; /* 1st pivot row */
3405: while (row < i) {
3406: pc = rtmp + row;
3407: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3408: multiplier = (*pc) * (*pv);
3409: *pc = multiplier;
3410: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3411: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3412: pv = ba + bdiag[row+1] + 1;
3413: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3414: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3415: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3416: PetscLogFlops(1+2*nz);
3417: }
3418: row = *bjtmp++;
3419: }
3421: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3422: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3423: nzi_bl = 0; j = 0;
3424: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3425: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3426: nzi_bl++; j++;
3427: }
3428: nzi_bu = nzi - nzi_bl -1;
3429: while (j < nzi) {
3430: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3431: j++;
3432: }
3434: bjtmp = bj + bi[i];
3435: batmp = ba + bi[i];
3436: /* apply level dropping rule to L part */
3437: ncut = nzi_al + dtcount;
3438: if (ncut < nzi_bl) {
3439: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3440: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3441: } else {
3442: ncut = nzi_bl;
3443: }
3444: for (j=0; j<ncut; j++) {
3445: bjtmp[j] = jtmp[j];
3446: batmp[j] = vtmp[j];
3447: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3448: }
3449: bi[i+1] = bi[i] + ncut;
3450: nzi = ncut + 1;
3452: /* apply level dropping rule to U part */
3453: ncut = nzi_au + dtcount;
3454: if (ncut < nzi_bu) {
3455: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3456: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3457: } else {
3458: ncut = nzi_bu;
3459: }
3460: nzi += ncut;
3462: /* mark bdiagonal */
3463: bdiag[i+1] = bdiag[i] - (ncut + 1);
3464: bdiag_rev[n-i-1] = bdiag[i+1];
3465: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3466: bjtmp = bj + bdiag[i];
3467: batmp = ba + bdiag[i];
3468: *bjtmp = i;
3469: *batmp = diag_tmp; /* rtmp[i]; */
3470: if (*batmp == 0.0) {
3471: *batmp = dt+shift;
3472: /* printf(" row %d add shift %g\n",i,shift); */
3473: }
3474: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3475: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3477: bjtmp = bj + bdiag[i+1]+1;
3478: batmp = ba + bdiag[i+1]+1;
3479: for (k=0; k<ncut; k++) {
3480: bjtmp[k] = jtmp[nzi_bl+1+k];
3481: batmp[k] = vtmp[nzi_bl+1+k];
3482: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3483: }
3484: /* printf("\n"); */
3486: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3487: /*
3488: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3489: printf(" ----------------------------\n");
3490: */
3491: } /* for (i=0; i<n; i++) */
3492: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3493: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3495: ISRestoreIndices(isrow,&r);
3496: ISRestoreIndices(isicol,&ic);
3498: PetscLLDestroy(lnk,lnkbt);
3499: PetscFree2(im,jtmp);
3500: PetscFree2(rtmp,vtmp);
3501: PetscFree(bdiag_rev);
3503: PetscLogFlops(B->cmap->n);
3504: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3506: ISIdentity(isrow,&row_identity);
3507: ISIdentity(isicol,&icol_identity);
3508: if (row_identity && icol_identity) {
3509: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3510: } else {
3511: B->ops->solve = MatSolve_SeqAIJ;
3512: }
3514: B->ops->solveadd = 0;
3515: B->ops->solvetranspose = 0;
3516: B->ops->solvetransposeadd = 0;
3517: B->ops->matsolve = 0;
3518: B->assembled = PETSC_TRUE;
3519: B->preallocated = PETSC_TRUE;
3520: return(0);
3521: }
3523: /* a wraper of MatILUDTFactor_SeqAIJ() */
3526: /*
3527: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3528: */
3530: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3531: {
3535: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3536: return(0);
3537: }
3539: /*
3540: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3541: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3542: */
3545: /*
3546: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3547: */
3549: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3550: {
3551: Mat C =fact;
3552: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3553: IS isrow = b->row,isicol = b->icol;
3555: const PetscInt *r,*ic,*ics;
3556: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3557: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3558: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3559: PetscReal dt=info->dt,shift=info->shiftamount;
3560: PetscBool row_identity, col_identity;
3563: ISGetIndices(isrow,&r);
3564: ISGetIndices(isicol,&ic);
3565: PetscMalloc1((n+1),&rtmp);
3566: ics = ic;
3568: for (i=0; i<n; i++) {
3569: /* initialize rtmp array */
3570: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3571: bjtmp = bj + bi[i];
3572: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3573: rtmp[i] = 0.0;
3574: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3575: bjtmp = bj + bdiag[i+1] + 1;
3576: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3578: /* load in initial unfactored row of A */
3579: /* printf("row %d\n",i); */
3580: nz = ai[r[i]+1] - ai[r[i]];
3581: ajtmp = aj + ai[r[i]];
3582: v = aa + ai[r[i]];
3583: for (j=0; j<nz; j++) {
3584: rtmp[ics[*ajtmp++]] = v[j];
3585: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3586: }
3587: /* printf("\n"); */
3589: /* numerical factorization */
3590: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3591: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3592: k = 0;
3593: while (k < nzl) {
3594: row = *bjtmp++;
3595: /* printf(" prow %d\n",row); */
3596: pc = rtmp + row;
3597: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3598: multiplier = (*pc) * (*pv);
3599: *pc = multiplier;
3600: if (PetscAbsScalar(multiplier) > dt) {
3601: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3602: pv = b->a + bdiag[row+1] + 1;
3603: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3604: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3605: PetscLogFlops(1+2*nz);
3606: }
3607: k++;
3608: }
3610: /* finished row so stick it into b->a */
3611: /* L-part */
3612: pv = b->a + bi[i];
3613: pj = bj + bi[i];
3614: nzl = bi[i+1] - bi[i];
3615: for (j=0; j<nzl; j++) {
3616: pv[j] = rtmp[pj[j]];
3617: /* printf(" (%d,%g),",pj[j],pv[j]); */
3618: }
3620: /* diagonal: invert diagonal entries for simplier triangular solves */
3621: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3622: b->a[bdiag[i]] = 1.0/rtmp[i];
3623: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3625: /* U-part */
3626: pv = b->a + bdiag[i+1] + 1;
3627: pj = bj + bdiag[i+1] + 1;
3628: nzu = bdiag[i] - bdiag[i+1] - 1;
3629: for (j=0; j<nzu; j++) {
3630: pv[j] = rtmp[pj[j]];
3631: /* printf(" (%d,%g),",pj[j],pv[j]); */
3632: }
3633: /* printf("\n"); */
3634: }
3636: PetscFree(rtmp);
3637: ISRestoreIndices(isicol,&ic);
3638: ISRestoreIndices(isrow,&r);
3640: ISIdentity(isrow,&row_identity);
3641: ISIdentity(isicol,&col_identity);
3642: if (row_identity && col_identity) {
3643: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3644: } else {
3645: C->ops->solve = MatSolve_SeqAIJ;
3646: }
3647: C->ops->solveadd = 0;
3648: C->ops->solvetranspose = 0;
3649: C->ops->solvetransposeadd = 0;
3650: C->ops->matsolve = 0;
3651: C->assembled = PETSC_TRUE;
3652: C->preallocated = PETSC_TRUE;
3654: PetscLogFlops(C->cmap->n);
3655: return(0);
3656: }