Actual source code: sbaijfact.c
1: /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/
3: #include src/mat/impls/baij/seq/baij.h
4: #include src/mat/impls/sbaij/seq/sbaij.h
5: #include src/vec/vecimpl.h
6: #include src/inline/ilu.h
7: #include include/petscis.h
9: #if !defined(PETSC_USE_COMPLEX)
10: /*
11: input:
12: F -- numeric factor
13: output:
14: nneg, nzero, npos: matrix inertia
15: */
19: int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos)
20: {
21: Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
22: PetscScalar *dd = fact_ptr->a;
23: int m = F->m,i;
26: if (nneig){
27: *nneig = 0;
28: for (i=0; i<m; i++){
29: if (PetscRealPart(dd[i]) < 0.0) (*nneig)++;
30: }
31: }
32: if (nzero){
33: *nzero = 0;
34: for (i=0; i<m; i++){
35: if (PetscRealPart(dd[i]) == 0.0) (*nzero)++;
36: }
37: }
38: if (npos){
39: *npos = 0;
40: for (i=0; i<m; i++){
41: if (PetscRealPart(dd[i]) > 0.0) (*npos)++;
42: }
43: }
44: return(0);
45: }
46: #endif /* !defined(PETSC_USE_COMPLEX) */
48: /* Using Modified Sparse Row (MSR) storage.
49: See page 85, "Iterative Methods ..." by Saad. */
50: /*
51: Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
52: */
53: /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
56: int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *B)
57: {
58: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
59: int *rip,ierr,i,mbs = a->mbs,*ai,*aj;
60: int *jutmp,bs = a->bs,bs2=a->bs2;
61: int m,realloc = 0,prow;
62: int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
63: int *il,ili,nextprow;
64: PetscReal f = info->fill;
65: PetscTruth perm_identity;
68: /* check whether perm is the identity mapping */
69: ISIdentity(perm,&perm_identity);
71: /* -- inplace factorization, i.e., use sbaij for *B -- */
72: if (perm_identity && bs==1 ){
73: if (!perm_identity) a->permute = PETSC_TRUE;
74:
75: ISGetIndices(perm,&rip);
76:
77: if (perm_identity){ /* without permutation */
78: ai = a->i; aj = a->j;
79: } else { /* non-trivial permutation */
80: MatReorderingSeqSBAIJ(A,perm);
81: ai = a->inew; aj = a->jnew;
82: }
83:
84: /* initialization */
85: PetscMalloc((mbs+1)*sizeof(int),&iu);
86: umax = (int)(f*ai[mbs] + 1);
87: PetscMalloc(umax*sizeof(int),&ju);
88: iu[0] = 0;
89: juidx = 0; /* index for ju */
90: PetscMalloc((3*mbs+1)*sizeof(int),&jl); /* linked list for getting pivot row */
91: q = jl + mbs; /* linked list for col index of active row */
92: il = q + mbs;
93: for (i=0; i<mbs; i++){
94: jl[i] = mbs;
95: q[i] = 0;
96: il[i] = 0;
97: }
99: /* for each row k */
100: for (k=0; k<mbs; k++){
101: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
102: q[k] = mbs;
103: /* initialize nonzero structure of k-th row to row rip[k] of A */
104: jmin = ai[rip[k]] +1; /* exclude diag[k] */
105: jmax = ai[rip[k]+1];
106: for (j=jmin; j<jmax; j++){
107: vj = rip[aj[j]]; /* col. value */
108: if(vj > k){
109: qm = k;
110: do {
111: m = qm; qm = q[m];
112: } while(qm < vj);
113: if (qm == vj) {
114: SETERRQ(1," error: duplicate entry in A\n");
115: }
116: nzk++;
117: q[m] = vj;
118: q[vj] = qm;
119: } /* if(vj > k) */
120: } /* for (j=jmin; j<jmax; j++) */
122: /* modify nonzero structure of k-th row by computing fill-in
123: for each row i to be merged in */
124: prow = k;
125: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
126:
127: while (prow < k){
128: nextprow = jl[prow];
129:
130: /* merge row prow into k-th row */
131: ili = il[prow];
132: jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */
133: jmax = iu[prow+1];
134: qm = k;
135: for (j=jmin; j<jmax; j++){
136: vj = ju[j];
137: do {
138: m = qm; qm = q[m];
139: } while (qm < vj);
140: if (qm != vj){ /* a fill */
141: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
142: }
143: } /* end of for (j=jmin; j<jmax; j++) */
144: if (jmin < jmax){
145: il[prow] = jmin;
146: j = ju[jmin];
147: jl[prow] = jl[j]; jl[j] = prow; /* update jl */
148: }
149: prow = nextprow;
150: }
151:
152: /* update il and jl */
153: if (nzk > 0){
154: i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
155: jl[k] = jl[i]; jl[i] = k;
156: il[k] = iu[k] + 1;
157: }
158: iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */
160: /* allocate more space to ju if needed */
161: if (iu[k+1] > umax) {
162: /* estimate how much additional space we will need */
163: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
164: /* just double the memory each time */
165: maxadd = umax;
166: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
167: umax += maxadd;
169: /* allocate a longer ju */
170: PetscMalloc(umax*sizeof(int),&jutmp);
171: PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
172: PetscFree(ju);
173: ju = jutmp;
174: realloc++; /* count how many times we realloc */
175: }
177: /* save nonzero structure of k-th row in ju */
178: ju[juidx++] = k; /* diag[k] */
179: i = k;
180: while (nzk --) {
181: i = q[i];
182: ju[juidx++] = i;
183: }
184: }
186: if (ai[mbs] != 0) {
187: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
188: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
189: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
190: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
191: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
192: } else {
193: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
194: }
196: ISRestoreIndices(perm,&rip);
197: /* PetscFree(q); */
198: PetscFree(jl);
200: /* put together the new matrix */
201: MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);
202: /* PetscLogObjectParent(*B,iperm); */
203: b = (Mat_SeqSBAIJ*)(*B)->data;
204: PetscFree(b->imax);
205: b->singlemalloc = PETSC_FALSE;
206: /* the next line frees the default space generated by the Create() */
207: PetscFree(b->a);
208: PetscFree(b->ilen);
209: PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
210: b->j = ju;
211: b->i = iu;
212: b->diag = 0;
213: b->ilen = 0;
214: b->imax = 0;
215: b->row = perm;
216: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
217: PetscObjectReference((PetscObject)perm);
218: b->icol = perm;
219: PetscObjectReference((PetscObject)perm);
220: PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
221: /* In b structure: Free imax, ilen, old a, old j.
222: Allocate idnew, solve_work, new a, new j */
223: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
224: b->s_maxnz = b->s_nz = iu[mbs];
225:
226: (*B)->factor = FACTOR_CHOLESKY;
227: (*B)->info.factor_mallocs = realloc;
228: (*B)->info.fill_ratio_given = f;
229: if (ai[mbs] != 0) {
230: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
231: } else {
232: (*B)->info.fill_ratio_needed = 0.0;
233: }
236: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
237: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
238: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
239:
240: return(0);
241: }
242: /* ----------- end of new code --------------------*/
245: if (!perm_identity) a->permute = PETSC_TRUE;
246:
247: ISGetIndices(perm,&rip);
248:
249: if (perm_identity){ /* without permutation */
250: ai = a->i; aj = a->j;
251: } else { /* non-trivial permutation */
252: MatReorderingSeqSBAIJ(A,perm);
253: ai = a->inew; aj = a->jnew;
254: }
255:
256: /* initialization */
257: PetscMalloc((mbs+1)*sizeof(int),&iu);
258: umax = (int)(f*ai[mbs] + 1); umax += mbs + 1;
259: PetscMalloc(umax*sizeof(int),&ju);
260: iu[0] = mbs+1;
261: juidx = mbs + 1; /* index for ju */
262: PetscMalloc(2*mbs*sizeof(int),&jl); /* linked list for pivot row */
263: q = jl + mbs; /* linked list for col index */
264: for (i=0; i<mbs; i++){
265: jl[i] = mbs;
266: q[i] = 0;
267: }
269: /* for each row k */
270: for (k=0; k<mbs; k++){
271: for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */
272: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
273: q[k] = mbs;
274: /* initialize nonzero structure of k-th row to row rip[k] of A */
275: jmin = ai[rip[k]] +1; /* exclude diag[k] */
276: jmax = ai[rip[k]+1];
277: for (j=jmin; j<jmax; j++){
278: vj = rip[aj[j]]; /* col. value */
279: if(vj > k){
280: qm = k;
281: do {
282: m = qm; qm = q[m];
283: } while(qm < vj);
284: if (qm == vj) {
285: SETERRQ(1," error: duplicate entry in A\n");
286: }
287: nzk++;
288: q[m] = vj;
289: q[vj] = qm;
290: } /* if(vj > k) */
291: } /* for (j=jmin; j<jmax; j++) */
293: /* modify nonzero structure of k-th row by computing fill-in
294: for each row i to be merged in */
295: prow = k;
296: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
297:
298: while (prow < k){
299: /* merge row prow into k-th row */
300: jmin = iu[prow] + 1; jmax = iu[prow+1];
301: qm = k;
302: for (j=jmin; j<jmax; j++){
303: vj = ju[j];
304: do {
305: m = qm; qm = q[m];
306: } while (qm < vj);
307: if (qm != vj){
308: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
309: }
310: }
311: prow = jl[prow]; /* next pivot row */
312: }
313:
314: /* add k to row list for first nonzero element in k-th row */
315: if (nzk > 0){
316: i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
317: jl[k] = jl[i]; jl[i] = k;
318: }
319: iu[k+1] = iu[k] + nzk;
321: /* allocate more space to ju if needed */
322: if (iu[k+1] > umax) {
323: /* estimate how much additional space we will need */
324: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
325: /* just double the memory each time */
326: maxadd = umax;
327: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
328: umax += maxadd;
330: /* allocate a longer ju */
331: PetscMalloc(umax*sizeof(int),&jutmp);
332: PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
333: PetscFree(ju);
334: ju = jutmp;
335: realloc++; /* count how many times we realloc */
336: }
338: /* save nonzero structure of k-th row in ju */
339: i=k;
340: while (nzk --) {
341: i = q[i];
342: ju[juidx++] = i;
343: }
344: }
346: if (ai[mbs] != 0) {
347: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
348: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %g\n",realloc,f,af);
349: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use \n",af);
350: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);\n",af);
351: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.\n");
352: } else {
353: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.\n");
354: }
356: ISRestoreIndices(perm,&rip);
357: /* PetscFree(q); */
358: PetscFree(jl);
360: /* put together the new matrix */
361: MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);
362: /* PetscLogObjectParent(*B,iperm); */
363: b = (Mat_SeqSBAIJ*)(*B)->data;
364: PetscFree(b->imax);
365: b->singlemalloc = PETSC_FALSE;
366: /* the next line frees the default space generated by the Create() */
367: PetscFree(b->a);
368: PetscFree(b->ilen);
369: PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
370: b->j = ju;
371: b->i = iu;
372: b->diag = 0;
373: b->ilen = 0;
374: b->imax = 0;
375: b->row = perm;
376: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
377: PetscObjectReference((PetscObject)perm);
378: b->icol = perm;
379: PetscObjectReference((PetscObject)perm);
380: PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
381: /* In b structure: Free imax, ilen, old a, old j.
382: Allocate idnew, solve_work, new a, new j */
383: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
384: b->s_maxnz = b->s_nz = iu[mbs];
385:
386: (*B)->factor = FACTOR_CHOLESKY;
387: (*B)->info.factor_mallocs = realloc;
388: (*B)->info.fill_ratio_given = f;
389: if (ai[mbs] != 0) {
390: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
391: } else {
392: (*B)->info.fill_ratio_needed = 0.0;
393: }
395: if (perm_identity){
396: switch (bs) {
397: case 1:
398: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
399: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
400: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1\n");
401: break;
402: case 2:
403: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
404: (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
405: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
406: break;
407: case 3:
408: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
409: (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
410: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3\n");
411: break;
412: case 4:
413: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
414: (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
415: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
416: break;
417: case 5:
418: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
419: (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
420: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
421: break;
422: case 6:
423: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
424: (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
425: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
426: break;
427: case 7:
428: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
429: (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
430: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
431: break;
432: default:
433: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
434: (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering;
435: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7\n");
436: break;
437: }
438: }
440: return(0);
441: }
446: int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
447: {
448: Mat C = *B;
449: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
450: IS perm = b->row;
451: int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
452: int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
453: int bs=a->bs,bs2 = a->bs2;
454: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
455: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
456: MatScalar *work;
457: int *pivots;
461: /* initialization */
462: PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
463: PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
464: PetscMalloc(2*mbs*sizeof(int),&il);
465: jl = il + mbs;
466: for (i=0; i<mbs; i++) {
467: jl[i] = mbs; il[0] = 0;
468: }
469: PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
470: uik = dk + bs2;
471: work = uik + bs2;
472: PetscMalloc(bs*sizeof(int),&pivots);
473:
474: ISGetIndices(perm,&perm_ptr);
475:
476: /* check permutation */
477: if (!a->permute){
478: ai = a->i; aj = a->j; aa = a->a;
479: } else {
480: ai = a->inew; aj = a->jnew;
481: PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
482: PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
483: PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
484: PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));
486: for (i=0; i<mbs; i++){
487: jmin = ai[i]; jmax = ai[i+1];
488: for (j=jmin; j<jmax; j++){
489: while (a2anew[j] != j){
490: k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
491: for (k1=0; k1<bs2; k1++){
492: dk[k1] = aa[k*bs2+k1];
493: aa[k*bs2+k1] = aa[j*bs2+k1];
494: aa[j*bs2+k1] = dk[k1];
495: }
496: }
497: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
498: if (i > aj[j]){
499: /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
500: ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */
501: for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
502: for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */
503: for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
504: }
505: }
506: }
507: }
508: PetscFree(a2anew);
509: }
510:
511: /* for each row k */
512: for (k = 0; k<mbs; k++){
514: /*initialize k-th row with elements nonzero in row perm(k) of A */
515: jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
516:
517: ap = aa + jmin*bs2;
518: for (j = jmin; j < jmax; j++){
519: vj = perm_ptr[aj[j]]; /* block col. index */
520: rtmp_ptr = rtmp + vj*bs2;
521: for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
522: }
524: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
525: PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
526: i = jl[k]; /* first row to be added to k_th row */
528: while (i < k){
529: nexti = jl[i]; /* next row to be added to k_th row */
531: /* compute multiplier */
532: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
534: /* uik = -inv(Di)*U_bar(i,k) */
535: diag = ba + i*bs2;
536: u = ba + ili*bs2;
537: PetscMemzero(uik,bs2*sizeof(MatScalar));
538: Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
539:
540: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
541: Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
542:
543: /* update -U(i,k) */
544: PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));
546: /* add multiple of row i to k-th row ... */
547: jmin = ili + 1; jmax = bi[i+1];
548: if (jmin < jmax){
549: for (j=jmin; j<jmax; j++) {
550: /* rtmp += -U(i,k)^T * U_bar(i,j) */
551: rtmp_ptr = rtmp + bj[j]*bs2;
552: u = ba + j*bs2;
553: Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
554: }
555:
556: /* ... add i to row list for next nonzero entry */
557: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
558: j = bj[jmin];
559: jl[i] = jl[j]; jl[j] = i; /* update jl */
560: }
561: i = nexti;
562: }
564: /* save nonzero entries in k-th row of U ... */
566: /* invert diagonal block */
567: diag = ba+k*bs2;
568: PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
569: Kernel_A_gets_inverse_A(bs,diag,pivots,work);
570:
571: jmin = bi[k]; jmax = bi[k+1];
572: if (jmin < jmax) {
573: for (j=jmin; j<jmax; j++){
574: vj = bj[j]; /* block col. index of U */
575: u = ba + j*bs2;
576: rtmp_ptr = rtmp + vj*bs2;
577: for (k1=0; k1<bs2; k1++){
578: *u++ = *rtmp_ptr;
579: *rtmp_ptr++ = 0.0;
580: }
581: }
582:
583: /* ... add k to row list for first nonzero entry in k-th row */
584: il[k] = jmin;
585: i = bj[jmin];
586: jl[k] = jl[i]; jl[i] = k;
587: }
588: }
590: PetscFree(rtmp);
591: PetscFree(il);
592: PetscFree(dk);
593: PetscFree(pivots);
594: if (a->permute){
595: PetscFree(aa);
596: }
598: ISRestoreIndices(perm,&perm_ptr);
599: C->factor = FACTOR_CHOLESKY;
600: C->assembled = PETSC_TRUE;
601: C->preallocated = PETSC_TRUE;
602: PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
603: return(0);
604: }
608: int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
609: {
610: Mat C = *B;
611: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
612: int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
613: int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
614: int bs=a->bs,bs2 = a->bs2;
615: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
616: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
617: MatScalar *work;
618: int *pivots;
622: /* initialization */
623:
624: PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
625: PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
626: PetscMalloc(2*mbs*sizeof(int),&il);
627: jl = il + mbs;
628: for (i=0; i<mbs; i++) {
629: jl[i] = mbs; il[0] = 0;
630: }
631: PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
632: uik = dk + bs2;
633: work = uik + bs2;
634: PetscMalloc(bs*sizeof(int),&pivots);
635:
636: ai = a->i; aj = a->j; aa = a->a;
637:
638: /* for each row k */
639: for (k = 0; k<mbs; k++){
641: /*initialize k-th row with elements nonzero in row k of A */
642: jmin = ai[k]; jmax = ai[k+1];
643: ap = aa + jmin*bs2;
644: for (j = jmin; j < jmax; j++){
645: vj = aj[j]; /* block col. index */
646: rtmp_ptr = rtmp + vj*bs2;
647: for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
648: }
650: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
651: PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
652: i = jl[k]; /* first row to be added to k_th row */
654: while (i < k){
655: nexti = jl[i]; /* next row to be added to k_th row */
657: /* compute multiplier */
658: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
660: /* uik = -inv(Di)*U_bar(i,k) */
661: diag = ba + i*bs2;
662: u = ba + ili*bs2;
663: PetscMemzero(uik,bs2*sizeof(MatScalar));
664: Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
665:
666: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
667: Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
668:
669: /* update -U(i,k) */
670: PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));
672: /* add multiple of row i to k-th row ... */
673: jmin = ili + 1; jmax = bi[i+1];
674: if (jmin < jmax){
675: for (j=jmin; j<jmax; j++) {
676: /* rtmp += -U(i,k)^T * U_bar(i,j) */
677: rtmp_ptr = rtmp + bj[j]*bs2;
678: u = ba + j*bs2;
679: Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
680: }
681:
682: /* ... add i to row list for next nonzero entry */
683: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
684: j = bj[jmin];
685: jl[i] = jl[j]; jl[j] = i; /* update jl */
686: }
687: i = nexti;
688: }
690: /* save nonzero entries in k-th row of U ... */
692: /* invert diagonal block */
693: diag = ba+k*bs2;
694: PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
695: Kernel_A_gets_inverse_A(bs,diag,pivots,work);
696:
697: jmin = bi[k]; jmax = bi[k+1];
698: if (jmin < jmax) {
699: for (j=jmin; j<jmax; j++){
700: vj = bj[j]; /* block col. index of U */
701: u = ba + j*bs2;
702: rtmp_ptr = rtmp + vj*bs2;
703: for (k1=0; k1<bs2; k1++){
704: *u++ = *rtmp_ptr;
705: *rtmp_ptr++ = 0.0;
706: }
707: }
708:
709: /* ... add k to row list for first nonzero entry in k-th row */
710: il[k] = jmin;
711: i = bj[jmin];
712: jl[k] = jl[i]; jl[i] = k;
713: }
714: }
716: PetscFree(rtmp);
717: PetscFree(il);
718: PetscFree(dk);
719: PetscFree(pivots);
721: C->factor = FACTOR_CHOLESKY;
722: C->assembled = PETSC_TRUE;
723: C->preallocated = PETSC_TRUE;
724: PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
725: return(0);
726: }
728: /*
729: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
730: Version for blocks 2 by 2.
731: */
734: int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
735: {
736: Mat C = *B;
737: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
738: IS perm = b->row;
739: int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
740: int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
741: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
742: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
745:
746: /* initialization */
747: /* il and jl record the first nonzero element in each row of the accessing
748: window U(0:k, k:mbs-1).
749: jl: list of rows to be added to uneliminated rows
750: i>= k: jl(i) is the first row to be added to row i
751: i< k: jl(i) is the row following row i in some list of rows
752: jl(i) = mbs indicates the end of a list
753: il(i): points to the first nonzero element in columns k,...,mbs-1 of
754: row i of U */
755: PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
756: PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
757: PetscMalloc(2*mbs*sizeof(int),&il);
758: jl = il + mbs;
759: for (i=0; i<mbs; i++) {
760: jl[i] = mbs; il[0] = 0;
761: }
762: PetscMalloc(8*sizeof(MatScalar),&dk);
763: uik = dk + 4;
764: ISGetIndices(perm,&perm_ptr);
766: /* check permutation */
767: if (!a->permute){
768: ai = a->i; aj = a->j; aa = a->a;
769: } else {
770: ai = a->inew; aj = a->jnew;
771: PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
772: PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
773: PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
774: PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));
776: for (i=0; i<mbs; i++){
777: jmin = ai[i]; jmax = ai[i+1];
778: for (j=jmin; j<jmax; j++){
779: while (a2anew[j] != j){
780: k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
781: for (k1=0; k1<4; k1++){
782: dk[k1] = aa[k*4+k1];
783: aa[k*4+k1] = aa[j*4+k1];
784: aa[j*4+k1] = dk[k1];
785: }
786: }
787: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
788: if (i > aj[j]){
789: /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
790: ap = aa + j*4; /* ptr to the beginning of the block */
791: dk[1] = ap[1]; /* swap ap[1] and ap[2] */
792: ap[1] = ap[2];
793: ap[2] = dk[1];
794: }
795: }
796: }
797: PetscFree(a2anew);
798: }
800: /* for each row k */
801: for (k = 0; k<mbs; k++){
803: /*initialize k-th row with elements nonzero in row perm(k) of A */
804: jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
805: ap = aa + jmin*4;
806: for (j = jmin; j < jmax; j++){
807: vj = perm_ptr[aj[j]]; /* block col. index */
808: rtmp_ptr = rtmp + vj*4;
809: for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
810: }
812: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
813: PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
814: i = jl[k]; /* first row to be added to k_th row */
816: while (i < k){
817: nexti = jl[i]; /* next row to be added to k_th row */
819: /* compute multiplier */
820: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
822: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
823: diag = ba + i*4;
824: u = ba + ili*4;
825: uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
826: uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
827: uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
828: uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
829:
830: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
831: dk[0] += uik[0]*u[0] + uik[1]*u[1];
832: dk[1] += uik[2]*u[0] + uik[3]*u[1];
833: dk[2] += uik[0]*u[2] + uik[1]*u[3];
834: dk[3] += uik[2]*u[2] + uik[3]*u[3];
836: /* update -U(i,k): ba[ili] = uik */
837: PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));
839: /* add multiple of row i to k-th row ... */
840: jmin = ili + 1; jmax = bi[i+1];
841: if (jmin < jmax){
842: for (j=jmin; j<jmax; j++) {
843: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
844: rtmp_ptr = rtmp + bj[j]*4;
845: u = ba + j*4;
846: rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
847: rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
848: rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
849: rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
850: }
851:
852: /* ... add i to row list for next nonzero entry */
853: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
854: j = bj[jmin];
855: jl[i] = jl[j]; jl[j] = i; /* update jl */
856: }
857: i = nexti;
858: }
860: /* save nonzero entries in k-th row of U ... */
862: /* invert diagonal block */
863: diag = ba+k*4;
864: PetscMemcpy(diag,dk,4*sizeof(MatScalar));
865: Kernel_A_gets_inverse_A_2(diag);
866:
867: jmin = bi[k]; jmax = bi[k+1];
868: if (jmin < jmax) {
869: for (j=jmin; j<jmax; j++){
870: vj = bj[j]; /* block col. index of U */
871: u = ba + j*4;
872: rtmp_ptr = rtmp + vj*4;
873: for (k1=0; k1<4; k1++){
874: *u++ = *rtmp_ptr;
875: *rtmp_ptr++ = 0.0;
876: }
877: }
878:
879: /* ... add k to row list for first nonzero entry in k-th row */
880: il[k] = jmin;
881: i = bj[jmin];
882: jl[k] = jl[i]; jl[i] = k;
883: }
884: }
886: PetscFree(rtmp);
887: PetscFree(il);
888: PetscFree(dk);
889: if (a->permute) {
890: PetscFree(aa);
891: }
892: ISRestoreIndices(perm,&perm_ptr);
893: C->factor = FACTOR_CHOLESKY;
894: C->assembled = PETSC_TRUE;
895: C->preallocated = PETSC_TRUE;
896: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
897: return(0);
898: }
900: /*
901: Version for when blocks are 2 by 2 Using natural ordering
902: */
905: int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
906: {
907: Mat C = *B;
908: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
909: int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
910: int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
911: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
912: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
915:
916: /* initialization */
917: /* il and jl record the first nonzero element in each row of the accessing
918: window U(0:k, k:mbs-1).
919: jl: list of rows to be added to uneliminated rows
920: i>= k: jl(i) is the first row to be added to row i
921: i< k: jl(i) is the row following row i in some list of rows
922: jl(i) = mbs indicates the end of a list
923: il(i): points to the first nonzero element in columns k,...,mbs-1 of
924: row i of U */
925: PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
926: PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
927: PetscMalloc(2*mbs*sizeof(int),&il);
928: jl = il + mbs;
929: for (i=0; i<mbs; i++) {
930: jl[i] = mbs; il[0] = 0;
931: }
932: PetscMalloc(8*sizeof(MatScalar),&dk);
933: uik = dk + 4;
934:
935: ai = a->i; aj = a->j; aa = a->a;
937: /* for each row k */
938: for (k = 0; k<mbs; k++){
940: /*initialize k-th row with elements nonzero in row k of A */
941: jmin = ai[k]; jmax = ai[k+1];
942: ap = aa + jmin*4;
943: for (j = jmin; j < jmax; j++){
944: vj = aj[j]; /* block col. index */
945: rtmp_ptr = rtmp + vj*4;
946: for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
947: }
948:
949: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
950: PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
951: i = jl[k]; /* first row to be added to k_th row */
953: while (i < k){
954: nexti = jl[i]; /* next row to be added to k_th row */
956: /* compute multiplier */
957: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
959: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
960: diag = ba + i*4;
961: u = ba + ili*4;
962: uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
963: uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
964: uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
965: uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
966:
967: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
968: dk[0] += uik[0]*u[0] + uik[1]*u[1];
969: dk[1] += uik[2]*u[0] + uik[3]*u[1];
970: dk[2] += uik[0]*u[2] + uik[1]*u[3];
971: dk[3] += uik[2]*u[2] + uik[3]*u[3];
973: /* update -U(i,k): ba[ili] = uik */
974: PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));
976: /* add multiple of row i to k-th row ... */
977: jmin = ili + 1; jmax = bi[i+1];
978: if (jmin < jmax){
979: for (j=jmin; j<jmax; j++) {
980: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
981: rtmp_ptr = rtmp + bj[j]*4;
982: u = ba + j*4;
983: rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
984: rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
985: rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
986: rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
987: }
988:
989: /* ... add i to row list for next nonzero entry */
990: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
991: j = bj[jmin];
992: jl[i] = jl[j]; jl[j] = i; /* update jl */
993: }
994: i = nexti;
995: }
997: /* save nonzero entries in k-th row of U ... */
999: /* invert diagonal block */
1000: diag = ba+k*4;
1001: PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1002: Kernel_A_gets_inverse_A_2(diag);
1003:
1004: jmin = bi[k]; jmax = bi[k+1];
1005: if (jmin < jmax) {
1006: for (j=jmin; j<jmax; j++){
1007: vj = bj[j]; /* block col. index of U */
1008: u = ba + j*4;
1009: rtmp_ptr = rtmp + vj*4;
1010: for (k1=0; k1<4; k1++){
1011: *u++ = *rtmp_ptr;
1012: *rtmp_ptr++ = 0.0;
1013: }
1014: }
1015:
1016: /* ... add k to row list for first nonzero entry in k-th row */
1017: il[k] = jmin;
1018: i = bj[jmin];
1019: jl[k] = jl[i]; jl[i] = k;
1020: }
1021: }
1023: PetscFree(rtmp);
1024: PetscFree(il);
1025: PetscFree(dk);
1027: C->factor = FACTOR_CHOLESKY;
1028: C->assembled = PETSC_TRUE;
1029: C->preallocated = PETSC_TRUE;
1030: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1031: return(0);
1032: }
1034: /*
1035: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
1036: Version for blocks are 1 by 1.
1037: */
1040: int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
1041: {
1042: Mat C = *B;
1043: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1044: IS ip = b->row;
1045: int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
1046: int *ai,*aj,*r;
1047: int k,jmin,jmax,*jl,*il,vj,nexti,ili;
1048: MatScalar *rtmp;
1049: MatScalar *ba = b->a,*aa,ak;
1050: MatScalar dk,uikdi;
1053: ISGetIndices(ip,&rip);
1054: if (!a->permute){
1055: ai = a->i; aj = a->j; aa = a->a;
1056: } else {
1057: ai = a->inew; aj = a->jnew;
1058: PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);
1059: PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));
1060: PetscMalloc(ai[mbs]*sizeof(int),&r);
1061: ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));
1063: jmin = ai[0]; jmax = ai[mbs];
1064: for (j=jmin; j<jmax; j++){
1065: while (r[j] != j){
1066: k = r[j]; r[j] = r[k]; r[k] = k;
1067: ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
1068: }
1069: }
1070: PetscFree(r);
1071: }
1072:
1073: /* initialization */
1074: /* il and jl record the first nonzero element in each row of the accessing
1075: window U(0:k, k:mbs-1).
1076: jl: list of rows to be added to uneliminated rows
1077: i>= k: jl(i) is the first row to be added to row i
1078: i< k: jl(i) is the row following row i in some list of rows
1079: jl(i) = mbs indicates the end of a list
1080: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1081: row i of U */
1082: PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1083: PetscMalloc(2*mbs*sizeof(int),&il);
1084: jl = il + mbs;
1085: for (i=0; i<mbs; i++) {
1086: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1087: }
1089: /* for each row k */
1090: for (k = 0; k<mbs; k++){
1092: /*initialize k-th row with elements nonzero in row perm(k) of A */
1093: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1094:
1095: for (j = jmin; j < jmax; j++){
1096: vj = rip[aj[j]];
1097: rtmp[vj] = aa[j];
1098: }
1100: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1101: dk = rtmp[k];
1102: i = jl[k]; /* first row to be added to k_th row */
1104: while (i < k){
1105: nexti = jl[i]; /* next row to be added to k_th row */
1107: /* compute multiplier, update D(k) and U(i,k) */
1108: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1109: uikdi = - ba[ili]*ba[i];
1110: dk += uikdi*ba[ili];
1111: ba[ili] = uikdi; /* -U(i,k) */
1113: /* add multiple of row i to k-th row ... */
1114: jmin = ili + 1; jmax = bi[i+1];
1115: if (jmin < jmax){
1116: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1117: /* ... add i to row list for next nonzero entry */
1118: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1119: j = bj[jmin];
1120: jl[i] = jl[j]; jl[j] = i; /* update jl */
1121: }
1122: i = nexti;
1123: }
1125: /* check for zero pivot and save diagoanl element */
1126: if (dk == 0.0){
1127: SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1128: /*
1129: } else if (PetscRealPart(dk) < 0.0){
1130: SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %g\n",k,dk);
1131: */
1132: }
1134: /* save nonzero entries in k-th row of U ... */
1135: ba[k] = 1.0/dk;
1136: jmin = bi[k]; jmax = bi[k+1];
1137: if (jmin < jmax) {
1138: for (j=jmin; j<jmax; j++){
1139: vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1140: }
1141: /* ... add k to row list for first nonzero entry in k-th row */
1142: il[k] = jmin;
1143: i = bj[jmin];
1144: jl[k] = jl[i]; jl[i] = k;
1145: }
1146: }
1147:
1148: PetscFree(rtmp);
1149: PetscFree(il);
1150: if (a->permute){
1151: PetscFree(aa);
1152: }
1154: ISRestoreIndices(ip,&rip);
1155: C->factor = FACTOR_CHOLESKY;
1156: C->assembled = PETSC_TRUE;
1157: C->preallocated = PETSC_TRUE;
1158: PetscLogFlops(b->mbs);
1159: return(0);
1160: }
1162: /*
1163: Version for when blocks are 1 by 1 Using natural ordering
1164: */
1167: int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
1168: {
1169: Mat C = *B;
1170: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1171: int ierr,i,j,mbs = a->mbs;
1172: int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1173: int k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz,ndamp = 0;
1174: MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1175: PetscReal damping=b->factor_damping, zeropivot=b->factor_zeropivot,shift_amount;
1176: PetscTruth damp,chshift;
1177: int nshift=0;
1180: /* initialization */
1181: /* il and jl record the first nonzero element in each row of the accessing
1182: window U(0:k, k:mbs-1).
1183: jl: list of rows to be added to uneliminated rows
1184: i>= k: jl(i) is the first row to be added to row i
1185: i< k: jl(i) is the row following row i in some list of rows
1186: jl(i) = mbs indicates the end of a list
1187: il(i): points to the first nonzero element in U(i,k:mbs-1)
1188: */
1189: PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1190: PetscMalloc(2*mbs*sizeof(int),&il);
1191: jl = il + mbs;
1193: shift_amount = 0;
1194: do {
1195: damp = PETSC_FALSE;
1196: chshift = PETSC_FALSE;
1197: for (i=0; i<mbs; i++) {
1198: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1199: }
1201: for (k = 0; k<mbs; k++){ /* row k */
1202: /*initialize k-th row with elements nonzero in row perm(k) of A */
1203: nz = ai[k+1] - ai[k];
1204: acol = aj + ai[k];
1205: aval = aa + ai[k];
1206: bval = ba + bi[k];
1207: while (nz -- ){
1208: rtmp[*acol++] = *aval++;
1209: *bval++ = 0.0; /* for in-place factorization */
1210: }
1211: /* damp the diagonal of the matrix */
1212: if (ndamp||nshift) rtmp[k] += damping+shift_amount;
1213:
1214: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1215: dk = rtmp[k];
1216: i = jl[k]; /* first row to be added to k_th row */
1218: while (i < k){
1219: nexti = jl[i]; /* next row to be added to k_th row */
1220:
1221: /* compute multiplier, update D(k) and U(i,k) */
1222: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1223: uikdi = - ba[ili]*ba[bi[i]];
1224: dk += uikdi*ba[ili];
1225: ba[ili] = uikdi; /* -U(i,k) */
1227: /* add multiple of row i to k-th row ... */
1228: jmin = ili + 1;
1229: nz = bi[i+1] - jmin;
1230: if (nz > 0){
1231: bcol = bj + jmin;
1232: bval = ba + jmin;
1233: while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1234: /* ... add i to row list for next nonzero entry */
1235: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1236: j = bj[jmin];
1237: jl[i] = jl[j]; jl[j] = i; /* update jl */
1238: }
1239: i = nexti;
1240: }
1242: /* check for zero pivot and save diagonal element */
1243: if (PetscRealPart(dk) < zeropivot && b->factor_shift){
1244: PetscReal rs = -PetscRealPart(dk);
1245: jmin = bi[k]+1;
1246: nz = bi[k+1] - jmin;
1247: if (nz){
1248: bcol = bj + jmin;
1249: bval = ba + jmin;
1250: while (nz--){
1251: rs += PetscAbsScalar(rtmp[*bcol++]);
1252: }
1253: }
1254: shift_amount = rs;
1255: chshift = PETSC_TRUE;
1256: nshift++;
1257: break;
1258: }
1259: if (PetscRealPart(dk) < zeropivot){
1260: if (damping == (PetscReal) PETSC_DECIDE) damping = -PetscRealPart(dk)/(k+1);
1261: if (damping > 0.0) {
1262: if (ndamp) damping *= 2.0;
1263: damp = PETSC_TRUE;
1264: ndamp++;
1265: break;
1266: } else if (PetscAbsScalar(dk) < zeropivot){
1267: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",k,PetscRealPart(dk),zeropivot);
1268: } else {
1269: PetscLogInfo((PetscObject)A,"Negative pivot %g in row %d of Cholesky factorization\n",PetscRealPart(dk),k);
1270: }
1271: }
1272:
1273: /* save nonzero entries in k-th row of U ... */
1274: /* printf("%d, dk: %g, 1/dk: %g\n",k,dk,1/dk); */
1275: ba[bi[k]] = 1.0/dk;
1276: jmin = bi[k]+1;
1277: nz = bi[k+1] - jmin;
1278: if (nz){
1279: bcol = bj + jmin;
1280: bval = ba + jmin;
1281: while (nz--){
1282: *bval++ = rtmp[*bcol];
1283: rtmp[*bcol++] = 0.0;
1284: }
1285: /* ... add k to row list for first nonzero entry in k-th row */
1286: il[k] = jmin;
1287: i = bj[jmin];
1288: jl[k] = jl[i]; jl[i] = k;
1289: }
1290: } /* end of for (k = 0; k<mbs; k++) */
1291: } while (damp||chshift);
1292: PetscFree(rtmp);
1293: PetscFree(il);
1294:
1295: C->factor = FACTOR_CHOLESKY;
1296: C->assembled = PETSC_TRUE;
1297: C->preallocated = PETSC_TRUE;
1298: PetscLogFlops(b->mbs);
1299: if (ndamp) {
1300: PetscLogInfo(0,"MatCholeskyFactorNumerical_SeqSBAIJ_1_NaturalOrdering: number of damping tries %d damping value %g\n",ndamp,damping);
1301: }
1302: return(0);
1303: }
1307: int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info)
1308: {
1310: Mat C;
1313: MatCholeskyFactorSymbolic(A,perm,info,&C);
1314: MatCholeskyFactorNumeric(A,&C);
1315: MatHeaderCopy(A,C);
1316: return(0);
1317: }