Actual source code: sbaijfact.c
1: /*$Id: sbaijfact.c,v 1.61 2001/08/06 21:15:47 bsmith Exp $*/
3: #include src/mat/impls/baij/seq/baij.h
4: #include src/mat/impls/sbaij/seq/sbaij.h
5: #include src/vec/vecimpl.h
6: #include src/inline/ilu.h
7: #include include/petscis.h
9: #if !defined(PETSC_USE_COMPLEX)
10: /*
11: input:
12: F -- numeric factor
13: output:
14: nneg, nzero, npos: matrix inertia
15: */
17: #undef __FUNCT__
19: int MatGetInertia_SeqSBAIJ(Mat F,int *nneig,int *nzero,int *npos)
20: {
21: Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
22: PetscScalar *dd = fact_ptr->a;
23: int m = F->m,i;
26: if (nneig){
27: *nneig = 0;
28: for (i=0; i<m; i++){
29: if (PetscRealPart(dd[i]) < 0.0) (*nneig)++;
30: }
31: }
32: if (nzero){
33: *nzero = 0;
34: for (i=0; i<m; i++){
35: if (PetscRealPart(dd[i]) == 0.0) (*nzero)++;
36: }
37: }
38: if (npos){
39: *npos = 0;
40: for (i=0; i<m; i++){
41: if (PetscRealPart(dd[i]) > 0.0) (*npos)++;
42: }
43: }
44: return(0);
45: }
46: #endif /* !defined(PETSC_USE_COMPLEX) */
48: /* Using Modified Sparse Row (MSR) storage.
49: See page 85, "Iterative Methods ..." by Saad. */
50: /*
51: Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
52: */
53: /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
54: #undef __FUNCT__
56: int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,Mat *B)
57: {
58: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
59: int *rip,ierr,i,mbs = a->mbs,*ai,*aj;
60: int *jutmp,bs = a->bs,bs2=a->bs2;
61: int m,realloc = 0,prow;
62: int *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
63: int *il,ili,nextprow;
64: PetscTruth perm_identity;
67: /* check whether perm is the identity mapping */
68: ISIdentity(perm,&perm_identity);
70: /* -- inplace factorization, i.e., use sbaij for *B -- */
71: if (perm_identity && bs==1 ){
72: if (!perm_identity) a->permute = PETSC_TRUE;
73:
74: ISGetIndices(perm,&rip);
75:
76: if (perm_identity){ /* without permutation */
77: ai = a->i; aj = a->j;
78: } else { /* non-trivial permutation */
79: MatReorderingSeqSBAIJ(A,perm);
80: ai = a->inew; aj = a->jnew;
81: }
82:
83: /* initialization */
84: ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);
85: umax = (int)(f*ai[mbs] + 1);
86: ierr = PetscMalloc(umax*sizeof(int),&ju);
87: iu[0] = 0;
88: juidx = 0; /* index for ju */
89: ierr = PetscMalloc((3*mbs+1)*sizeof(int),&jl); /* linked list for getting pivot row */
90: q = jl + mbs; /* linked list for col index of active row */
91: il = q + mbs;
92: for (i=0; i<mbs; i++){
93: jl[i] = mbs;
94: q[i] = 0;
95: il[i] = 0;
96: }
98: /* for each row k */
99: for (k=0; k<mbs; k++){
100: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
101: q[k] = mbs;
102: /* initialize nonzero structure of k-th row to row rip[k] of A */
103: jmin = ai[rip[k]] +1; /* exclude diag[k] */
104: jmax = ai[rip[k]+1];
105: for (j=jmin; j<jmax; j++){
106: vj = rip[aj[j]]; /* col. value */
107: if(vj > k){
108: qm = k;
109: do {
110: m = qm; qm = q[m];
111: } while(qm < vj);
112: if (qm == vj) {
113: SETERRQ(1," error: duplicate entry in An");
114: }
115: nzk++;
116: q[m] = vj;
117: q[vj] = qm;
118: } /* if(vj > k) */
119: } /* for (j=jmin; j<jmax; j++) */
121: /* modify nonzero structure of k-th row by computing fill-in
122: for each row i to be merged in */
123: prow = k;
124: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
125:
126: while (prow < k){
127: nextprow = jl[prow];
128:
129: /* merge row prow into k-th row */
130: ili = il[prow];
131: jmin = ili + 1; /* points to 2nd nzero entry in U(prow,k:mbs-1) */
132: jmax = iu[prow+1];
133: qm = k;
134: for (j=jmin; j<jmax; j++){
135: vj = ju[j];
136: do {
137: m = qm; qm = q[m];
138: } while (qm < vj);
139: if (qm != vj){ /* a fill */
140: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
141: }
142: } /* end of for (j=jmin; j<jmax; j++) */
143: if (jmin < jmax){
144: il[prow] = jmin;
145: j = ju[jmin];
146: jl[prow] = jl[j]; jl[j] = prow; /* update jl */
147: }
148: prow = nextprow;
149: }
150:
151: /* update il and jl */
152: if (nzk > 0){
153: i = q[k]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
154: jl[k] = jl[i]; jl[i] = k;
155: il[k] = iu[k] + 1;
156: }
157: iu[k+1] = iu[k] + nzk + 1; /* include diag[k] */
159: /* allocate more space to ju if needed */
160: if (iu[k+1] > umax) {
161: /* estimate how much additional space we will need */
162: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
163: /* just double the memory each time */
164: maxadd = umax;
165: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
166: umax += maxadd;
168: /* allocate a longer ju */
169: PetscMalloc(umax*sizeof(int),&jutmp);
170: PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
171: PetscFree(ju);
172: ju = jutmp;
173: realloc++; /* count how many times we realloc */
174: }
176: /* save nonzero structure of k-th row in ju */
177: ju[juidx++] = k; /* diag[k] */
178: i = k;
179: while (nzk --) {
180: i = q[i];
181: ju[juidx++] = i;
182: }
183: }
185: if (ai[mbs] != 0) {
186: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
187: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
188: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use n",af);
189: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);n",af);
190: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.n");
191: } else {
192: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.n");
193: }
195: ISRestoreIndices(perm,&rip);
196: /* PetscFree(q); */
197: PetscFree(jl);
199: /* put together the new matrix */
200: MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);
201: /* PetscLogObjectParent(*B,iperm); */
202: b = (Mat_SeqSBAIJ*)(*B)->data;
203: PetscFree(b->imax);
204: b->singlemalloc = PETSC_FALSE;
205: /* the next line frees the default space generated by the Create() */
206: PetscFree(b->a);
207: PetscFree(b->ilen);
208: PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
209: b->j = ju;
210: b->i = iu;
211: b->diag = 0;
212: b->ilen = 0;
213: b->imax = 0;
214: b->row = perm;
215: b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */
216: ierr = PetscObjectReference((PetscObject)perm);
217: b->icol = perm;
218: ierr = PetscObjectReference((PetscObject)perm);
219: ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
220: /* In b structure: Free imax, ilen, old a, old j.
221: Allocate idnew, solve_work, new a, new j */
222: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
223: b->s_maxnz = b->s_nz = iu[mbs];
224:
225: (*B)->factor = FACTOR_CHOLESKY;
226: (*B)->info.factor_mallocs = realloc;
227: (*B)->info.fill_ratio_given = f;
228: if (ai[mbs] != 0) {
229: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
230: } else {
231: (*B)->info.fill_ratio_needed = 0.0;
232: }
235: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace;
236: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
237: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1n");
238:
239: return(0);
240: }
241: /* ----------- end of new code --------------------*/
244: if (!perm_identity) a->permute = PETSC_TRUE;
245:
246: ISGetIndices(perm,&rip);
247:
248: if (perm_identity){ /* without permutation */
249: ai = a->i; aj = a->j;
250: } else { /* non-trivial permutation */
251: MatReorderingSeqSBAIJ(A,perm);
252: ai = a->inew; aj = a->jnew;
253: }
254:
255: /* initialization */
256: ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);
257: umax = (int)(f*ai[mbs] + 1); umax += mbs + 1;
258: ierr = PetscMalloc(umax*sizeof(int),&ju);
259: iu[0] = mbs+1;
260: juidx = mbs + 1; /* index for ju */
261: ierr = PetscMalloc(2*mbs*sizeof(int),&jl); /* linked list for pivot row */
262: q = jl + mbs; /* linked list for col index */
263: for (i=0; i<mbs; i++){
264: jl[i] = mbs;
265: q[i] = 0;
266: }
268: /* for each row k */
269: for (k=0; k<mbs; k++){
270: for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */
271: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
272: q[k] = mbs;
273: /* initialize nonzero structure of k-th row to row rip[k] of A */
274: jmin = ai[rip[k]] +1; /* exclude diag[k] */
275: jmax = ai[rip[k]+1];
276: for (j=jmin; j<jmax; j++){
277: vj = rip[aj[j]]; /* col. value */
278: if(vj > k){
279: qm = k;
280: do {
281: m = qm; qm = q[m];
282: } while(qm < vj);
283: if (qm == vj) {
284: SETERRQ(1," error: duplicate entry in An");
285: }
286: nzk++;
287: q[m] = vj;
288: q[vj] = qm;
289: } /* if(vj > k) */
290: } /* for (j=jmin; j<jmax; j++) */
292: /* modify nonzero structure of k-th row by computing fill-in
293: for each row i to be merged in */
294: prow = k;
295: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
296:
297: while (prow < k){
298: /* merge row prow into k-th row */
299: jmin = iu[prow] + 1; jmax = iu[prow+1];
300: qm = k;
301: for (j=jmin; j<jmax; j++){
302: vj = ju[j];
303: do {
304: m = qm; qm = q[m];
305: } while (qm < vj);
306: if (qm != vj){
307: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
308: }
309: }
310: prow = jl[prow]; /* next pivot row */
311: }
312:
313: /* add k to row list for first nonzero element in k-th row */
314: if (nzk > 0){
315: i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
316: jl[k] = jl[i]; jl[i] = k;
317: }
318: iu[k+1] = iu[k] + nzk;
320: /* allocate more space to ju if needed */
321: if (iu[k+1] > umax) {
322: /* estimate how much additional space we will need */
323: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
324: /* just double the memory each time */
325: maxadd = umax;
326: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
327: umax += maxadd;
329: /* allocate a longer ju */
330: PetscMalloc(umax*sizeof(int),&jutmp);
331: PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
332: PetscFree(ju);
333: ju = jutmp;
334: realloc++; /* count how many times we realloc */
335: }
337: /* save nonzero structure of k-th row in ju */
338: i=k;
339: while (nzk --) {
340: i = q[i];
341: ju[juidx++] = i;
342: }
343: }
345: if (ai[mbs] != 0) {
346: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
347: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
348: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Run with -pc_cholesky_fill %g or use n",af);
349: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:PCCholeskySetFill(pc,%g);n",af);
350: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:for best performance.n");
351: } else {
352: PetscLogInfo(A,"MatCholeskyFactorSymbolic_SeqSBAIJ:Empty matrix.n");
353: }
355: ISRestoreIndices(perm,&rip);
356: /* PetscFree(q); */
357: PetscFree(jl);
359: /* put together the new matrix */
360: MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);
361: /* PetscLogObjectParent(*B,iperm); */
362: b = (Mat_SeqSBAIJ*)(*B)->data;
363: PetscFree(b->imax);
364: b->singlemalloc = PETSC_FALSE;
365: /* the next line frees the default space generated by the Create() */
366: PetscFree(b->a);
367: PetscFree(b->ilen);
368: PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
369: b->j = ju;
370: b->i = iu;
371: b->diag = 0;
372: b->ilen = 0;
373: b->imax = 0;
374: b->row = perm;
375: b->pivotinblocks = PETSC_FALSE; /* need to get from MatCholeskyInfo */
376: ierr = PetscObjectReference((PetscObject)perm);
377: b->icol = perm;
378: ierr = PetscObjectReference((PetscObject)perm);
379: ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
380: /* In b structure: Free imax, ilen, old a, old j.
381: Allocate idnew, solve_work, new a, new j */
382: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
383: b->s_maxnz = b->s_nz = iu[mbs];
384:
385: (*B)->factor = FACTOR_CHOLESKY;
386: (*B)->info.factor_mallocs = realloc;
387: (*B)->info.fill_ratio_given = f;
388: if (ai[mbs] != 0) {
389: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
390: } else {
391: (*B)->info.fill_ratio_needed = 0.0;
392: }
394: if (perm_identity){
395: switch (bs) {
396: case 1:
397: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
398: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
399: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1n");
400: break;
401: case 2:
402: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
403: (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
404: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2n");
405: break;
406: case 3:
407: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
408: (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
409: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3n");
410: break;
411: case 4:
412: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
413: (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
414: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4n");
415: break;
416: case 5:
417: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
418: (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
419: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5n");
420: break;
421: case 6:
422: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
423: (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
424: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6n");
425: break;
426: case 7:
427: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
428: (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
429: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7n");
430: break;
431: default:
432: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
433: (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering;
434: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7n");
435: break;
436: }
437: }
439: return(0);
440: }
443: #undef __FUNCT__
445: int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,Mat *B)
446: {
447: Mat C = *B;
448: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
449: IS perm = b->row;
450: int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
451: int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
452: int bs=a->bs,bs2 = a->bs2;
453: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
454: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
455: MatScalar *work;
456: int *pivots;
460: /* initialization */
461: PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
462: PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
463: PetscMalloc(2*mbs*sizeof(int),&il);
464: jl = il + mbs;
465: for (i=0; i<mbs; i++) {
466: jl[i] = mbs; il[0] = 0;
467: }
468: PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
469: uik = dk + bs2;
470: work = uik + bs2;
471: PetscMalloc(bs*sizeof(int),&pivots);
472:
473: ierr = ISGetIndices(perm,&perm_ptr);
474:
475: /* check permutation */
476: if (!a->permute){
477: ai = a->i; aj = a->j; aa = a->a;
478: } else {
479: ai = a->inew; aj = a->jnew;
480: PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
481: PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
482: PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
483: PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));
485: for (i=0; i<mbs; i++){
486: jmin = ai[i]; jmax = ai[i+1];
487: for (j=jmin; j<jmax; j++){
488: while (a2anew[j] != j){
489: k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
490: for (k1=0; k1<bs2; k1++){
491: dk[k1] = aa[k*bs2+k1];
492: aa[k*bs2+k1] = aa[j*bs2+k1];
493: aa[j*bs2+k1] = dk[k1];
494: }
495: }
496: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
497: if (i > aj[j]){
498: /* printf("change orientation, row: %d, col: %dn",i,aj[j]); */
499: ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */
500: for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
501: for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */
502: for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
503: }
504: }
505: }
506: }
507: PetscFree(a2anew);
508: }
509:
510: /* for each row k */
511: for (k = 0; k<mbs; k++){
513: /*initialize k-th row with elements nonzero in row perm(k) of A */
514: jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
515:
516: ap = aa + jmin*bs2;
517: for (j = jmin; j < jmax; j++){
518: vj = perm_ptr[aj[j]]; /* block col. index */
519: rtmp_ptr = rtmp + vj*bs2;
520: for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
521: }
523: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
524: PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
525: i = jl[k]; /* first row to be added to k_th row */
527: while (i < k){
528: nexti = jl[i]; /* next row to be added to k_th row */
530: /* compute multiplier */
531: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
533: /* uik = -inv(Di)*U_bar(i,k) */
534: diag = ba + i*bs2;
535: u = ba + ili*bs2;
536: PetscMemzero(uik,bs2*sizeof(MatScalar));
537: Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
538:
539: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
540: Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
541:
542: /* update -U(i,k) */
543: PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));
545: /* add multiple of row i to k-th row ... */
546: jmin = ili + 1; jmax = bi[i+1];
547: if (jmin < jmax){
548: for (j=jmin; j<jmax; j++) {
549: /* rtmp += -U(i,k)^T * U_bar(i,j) */
550: rtmp_ptr = rtmp + bj[j]*bs2;
551: u = ba + j*bs2;
552: Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
553: }
554:
555: /* ... add i to row list for next nonzero entry */
556: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
557: j = bj[jmin];
558: jl[i] = jl[j]; jl[j] = i; /* update jl */
559: }
560: i = nexti;
561: }
563: /* save nonzero entries in k-th row of U ... */
565: /* invert diagonal block */
566: diag = ba+k*bs2;
567: PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
568: Kernel_A_gets_inverse_A(bs,diag,pivots,work);
569:
570: jmin = bi[k]; jmax = bi[k+1];
571: if (jmin < jmax) {
572: for (j=jmin; j<jmax; j++){
573: vj = bj[j]; /* block col. index of U */
574: u = ba + j*bs2;
575: rtmp_ptr = rtmp + vj*bs2;
576: for (k1=0; k1<bs2; k1++){
577: *u++ = *rtmp_ptr;
578: *rtmp_ptr++ = 0.0;
579: }
580: }
581:
582: /* ... add k to row list for first nonzero entry in k-th row */
583: il[k] = jmin;
584: i = bj[jmin];
585: jl[k] = jl[i]; jl[i] = k;
586: }
587: }
589: PetscFree(rtmp);
590: PetscFree(il);
591: PetscFree(dk);
592: PetscFree(pivots);
593: if (a->permute){
594: PetscFree(aa);
595: }
597: ISRestoreIndices(perm,&perm_ptr);
598: C->factor = FACTOR_CHOLESKY;
599: C->assembled = PETSC_TRUE;
600: C->preallocated = PETSC_TRUE;
601: PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
602: return(0);
603: }
605: #undef __FUNCT__
607: int MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,Mat *B)
608: {
609: Mat C = *B;
610: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
611: int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
612: int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
613: int bs=a->bs,bs2 = a->bs2;
614: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
615: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
616: MatScalar *work;
617: int *pivots;
621: /* initialization */
622:
623: PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
624: PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
625: PetscMalloc(2*mbs*sizeof(int),&il);
626: jl = il + mbs;
627: for (i=0; i<mbs; i++) {
628: jl[i] = mbs; il[0] = 0;
629: }
630: PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
631: uik = dk + bs2;
632: work = uik + bs2;
633: PetscMalloc(bs*sizeof(int),&pivots);
634:
635: ai = a->i; aj = a->j; aa = a->a;
636:
637: /* for each row k */
638: for (k = 0; k<mbs; k++){
640: /*initialize k-th row with elements nonzero in row k of A */
641: jmin = ai[k]; jmax = ai[k+1];
642: ap = aa + jmin*bs2;
643: for (j = jmin; j < jmax; j++){
644: vj = aj[j]; /* block col. index */
645: rtmp_ptr = rtmp + vj*bs2;
646: for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
647: }
649: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
650: PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
651: i = jl[k]; /* first row to be added to k_th row */
653: while (i < k){
654: nexti = jl[i]; /* next row to be added to k_th row */
656: /* compute multiplier */
657: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
659: /* uik = -inv(Di)*U_bar(i,k) */
660: diag = ba + i*bs2;
661: u = ba + ili*bs2;
662: PetscMemzero(uik,bs2*sizeof(MatScalar));
663: Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
664:
665: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
666: Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
667:
668: /* update -U(i,k) */
669: PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));
671: /* add multiple of row i to k-th row ... */
672: jmin = ili + 1; jmax = bi[i+1];
673: if (jmin < jmax){
674: for (j=jmin; j<jmax; j++) {
675: /* rtmp += -U(i,k)^T * U_bar(i,j) */
676: rtmp_ptr = rtmp + bj[j]*bs2;
677: u = ba + j*bs2;
678: Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
679: }
680:
681: /* ... add i to row list for next nonzero entry */
682: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
683: j = bj[jmin];
684: jl[i] = jl[j]; jl[j] = i; /* update jl */
685: }
686: i = nexti;
687: }
689: /* save nonzero entries in k-th row of U ... */
691: /* invert diagonal block */
692: diag = ba+k*bs2;
693: PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
694: Kernel_A_gets_inverse_A(bs,diag,pivots,work);
695:
696: jmin = bi[k]; jmax = bi[k+1];
697: if (jmin < jmax) {
698: for (j=jmin; j<jmax; j++){
699: vj = bj[j]; /* block col. index of U */
700: u = ba + j*bs2;
701: rtmp_ptr = rtmp + vj*bs2;
702: for (k1=0; k1<bs2; k1++){
703: *u++ = *rtmp_ptr;
704: *rtmp_ptr++ = 0.0;
705: }
706: }
707:
708: /* ... add k to row list for first nonzero entry in k-th row */
709: il[k] = jmin;
710: i = bj[jmin];
711: jl[k] = jl[i]; jl[i] = k;
712: }
713: }
715: PetscFree(rtmp);
716: PetscFree(il);
717: PetscFree(dk);
718: PetscFree(pivots);
720: C->factor = FACTOR_CHOLESKY;
721: C->assembled = PETSC_TRUE;
722: C->preallocated = PETSC_TRUE;
723: PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
724: return(0);
725: }
727: /*
728: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
729: Version for blocks 2 by 2.
730: */
731: #undef __FUNCT__
733: int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,Mat *B)
734: {
735: Mat C = *B;
736: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
737: IS perm = b->row;
738: int *perm_ptr,ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
739: int *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
740: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
741: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
744:
745: /* initialization */
746: /* il and jl record the first nonzero element in each row of the accessing
747: window U(0:k, k:mbs-1).
748: jl: list of rows to be added to uneliminated rows
749: i>= k: jl(i) is the first row to be added to row i
750: i< k: jl(i) is the row following row i in some list of rows
751: jl(i) = mbs indicates the end of a list
752: il(i): points to the first nonzero element in columns k,...,mbs-1 of
753: row i of U */
754: PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
755: PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
756: PetscMalloc(2*mbs*sizeof(int),&il);
757: jl = il + mbs;
758: for (i=0; i<mbs; i++) {
759: jl[i] = mbs; il[0] = 0;
760: }
761: PetscMalloc(8*sizeof(MatScalar),&dk);
762: uik = dk + 4;
763: ISGetIndices(perm,&perm_ptr);
765: /* check permutation */
766: if (!a->permute){
767: ai = a->i; aj = a->j; aa = a->a;
768: } else {
769: ai = a->inew; aj = a->jnew;
770: PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
771: PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
772: PetscMalloc(ai[mbs]*sizeof(int),&a2anew);
773: PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(int));
775: for (i=0; i<mbs; i++){
776: jmin = ai[i]; jmax = ai[i+1];
777: for (j=jmin; j<jmax; j++){
778: while (a2anew[j] != j){
779: k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
780: for (k1=0; k1<4; k1++){
781: dk[k1] = aa[k*4+k1];
782: aa[k*4+k1] = aa[j*4+k1];
783: aa[j*4+k1] = dk[k1];
784: }
785: }
786: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
787: if (i > aj[j]){
788: /* printf("change orientation, row: %d, col: %dn",i,aj[j]); */
789: ap = aa + j*4; /* ptr to the beginning of the block */
790: dk[1] = ap[1]; /* swap ap[1] and ap[2] */
791: ap[1] = ap[2];
792: ap[2] = dk[1];
793: }
794: }
795: }
796: PetscFree(a2anew);
797: }
799: /* for each row k */
800: for (k = 0; k<mbs; k++){
802: /*initialize k-th row with elements nonzero in row perm(k) of A */
803: jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
804: ap = aa + jmin*4;
805: for (j = jmin; j < jmax; j++){
806: vj = perm_ptr[aj[j]]; /* block col. index */
807: rtmp_ptr = rtmp + vj*4;
808: for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
809: }
811: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
812: PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
813: i = jl[k]; /* first row to be added to k_th row */
815: while (i < k){
816: nexti = jl[i]; /* next row to be added to k_th row */
818: /* compute multiplier */
819: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
821: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
822: diag = ba + i*4;
823: u = ba + ili*4;
824: uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
825: uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
826: uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
827: uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
828:
829: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
830: dk[0] += uik[0]*u[0] + uik[1]*u[1];
831: dk[1] += uik[2]*u[0] + uik[3]*u[1];
832: dk[2] += uik[0]*u[2] + uik[1]*u[3];
833: dk[3] += uik[2]*u[2] + uik[3]*u[3];
835: /* update -U(i,k): ba[ili] = uik */
836: PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));
838: /* add multiple of row i to k-th row ... */
839: jmin = ili + 1; jmax = bi[i+1];
840: if (jmin < jmax){
841: for (j=jmin; j<jmax; j++) {
842: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
843: rtmp_ptr = rtmp + bj[j]*4;
844: u = ba + j*4;
845: rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
846: rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
847: rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
848: rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
849: }
850:
851: /* ... add i to row list for next nonzero entry */
852: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
853: j = bj[jmin];
854: jl[i] = jl[j]; jl[j] = i; /* update jl */
855: }
856: i = nexti;
857: }
859: /* save nonzero entries in k-th row of U ... */
861: /* invert diagonal block */
862: diag = ba+k*4;
863: PetscMemcpy(diag,dk,4*sizeof(MatScalar));
864: Kernel_A_gets_inverse_A_2(diag);
865:
866: jmin = bi[k]; jmax = bi[k+1];
867: if (jmin < jmax) {
868: for (j=jmin; j<jmax; j++){
869: vj = bj[j]; /* block col. index of U */
870: u = ba + j*4;
871: rtmp_ptr = rtmp + vj*4;
872: for (k1=0; k1<4; k1++){
873: *u++ = *rtmp_ptr;
874: *rtmp_ptr++ = 0.0;
875: }
876: }
877:
878: /* ... add k to row list for first nonzero entry in k-th row */
879: il[k] = jmin;
880: i = bj[jmin];
881: jl[k] = jl[i]; jl[i] = k;
882: }
883: }
885: PetscFree(rtmp);
886: PetscFree(il);
887: PetscFree(dk);
888: if (a->permute) {
889: PetscFree(aa);
890: }
891: ISRestoreIndices(perm,&perm_ptr);
892: C->factor = FACTOR_CHOLESKY;
893: C->assembled = PETSC_TRUE;
894: C->preallocated = PETSC_TRUE;
895: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
896: return(0);
897: }
899: /*
900: Version for when blocks are 2 by 2 Using natural ordering
901: */
902: #undef __FUNCT__
904: int MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,Mat *B)
905: {
906: Mat C = *B;
907: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
908: int ierr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
909: int *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
910: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
911: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
914:
915: /* initialization */
916: /* il and jl record the first nonzero element in each row of the accessing
917: window U(0:k, k:mbs-1).
918: jl: list of rows to be added to uneliminated rows
919: i>= k: jl(i) is the first row to be added to row i
920: i< k: jl(i) is the row following row i in some list of rows
921: jl(i) = mbs indicates the end of a list
922: il(i): points to the first nonzero element in columns k,...,mbs-1 of
923: row i of U */
924: PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
925: PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
926: PetscMalloc(2*mbs*sizeof(int),&il);
927: jl = il + mbs;
928: for (i=0; i<mbs; i++) {
929: jl[i] = mbs; il[0] = 0;
930: }
931: PetscMalloc(8*sizeof(MatScalar),&dk);
932: uik = dk + 4;
933:
934: ai = a->i; aj = a->j; aa = a->a;
936: /* for each row k */
937: for (k = 0; k<mbs; k++){
939: /*initialize k-th row with elements nonzero in row k of A */
940: jmin = ai[k]; jmax = ai[k+1];
941: ap = aa + jmin*4;
942: for (j = jmin; j < jmax; j++){
943: vj = aj[j]; /* block col. index */
944: rtmp_ptr = rtmp + vj*4;
945: for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
946: }
947:
948: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
949: PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
950: i = jl[k]; /* first row to be added to k_th row */
952: while (i < k){
953: nexti = jl[i]; /* next row to be added to k_th row */
955: /* compute multiplier */
956: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
958: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
959: diag = ba + i*4;
960: u = ba + ili*4;
961: uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
962: uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
963: uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
964: uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
965:
966: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
967: dk[0] += uik[0]*u[0] + uik[1]*u[1];
968: dk[1] += uik[2]*u[0] + uik[3]*u[1];
969: dk[2] += uik[0]*u[2] + uik[1]*u[3];
970: dk[3] += uik[2]*u[2] + uik[3]*u[3];
972: /* update -U(i,k): ba[ili] = uik */
973: PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));
975: /* add multiple of row i to k-th row ... */
976: jmin = ili + 1; jmax = bi[i+1];
977: if (jmin < jmax){
978: for (j=jmin; j<jmax; j++) {
979: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
980: rtmp_ptr = rtmp + bj[j]*4;
981: u = ba + j*4;
982: rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
983: rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
984: rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
985: rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
986: }
987:
988: /* ... add i to row list for next nonzero entry */
989: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
990: j = bj[jmin];
991: jl[i] = jl[j]; jl[j] = i; /* update jl */
992: }
993: i = nexti;
994: }
996: /* save nonzero entries in k-th row of U ... */
998: /* invert diagonal block */
999: diag = ba+k*4;
1000: PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1001: Kernel_A_gets_inverse_A_2(diag);
1002:
1003: jmin = bi[k]; jmax = bi[k+1];
1004: if (jmin < jmax) {
1005: for (j=jmin; j<jmax; j++){
1006: vj = bj[j]; /* block col. index of U */
1007: u = ba + j*4;
1008: rtmp_ptr = rtmp + vj*4;
1009: for (k1=0; k1<4; k1++){
1010: *u++ = *rtmp_ptr;
1011: *rtmp_ptr++ = 0.0;
1012: }
1013: }
1014:
1015: /* ... add k to row list for first nonzero entry in k-th row */
1016: il[k] = jmin;
1017: i = bj[jmin];
1018: jl[k] = jl[i]; jl[i] = k;
1019: }
1020: }
1022: PetscFree(rtmp);
1023: PetscFree(il);
1024: PetscFree(dk);
1026: C->factor = FACTOR_CHOLESKY;
1027: C->assembled = PETSC_TRUE;
1028: C->preallocated = PETSC_TRUE;
1029: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1030: return(0);
1031: }
1033: /*
1034: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
1035: Version for blocks are 1 by 1.
1036: */
1037: #undef __FUNCT__
1039: int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,Mat *B)
1040: {
1041: Mat C = *B;
1042: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1043: IS ip = b->row;
1044: int *rip,ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
1045: int *ai,*aj,*r;
1046: int k,jmin,jmax,*jl,*il,vj,nexti,ili;
1047: MatScalar *rtmp;
1048: MatScalar *ba = b->a,*aa,ak;
1049: MatScalar dk,uikdi;
1052: ierr = ISGetIndices(ip,&rip);
1053: if (!a->permute){
1054: ai = a->i; aj = a->j; aa = a->a;
1055: } else {
1056: ai = a->inew; aj = a->jnew;
1057: PetscMalloc(ai[mbs]*sizeof(MatScalar),&aa);
1058: PetscMemcpy(aa,a->a,ai[mbs]*sizeof(MatScalar));
1059: PetscMalloc(ai[mbs]*sizeof(int),&r);
1060: ierr= PetscMemcpy(r,a->a2anew,(ai[mbs])*sizeof(int));
1062: jmin = ai[0]; jmax = ai[mbs];
1063: for (j=jmin; j<jmax; j++){
1064: while (r[j] != j){
1065: k = r[j]; r[j] = r[k]; r[k] = k;
1066: ak = aa[k]; aa[k] = aa[j]; aa[j] = ak;
1067: }
1068: }
1069: PetscFree(r);
1070: }
1071:
1072: /* initialization */
1073: /* il and jl record the first nonzero element in each row of the accessing
1074: window U(0:k, k:mbs-1).
1075: jl: list of rows to be added to uneliminated rows
1076: i>= k: jl(i) is the first row to be added to row i
1077: i< k: jl(i) is the row following row i in some list of rows
1078: jl(i) = mbs indicates the end of a list
1079: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1080: row i of U */
1081: PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1082: PetscMalloc(2*mbs*sizeof(int),&il);
1083: jl = il + mbs;
1084: for (i=0; i<mbs; i++) {
1085: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1086: }
1088: /* for each row k */
1089: for (k = 0; k<mbs; k++){
1091: /*initialize k-th row with elements nonzero in row perm(k) of A */
1092: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1093:
1094: for (j = jmin; j < jmax; j++){
1095: vj = rip[aj[j]];
1096: rtmp[vj] = aa[j];
1097: }
1099: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1100: dk = rtmp[k];
1101: i = jl[k]; /* first row to be added to k_th row */
1103: while (i < k){
1104: nexti = jl[i]; /* next row to be added to k_th row */
1106: /* compute multiplier, update D(k) and U(i,k) */
1107: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1108: uikdi = - ba[ili]*ba[i];
1109: dk += uikdi*ba[ili];
1110: ba[ili] = uikdi; /* -U(i,k) */
1112: /* add multiple of row i to k-th row ... */
1113: jmin = ili + 1; jmax = bi[i+1];
1114: if (jmin < jmax){
1115: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1116: /* ... add i to row list for next nonzero entry */
1117: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1118: j = bj[jmin];
1119: jl[i] = jl[j]; jl[j] = i; /* update jl */
1120: }
1121: i = nexti;
1122: }
1124: /* check for zero pivot and save diagoanl element */
1125: if (dk == 0.0){
1126: SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1127: /*
1128: } else if (PetscRealPart(dk) < 0.0){
1129: SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %gn",k,dk);
1130: */
1131: }
1133: /* save nonzero entries in k-th row of U ... */
1134: ba[k] = 1.0/dk;
1135: jmin = bi[k]; jmax = bi[k+1];
1136: if (jmin < jmax) {
1137: for (j=jmin; j<jmax; j++){
1138: vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1139: }
1140: /* ... add k to row list for first nonzero entry in k-th row */
1141: il[k] = jmin;
1142: i = bj[jmin];
1143: jl[k] = jl[i]; jl[i] = k;
1144: }
1145: }
1146:
1147: PetscFree(rtmp);
1148: PetscFree(il);
1149: if (a->permute){
1150: PetscFree(aa);
1151: }
1153: ISRestoreIndices(ip,&rip);
1154: C->factor = FACTOR_CHOLESKY;
1155: C->assembled = PETSC_TRUE;
1156: C->preallocated = PETSC_TRUE;
1157: PetscLogFlops(b->mbs);
1158: return(0);
1159: }
1161: /*
1162: Version for when blocks are 1 by 1 Using natural ordering
1163: */
1164: #undef __FUNCT__
1166: int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,Mat *B)
1167: {
1168: Mat C = *B;
1169: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
1170: int ierr,i,j,mbs = a->mbs,*bi = b->i,*bj = b->j;
1171: int *ai,*aj;
1172: int k,jmin,jmax,*jl,*il,vj,nexti,ili;
1173: MatScalar *rtmp,*ba = b->a,*aa,dk,uikdi;
1176:
1177: /* initialization */
1178: /* il and jl record the first nonzero element in each row of the accessing
1179: window U(0:k, k:mbs-1).
1180: jl: list of rows to be added to uneliminated rows
1181: i>= k: jl(i) is the first row to be added to row i
1182: i< k: jl(i) is the row following row i in some list of rows
1183: jl(i) = mbs indicates the end of a list
1184: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1185: row i of U */
1186: PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1187: PetscMalloc(2*mbs*sizeof(int),&il);
1188: jl = il + mbs;
1189: for (i=0; i<mbs; i++) {
1190: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1191: }
1193: ai = a->i; aj = a->j; aa = a->a;
1195: /* for each row k */
1196: for (k = 0; k<mbs; k++){
1198: /*initialize k-th row with elements nonzero in row perm(k) of A */
1199: jmin = ai[k]; jmax = ai[k+1];
1200:
1201: for (j = jmin; j < jmax; j++){
1202: vj = aj[j];
1203: rtmp[vj] = aa[j];
1204: }
1205:
1206: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1207: dk = rtmp[k];
1208: i = jl[k]; /* first row to be added to k_th row */
1210: while (i < k){
1211: nexti = jl[i]; /* next row to be added to k_th row */
1213: /* compute multiplier, update D(k) and U(i,k) */
1214: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1215: uikdi = - ba[ili]*ba[i];
1216: dk += uikdi*ba[ili];
1217: ba[ili] = uikdi; /* -U(i,k) */
1219: /* add multiple of row i to k-th row ... */
1220: jmin = ili + 1; jmax = bi[i+1];
1221: if (jmin < jmax){
1222: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1223: /* ... add i to row list for next nonzero entry */
1224: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1225: j = bj[jmin];
1226: jl[i] = jl[j]; jl[j] = i; /* update jl */
1227: }
1228: i = nexti;
1229: }
1231: /* check for zero pivot and save diagoanl element */
1232: if (dk == 0.0){
1233: SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1234: /*
1235: } else if (PetscRealPart(dk) < 0){
1236: SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Negative pivot: d[%d] = %gn",k,dk);
1237: */
1238: }
1240: /* save nonzero entries in k-th row of U ... */
1241: ba[k] = 1.0/dk;
1242: jmin = bi[k]; jmax = bi[k+1];
1243: if (jmin < jmax) {
1244: for (j=jmin; j<jmax; j++){
1245: vj = bj[j]; ba[j] = rtmp[vj]; rtmp[vj] = 0.0;
1246: }
1247: /* ... add k to row list for first nonzero entry in k-th row */
1248: il[k] = jmin;
1249: i = bj[jmin];
1250: jl[k] = jl[i]; jl[i] = k;
1251: }
1252: }
1253:
1254: PetscFree(rtmp);
1255: PetscFree(il);
1256:
1257: C->factor = FACTOR_CHOLESKY;
1258: C->assembled = PETSC_TRUE;
1259: C->preallocated = PETSC_TRUE;
1260: PetscLogFlops(b->mbs);
1261: return(0);
1262: }
1264: #undef __FUNCT__
1266: int MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat A,Mat *B)
1267: {
1268: Mat C = *B;
1269: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1270: int ierr,i,j,mbs = a->mbs;
1271: int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1272: int k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1273: MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1276: /* initialization */
1277: /* il and jl record the first nonzero element in each row of the accessing
1278: window U(0:k, k:mbs-1).
1279: jl: list of rows to be added to uneliminated rows
1280: i>= k: jl(i) is the first row to be added to row i
1281: i< k: jl(i) is the row following row i in some list of rows
1282: jl(i) = mbs indicates the end of a list
1283: il(i): points to the first nonzero element in U(i,k:mbs-1)
1284: */
1285: PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1286: PetscMalloc(2*mbs*sizeof(int),&il);
1287: jl = il + mbs;
1288: for (i=0; i<mbs; i++) {
1289: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1290: }
1292: /* for each row k */
1293: for (k = 0; k<mbs; k++){
1295: /*initialize k-th row with elements nonzero in row perm(k) of A */
1296: nz = ai[k+1] - ai[k];
1297: acol = aj + ai[k];
1298: aval = aa + ai[k];
1299: bval = ba + bi[k];
1300: while (nz -- ){
1301: rtmp[*acol++] = *aval++;
1302: *bval++ = 0.0; /* for in-place factorization */
1303: }
1304:
1305: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1306: dk = rtmp[k];
1307: i = jl[k]; /* first row to be added to k_th row */
1309: while (i < k){
1310: nexti = jl[i]; /* next row to be added to k_th row */
1311: /* printf("factnum, k %d, i %dn", k,i); */
1313: /* compute multiplier, update D(k) and U(i,k) */
1314: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1315: uikdi = - ba[ili]*ba[bi[i]];
1316: dk += uikdi*ba[ili];
1317: ba[ili] = uikdi; /* -U(i,k) */
1319: /* add multiple of row i to k-th row ... */
1320: jmin = ili + 1;
1321: nz = bi[i+1] - jmin;
1322: if (nz > 0){
1323: bcol = bj + jmin;
1324: bval = ba + jmin;
1325: while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1326: /* ... add i to row list for next nonzero entry */
1327: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1328: j = bj[jmin];
1329: jl[i] = jl[j]; jl[j] = i; /* update jl */
1330: }
1331: i = nexti;
1332: }
1334: /* check for zero pivot and save diagoanl element */
1335: if (PetscRealPart(dk) == 0.0){
1336: SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
1337: } else if( PetscRealPart(dk) < 0.0){
1338: PetscLogInfo((PetscObject)A,"Negative pivot %g in Cholesky factorizationn",1./PetscRealPart(dk));
1339: }
1341: /* save nonzero entries in k-th row of U ... */
1342: ba[bi[k]] = 1.0/dk;
1343: jmin = bi[k]+1;
1344: nz = bi[k+1] - jmin;
1345: if (nz){
1346: bcol = bj + jmin;
1347: bval = ba + jmin;
1348: while (nz--){
1349: *bval++ = rtmp[*bcol];
1350: rtmp[*bcol++] = 0.0;
1351: }
1352: /* ... add k to row list for first nonzero entry in k-th row */
1353: il[k] = jmin;
1354: i = bj[jmin];
1355: jl[k] = jl[i]; jl[i] = k;
1356: }
1357: }
1358:
1359: PetscFree(rtmp);
1360: PetscFree(il);
1361:
1362: C->factor = FACTOR_CHOLESKY;
1363: C->assembled = PETSC_TRUE;
1364: C->preallocated = PETSC_TRUE;
1365: PetscLogFlops(b->mbs);
1366: return(0);
1367: }
1369: #undef __FUNCT__
1371: int MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,PetscReal f)
1372: {
1374: Mat C;
1377: MatCholeskyFactorSymbolic(A,perm,f,&C);
1378: MatCholeskyFactorNumeric(A,&C);
1379: MatHeaderCopy(A,C);
1380: return(0);
1381: }