Actual source code: baij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/inline/spops.h
9: #include petscsys.h
11: #include src/inline/ilu.h
15: /*@
16: MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
18: Collective on Mat
20: Input Parameters:
21: . mat - the matrix
23: Level: advanced
24: @*/
25: PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat)
26: {
27: PetscErrorCode ierr,(*f)(Mat);
31: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
34: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
35: if (f) {
36: (*f)(mat);
37: } else {
38: SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39: }
40: return(0);
41: }
46: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47: {
48: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
50: PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51: PetscScalar *v = a->a,*odiag,*diag,*mdiag;
54: if (a->idiagvalid) return(0);
55: MatMarkDiagonal_SeqBAIJ(A);
56: diag_offset = a->diag;
57: if (!a->idiag) {
58: PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
59: }
60: diag = a->idiag;
61: mdiag = a->idiag+bs*bs*mbs;
62: /* factor and invert each block */
63: switch (bs){
64: case 2:
65: for (i=0; i<mbs; i++) {
66: odiag = v + 4*diag_offset[i];
67: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69: Kernel_A_gets_inverse_A_2(diag);
70: diag += 4;
71: mdiag += 4;
72: }
73: break;
74: case 3:
75: for (i=0; i<mbs; i++) {
76: odiag = v + 9*diag_offset[i];
77: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79: diag[8] = odiag[8];
80: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82: mdiag[8] = odiag[8];
83: Kernel_A_gets_inverse_A_3(diag);
84: diag += 9;
85: mdiag += 9;
86: }
87: break;
88: case 4:
89: for (i=0; i<mbs; i++) {
90: odiag = v + 16*diag_offset[i];
91: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
92: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
93: Kernel_A_gets_inverse_A_4(diag);
94: diag += 16;
95: mdiag += 16;
96: }
97: break;
98: case 5:
99: for (i=0; i<mbs; i++) {
100: odiag = v + 25*diag_offset[i];
101: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
102: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
103: Kernel_A_gets_inverse_A_5(diag);
104: diag += 25;
105: mdiag += 25;
106: }
107: break;
108: default:
109: SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110: }
111: a->idiagvalid = PETSC_TRUE;
112: return(0);
113: }
118: PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119: {
120: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
121: PetscScalar *x,x1,s1;
122: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
123: PetscErrorCode ierr;
124: PetscInt m = a->mbs,i,i2,nz,idx;
125: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
128: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129: its = its*lits;
130: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
134: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
136: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
138: diag = a->diag;
139: idiag = a->idiag;
140: VecGetArray(xx,&x);
141: VecGetArray(bb,(PetscScalar**)&b);
143: if (flag & SOR_ZERO_INITIAL_GUESS) {
144: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145: x[0] = b[0]*idiag[0];
146: i2 = 1;
147: idiag += 1;
148: for (i=1; i<m; i++) {
149: v = aa + ai[i];
150: vi = aj + ai[i];
151: nz = diag[i] - ai[i];
152: s1 = b[i2];
153: while (nz--) {
154: idx = (*vi++);
155: x1 = x[idx];
156: s1 -= v[0]*x1;
157: v += 1;
158: }
159: x[i2] = idiag[0]*s1;
160: idiag += 1;
161: i2 += 1;
162: }
163: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
164: PetscLogFlops(a->nz);
165: }
166: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
167: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
168: i2 = 0;
169: mdiag = a->idiag+a->mbs;
170: for (i=0; i<m; i++) {
171: x1 = x[i2];
172: x[i2] = mdiag[0]*x1;
173: mdiag += 1;
174: i2 += 1;
175: }
176: PetscLogFlops(m);
177: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
178: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
179: }
180: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
181: idiag = a->idiag+a->mbs - 1;
182: i2 = m - 1;
183: x1 = x[i2];
184: x[i2] = idiag[0]*x1;
185: idiag -= 1;
186: i2 -= 1;
187: for (i=m-2; i>=0; i--) {
188: v = aa + (diag[i]+1);
189: vi = aj + diag[i] + 1;
190: nz = ai[i+1] - diag[i] - 1;
191: s1 = x[i2];
192: while (nz--) {
193: idx = (*vi++);
194: x1 = x[idx];
195: s1 -= v[0]*x1;
196: v += 1;
197: }
198: x[i2] = idiag[0]*s1;
199: idiag -= 2;
200: i2 -= 1;
201: }
202: PetscLogFlops(a->nz);
203: }
204: } else {
205: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
206: }
207: VecRestoreArray(xx,&x);
208: VecRestoreArray(bb,(PetscScalar**)&b);
209: return(0);
210: }
214: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
215: {
216: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
217: PetscScalar *x,x1,x2,s1,s2;
218: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
219: PetscErrorCode ierr;
220: PetscInt m = a->mbs,i,i2,nz,idx;
221: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
224: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
225: its = its*lits;
226: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
227: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
228: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
229: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
230: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
232: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
234: diag = a->diag;
235: idiag = a->idiag;
236: VecGetArray(xx,&x);
237: VecGetArray(bb,(PetscScalar**)&b);
239: if (flag & SOR_ZERO_INITIAL_GUESS) {
240: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
241: x[0] = b[0]*idiag[0] + b[1]*idiag[2];
242: x[1] = b[0]*idiag[1] + b[1]*idiag[3];
243: i2 = 2;
244: idiag += 4;
245: for (i=1; i<m; i++) {
246: v = aa + 4*ai[i];
247: vi = aj + ai[i];
248: nz = diag[i] - ai[i];
249: s1 = b[i2]; s2 = b[i2+1];
250: while (nz--) {
251: idx = 2*(*vi++);
252: x1 = x[idx]; x2 = x[1+idx];
253: s1 -= v[0]*x1 + v[2]*x2;
254: s2 -= v[1]*x1 + v[3]*x2;
255: v += 4;
256: }
257: x[i2] = idiag[0]*s1 + idiag[2]*s2;
258: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
259: idiag += 4;
260: i2 += 2;
261: }
262: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
263: PetscLogFlops(4*(a->nz));
264: }
265: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
266: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
267: i2 = 0;
268: mdiag = a->idiag+4*a->mbs;
269: for (i=0; i<m; i++) {
270: x1 = x[i2]; x2 = x[i2+1];
271: x[i2] = mdiag[0]*x1 + mdiag[2]*x2;
272: x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
273: mdiag += 4;
274: i2 += 2;
275: }
276: PetscLogFlops(6*m);
277: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
278: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
279: }
280: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
281: idiag = a->idiag+4*a->mbs - 4;
282: i2 = 2*m - 2;
283: x1 = x[i2]; x2 = x[i2+1];
284: x[i2] = idiag[0]*x1 + idiag[2]*x2;
285: x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
286: idiag -= 4;
287: i2 -= 2;
288: for (i=m-2; i>=0; i--) {
289: v = aa + 4*(diag[i]+1);
290: vi = aj + diag[i] + 1;
291: nz = ai[i+1] - diag[i] - 1;
292: s1 = x[i2]; s2 = x[i2+1];
293: while (nz--) {
294: idx = 2*(*vi++);
295: x1 = x[idx]; x2 = x[1+idx];
296: s1 -= v[0]*x1 + v[2]*x2;
297: s2 -= v[1]*x1 + v[3]*x2;
298: v += 4;
299: }
300: x[i2] = idiag[0]*s1 + idiag[2]*s2;
301: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
302: idiag -= 4;
303: i2 -= 2;
304: }
305: PetscLogFlops(4*(a->nz));
306: }
307: } else {
308: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
309: }
310: VecRestoreArray(xx,&x);
311: VecRestoreArray(bb,(PetscScalar**)&b);
312: return(0);
313: }
317: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
318: {
319: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
320: PetscScalar *x,x1,x2,x3,s1,s2,s3;
321: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
322: PetscErrorCode ierr;
323: PetscInt m = a->mbs,i,i2,nz,idx;
324: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
327: its = its*lits;
328: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
329: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
330: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
331: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
332: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
333: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
335: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
337: diag = a->diag;
338: idiag = a->idiag;
339: VecGetArray(xx,&x);
340: VecGetArray(bb,(PetscScalar**)&b);
342: if (flag & SOR_ZERO_INITIAL_GUESS) {
343: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
344: x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
345: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
346: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
347: i2 = 3;
348: idiag += 9;
349: for (i=1; i<m; i++) {
350: v = aa + 9*ai[i];
351: vi = aj + ai[i];
352: nz = diag[i] - ai[i];
353: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
354: while (nz--) {
355: idx = 3*(*vi++);
356: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
357: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
358: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
359: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
360: v += 9;
361: }
362: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
363: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
364: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
365: idiag += 9;
366: i2 += 3;
367: }
368: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
369: PetscLogFlops(9*(a->nz));
370: }
371: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
372: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
373: i2 = 0;
374: mdiag = a->idiag+9*a->mbs;
375: for (i=0; i<m; i++) {
376: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
377: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
378: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
379: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
380: mdiag += 9;
381: i2 += 3;
382: }
383: PetscLogFlops(15*m);
384: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
385: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
386: }
387: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
388: idiag = a->idiag+9*a->mbs - 9;
389: i2 = 3*m - 3;
390: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
391: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
392: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
393: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
394: idiag -= 9;
395: i2 -= 3;
396: for (i=m-2; i>=0; i--) {
397: v = aa + 9*(diag[i]+1);
398: vi = aj + diag[i] + 1;
399: nz = ai[i+1] - diag[i] - 1;
400: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
401: while (nz--) {
402: idx = 3*(*vi++);
403: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
404: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
405: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
406: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
407: v += 9;
408: }
409: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
410: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
411: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
412: idiag -= 9;
413: i2 -= 3;
414: }
415: PetscLogFlops(9*(a->nz));
416: }
417: } else {
418: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
419: }
420: VecRestoreArray(xx,&x);
421: VecRestoreArray(bb,(PetscScalar**)&b);
422: return(0);
423: }
427: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
428: {
429: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
430: PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4;
431: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
432: PetscErrorCode ierr;
433: PetscInt m = a->mbs,i,i2,nz,idx;
434: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
437: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
438: its = its*lits;
439: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
440: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
441: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
442: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
443: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
445: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
447: diag = a->diag;
448: idiag = a->idiag;
449: VecGetArray(xx,&x);
450: VecGetArray(bb,(PetscScalar**)&b);
452: if (flag & SOR_ZERO_INITIAL_GUESS) {
453: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
454: x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12];
455: x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13];
456: x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
457: x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
458: i2 = 4;
459: idiag += 16;
460: for (i=1; i<m; i++) {
461: v = aa + 16*ai[i];
462: vi = aj + ai[i];
463: nz = diag[i] - ai[i];
464: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
465: while (nz--) {
466: idx = 4*(*vi++);
467: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
468: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
469: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
470: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
471: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
472: v += 16;
473: }
474: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
475: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
476: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
477: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
478: idiag += 16;
479: i2 += 4;
480: }
481: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
482: PetscLogFlops(16*(a->nz));
483: }
484: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
485: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
486: i2 = 0;
487: mdiag = a->idiag+16*a->mbs;
488: for (i=0; i<m; i++) {
489: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
490: x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4;
491: x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4;
492: x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
493: x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
494: mdiag += 16;
495: i2 += 4;
496: }
497: PetscLogFlops(28*m);
498: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
499: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
500: }
501: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
502: idiag = a->idiag+16*a->mbs - 16;
503: i2 = 4*m - 4;
504: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
505: x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4;
506: x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4;
507: x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
508: x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
509: idiag -= 16;
510: i2 -= 4;
511: for (i=m-2; i>=0; i--) {
512: v = aa + 16*(diag[i]+1);
513: vi = aj + diag[i] + 1;
514: nz = ai[i+1] - diag[i] - 1;
515: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
516: while (nz--) {
517: idx = 4*(*vi++);
518: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
519: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
520: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
521: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
522: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
523: v += 16;
524: }
525: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
526: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
527: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
528: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
529: idiag -= 16;
530: i2 -= 4;
531: }
532: PetscLogFlops(16*(a->nz));
533: }
534: } else {
535: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
536: }
537: VecRestoreArray(xx,&x);
538: VecRestoreArray(bb,(PetscScalar**)&b);
539: return(0);
540: }
544: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
545: {
546: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
547: PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
548: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
549: PetscErrorCode ierr;
550: PetscInt m = a->mbs,i,i2,nz,idx;
551: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
554: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
555: its = its*lits;
556: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
557: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
558: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
559: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
560: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
562: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
564: diag = a->diag;
565: idiag = a->idiag;
566: VecGetArray(xx,&x);
567: VecGetArray(bb,(PetscScalar**)&b);
569: if (flag & SOR_ZERO_INITIAL_GUESS) {
570: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
571: x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
572: x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
573: x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
574: x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
575: x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
576: i2 = 5;
577: idiag += 25;
578: for (i=1; i<m; i++) {
579: v = aa + 25*ai[i];
580: vi = aj + ai[i];
581: nz = diag[i] - ai[i];
582: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
583: while (nz--) {
584: idx = 5*(*vi++);
585: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
586: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
587: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
588: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
589: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
590: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
591: v += 25;
592: }
593: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
594: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
595: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
596: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
597: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
598: idiag += 25;
599: i2 += 5;
600: }
601: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
602: PetscLogFlops(25*(a->nz));
603: }
604: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
605: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
606: i2 = 0;
607: mdiag = a->idiag+25*a->mbs;
608: for (i=0; i<m; i++) {
609: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
610: x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
611: x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
612: x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
613: x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
614: x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
615: mdiag += 25;
616: i2 += 5;
617: }
618: PetscLogFlops(45*m);
619: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
620: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
621: }
622: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
623: idiag = a->idiag+25*a->mbs - 25;
624: i2 = 5*m - 5;
625: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
626: x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
627: x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
628: x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
629: x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
630: x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
631: idiag -= 25;
632: i2 -= 5;
633: for (i=m-2; i>=0; i--) {
634: v = aa + 25*(diag[i]+1);
635: vi = aj + diag[i] + 1;
636: nz = ai[i+1] - diag[i] - 1;
637: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
638: while (nz--) {
639: idx = 5*(*vi++);
640: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
641: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
642: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
643: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
644: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
645: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
646: v += 25;
647: }
648: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
649: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
650: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
651: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
652: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
653: idiag -= 25;
654: i2 -= 5;
655: }
656: PetscLogFlops(25*(a->nz));
657: }
658: } else {
659: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
660: }
661: VecRestoreArray(xx,&x);
662: VecRestoreArray(bb,(PetscScalar**)&b);
663: return(0);
664: }
668: PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
669: {
670: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
671: PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
672: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
673: PetscErrorCode ierr;
674: PetscInt m = a->mbs,i,i2,nz,idx;
675: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
678: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
679: its = its*lits;
680: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
681: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
682: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
683: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
684: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
686: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
688: diag = a->diag;
689: idiag = a->idiag;
690: VecGetArray(xx,&x);
691: VecGetArray(bb,(PetscScalar**)&b);
693: if (flag & SOR_ZERO_INITIAL_GUESS) {
694: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
695: x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
696: x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
697: x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
698: x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
699: x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
700: x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
701: i2 = 6;
702: idiag += 36;
703: for (i=1; i<m; i++) {
704: v = aa + 36*ai[i];
705: vi = aj + ai[i];
706: nz = diag[i] - ai[i];
707: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
708: while (nz--) {
709: idx = 6*(*vi++);
710: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
711: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
712: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
713: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
714: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
715: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
716: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
717: v += 36;
718: }
719: x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
720: x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
721: x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
722: x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
723: x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
724: x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
725: idiag += 36;
726: i2 += 6;
727: }
728: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
729: PetscLogFlops(36*(a->nz));
730: }
731: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
732: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
733: i2 = 0;
734: mdiag = a->idiag+36*a->mbs;
735: for (i=0; i<m; i++) {
736: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
737: x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
738: x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
739: x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
740: x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
741: x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
742: x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
743: mdiag += 36;
744: i2 += 6;
745: }
746: PetscLogFlops(60*m);
747: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
748: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
749: }
750: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
751: idiag = a->idiag+36*a->mbs - 36;
752: i2 = 6*m - 6;
753: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
754: x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
755: x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
756: x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
757: x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
758: x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
759: x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
760: idiag -= 36;
761: i2 -= 6;
762: for (i=m-2; i>=0; i--) {
763: v = aa + 36*(diag[i]+1);
764: vi = aj + diag[i] + 1;
765: nz = ai[i+1] - diag[i] - 1;
766: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
767: while (nz--) {
768: idx = 6*(*vi++);
769: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
770: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
771: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
772: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
773: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
774: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
775: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
776: v += 36;
777: }
778: x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
779: x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
780: x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
781: x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
782: x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
783: x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
784: idiag -= 36;
785: i2 -= 6;
786: }
787: PetscLogFlops(36*(a->nz));
788: }
789: } else {
790: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
791: }
792: VecRestoreArray(xx,&x);
793: VecRestoreArray(bb,(PetscScalar**)&b);
794: return(0);
795: }
799: PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
800: {
801: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
802: PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
803: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
804: PetscErrorCode ierr;
805: PetscInt m = a->mbs,i,i2,nz,idx;
806: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
809: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
810: its = its*lits;
811: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
812: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
813: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
814: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
815: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
817: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
819: diag = a->diag;
820: idiag = a->idiag;
821: VecGetArray(xx,&x);
822: VecGetArray(bb,(PetscScalar**)&b);
824: if (flag & SOR_ZERO_INITIAL_GUESS) {
825: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
826: x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
827: x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
828: x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
829: x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
830: x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
831: x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
832: x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
833: i2 = 7;
834: idiag += 49;
835: for (i=1; i<m; i++) {
836: v = aa + 49*ai[i];
837: vi = aj + ai[i];
838: nz = diag[i] - ai[i];
839: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
840: while (nz--) {
841: idx = 7*(*vi++);
842: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
843: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
844: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
845: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
846: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
847: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
848: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
849: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
850: v += 49;
851: }
852: x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
853: x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
854: x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
855: x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
856: x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
857: x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
858: x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
859: idiag += 49;
860: i2 += 7;
861: }
862: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
863: PetscLogFlops(49*(a->nz));
864: }
865: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
866: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
867: i2 = 0;
868: mdiag = a->idiag+49*a->mbs;
869: for (i=0; i<m; i++) {
870: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
871: x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
872: x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
873: x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
874: x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
875: x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
876: x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
877: x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
878: mdiag += 36;
879: i2 += 6;
880: }
881: PetscLogFlops(93*m);
882: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
883: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
884: }
885: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
886: idiag = a->idiag+49*a->mbs - 49;
887: i2 = 7*m - 7;
888: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
889: x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
890: x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
891: x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
892: x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
893: x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
894: x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
895: x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
896: idiag -= 49;
897: i2 -= 7;
898: for (i=m-2; i>=0; i--) {
899: v = aa + 49*(diag[i]+1);
900: vi = aj + diag[i] + 1;
901: nz = ai[i+1] - diag[i] - 1;
902: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
903: while (nz--) {
904: idx = 7*(*vi++);
905: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
906: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
907: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
908: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
909: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
910: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
911: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
912: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
913: v += 49;
914: }
915: x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
916: x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
917: x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
918: x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
919: x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
920: x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
921: x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
922: idiag -= 49;
923: i2 -= 7;
924: }
925: PetscLogFlops(49*(a->nz));
926: }
927: } else {
928: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
929: }
930: VecRestoreArray(xx,&x);
931: VecRestoreArray(bb,(PetscScalar**)&b);
932: return(0);
933: }
935: /*
936: Special version for direct calls from Fortran (Used in PETSc-fun3d)
937: */
938: #if defined(PETSC_HAVE_FORTRAN_CAPS)
939: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
940: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
941: #define matsetvaluesblocked4_ matsetvaluesblocked4
942: #endif
947: void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
948: {
949: Mat A = *AA;
950: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
951: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
952: PetscInt *ai=a->i,*ailen=a->ilen;
953: PetscInt *aj=a->j,stepval,lastcol = -1;
954: const PetscScalar *value = v;
955: MatScalar *ap,*aa = a->a,*bap;
958: if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
959: stepval = (n-1)*4;
960: for (k=0; k<m; k++) { /* loop over added rows */
961: row = im[k];
962: rp = aj + ai[row];
963: ap = aa + 16*ai[row];
964: nrow = ailen[row];
965: low = 0;
966: high = nrow;
967: for (l=0; l<n; l++) { /* loop over added columns */
968: col = in[l];
969: if (col <= lastcol) low = 0; else high = nrow;
970: lastcol = col;
971: value = v + k*(stepval+4 + l)*4;
972: while (high-low > 7) {
973: t = (low+high)/2;
974: if (rp[t] > col) high = t;
975: else low = t;
976: }
977: for (i=low; i<high; i++) {
978: if (rp[i] > col) break;
979: if (rp[i] == col) {
980: bap = ap + 16*i;
981: for (ii=0; ii<4; ii++,value+=stepval) {
982: for (jj=ii; jj<16; jj+=4) {
983: bap[jj] += *value++;
984: }
985: }
986: goto noinsert2;
987: }
988: }
989: N = nrow++ - 1;
990: high++; /* added new column index thus must search to one higher than before */
991: /* shift up all the later entries in this row */
992: for (ii=N; ii>=i; ii--) {
993: rp[ii+1] = rp[ii];
994: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
995: }
996: if (N >= i) {
997: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
998: }
999: rp[i] = col;
1000: bap = ap + 16*i;
1001: for (ii=0; ii<4; ii++,value+=stepval) {
1002: for (jj=ii; jj<16; jj+=4) {
1003: bap[jj] = *value++;
1004: }
1005: }
1006: noinsert2:;
1007: low = i;
1008: }
1009: ailen[row] = nrow;
1010: }
1011: PetscFunctionReturnVoid();
1012: }
1015: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1016: #define matsetvalues4_ MATSETVALUES4
1017: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1018: #define matsetvalues4_ matsetvalues4
1019: #endif
1024: void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1025: {
1026: Mat A = *AA;
1027: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1028: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1029: PetscInt *ai=a->i,*ailen=a->ilen;
1030: PetscInt *aj=a->j,brow,bcol;
1031: PetscInt ridx,cidx,lastcol = -1;
1032: MatScalar *ap,value,*aa=a->a,*bap;
1033:
1035: for (k=0; k<m; k++) { /* loop over added rows */
1036: row = im[k]; brow = row/4;
1037: rp = aj + ai[brow];
1038: ap = aa + 16*ai[brow];
1039: nrow = ailen[brow];
1040: low = 0;
1041: high = nrow;
1042: for (l=0; l<n; l++) { /* loop over added columns */
1043: col = in[l]; bcol = col/4;
1044: ridx = row % 4; cidx = col % 4;
1045: value = v[l + k*n];
1046: if (col <= lastcol) low = 0; else high = nrow;
1047: lastcol = col;
1048: while (high-low > 7) {
1049: t = (low+high)/2;
1050: if (rp[t] > bcol) high = t;
1051: else low = t;
1052: }
1053: for (i=low; i<high; i++) {
1054: if (rp[i] > bcol) break;
1055: if (rp[i] == bcol) {
1056: bap = ap + 16*i + 4*cidx + ridx;
1057: *bap += value;
1058: goto noinsert1;
1059: }
1060: }
1061: N = nrow++ - 1;
1062: high++; /* added new column thus must search to one higher than before */
1063: /* shift up all the later entries in this row */
1064: for (ii=N; ii>=i; ii--) {
1065: rp[ii+1] = rp[ii];
1066: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1067: }
1068: if (N>=i) {
1069: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1070: }
1071: rp[i] = bcol;
1072: ap[16*i + 4*cidx + ridx] = value;
1073: noinsert1:;
1074: low = i;
1075: }
1076: ailen[brow] = nrow;
1077: }
1078: PetscFunctionReturnVoid();
1079: }
1082: /* UGLY, ugly, ugly
1083: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
1084: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
1085: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
1086: converts the entries into single precision and then calls ..._MatScalar() to put them
1087: into the single precision data structures.
1088: */
1089: #if defined(PETSC_USE_MAT_SINGLE)
1090: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
1091: #else
1092: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
1093: #endif
1095: #define CHUNKSIZE 10
1097: /*
1098: Checks for missing diagonals
1099: */
1102: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1103: {
1104: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1106: PetscInt *diag,*jj = a->j,i;
1109: MatMarkDiagonal_SeqBAIJ(A);
1110: *missing = PETSC_FALSE;
1111: diag = a->diag;
1112: for (i=0; i<a->mbs; i++) {
1113: if (jj[diag[i]] != i) {
1114: *missing = PETSC_TRUE;
1115: if (d) *d = i;
1116: }
1117: }
1118: return(0);
1119: }
1123: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1124: {
1125: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1127: PetscInt i,j,m = a->mbs;
1130: if (!a->diag) {
1131: PetscMalloc(m*sizeof(PetscInt),&a->diag);
1132: }
1133: for (i=0; i<m; i++) {
1134: a->diag[i] = a->i[i+1];
1135: for (j=a->i[i]; j<a->i[i+1]; j++) {
1136: if (a->j[j] == i) {
1137: a->diag[i] = j;
1138: break;
1139: }
1140: }
1141: }
1142: return(0);
1143: }
1146: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1150: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1151: {
1152: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1154: PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
1155: PetscInt *tia, *tja;
1158: *nn = n;
1159: if (!ia) return(0);
1160: if (symmetric) {
1161: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1162: } else {
1163: tia = a->i; tja = a->j;
1164: }
1165:
1166: if (!blockcompressed && bs > 1) {
1167: (*nn) *= bs;
1168: nbs = bs;
1169: /* malloc & create the natural set of indices */
1170: PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1171: if (n) {
1172: (*ia)[0] = 0;
1173: for (j=1; j<bs; j++) {
1174: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1175: }
1176: }
1178: for (i=1; i<n; i++) {
1179: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1180: for (j=1; j<bs; j++) {
1181: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1182: }
1183: }
1184: if (n) {
1185: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1186: }
1188: if (ja) {
1189: PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1190: cnt = 0;
1191: for (i=0; i<n; i++) {
1192: for (j=0; j<bs; j++) {
1193: for (k=tia[i]; k<tia[i+1]; k++) {
1194: for (l=0; l<bs; l++) {
1195: (*ja)[cnt++] = bs*tja[k] + l;
1196: }
1197: }
1198: }
1199: }
1200: }
1202: n *= bs;
1203: nz *= bs*bs;
1204: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1205: PetscFree(tia);
1206: PetscFree(tja);
1207: }
1208: } else {
1209: *ia = tia;
1210: if (ja) *ja = tja;
1211: }
1212: if (oshift == 1) {
1213: for (i=0; i<n+nbs; i++) (*ia)[i]++;
1214: if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1215: }
1216: return(0);
1217: }
1221: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1222: {
1223: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1225: PetscInt i,n = a->mbs,nz = a->i[n];
1228: if (!ia) return(0);
1229: if (!blockcompressed && A->rmap.bs > 1) {
1230: PetscFree(*ia);
1231: if (ja) {PetscFree(*ja);}
1232: } else if (symmetric) {
1233: PetscFree(*ia);
1234: if (ja) {PetscFree(*ja);}
1235: } else if (oshift == 1) { /* blockcompressed */
1236: for (i=0; i<n+1; i++) a->i[i]--;
1237: if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1238: }
1239: return(0);
1240: }
1244: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1245: {
1246: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1250: #if defined(PETSC_USE_LOG)
1251: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
1252: #endif
1253: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1254: if (a->row) {
1255: ISDestroy(a->row);
1256: }
1257: if (a->col) {
1258: ISDestroy(a->col);
1259: }
1260: PetscFree(a->diag);
1261: PetscFree(a->idiag);
1262: PetscFree2(a->imax,a->ilen);
1263: PetscFree(a->solve_work);
1264: PetscFree(a->mult_work);
1265: if (a->icol) {ISDestroy(a->icol);}
1266: PetscFree(a->saved_values);
1267: #if defined(PETSC_USE_MAT_SINGLE)
1268: PetscFree(a->setvaluescopy);
1269: #endif
1270: PetscFree(a->xtoy);
1271: if (a->compressedrow.use){PetscFree(a->compressedrow.i);}
1273: PetscFree(a);
1275: PetscObjectChangeTypeName((PetscObject)A,0);
1276: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
1277: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
1278: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
1279: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
1280: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
1281: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
1282: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
1283: return(0);
1284: }
1288: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1289: {
1290: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1294: switch (op) {
1295: case MAT_ROW_ORIENTED:
1296: a->roworiented = flg;
1297: break;
1298: case MAT_KEEP_ZEROED_ROWS:
1299: a->keepzeroedrows = flg;
1300: break;
1301: case MAT_NEW_NONZERO_LOCATIONS:
1302: a->nonew = (flg ? 0 : 1);
1303: break;
1304: case MAT_NEW_NONZERO_LOCATION_ERR:
1305: a->nonew = (flg ? -1 : 0);
1306: break;
1307: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1308: a->nonew = (flg ? -2 : 0);
1309: break;
1310: case MAT_NEW_DIAGONALS:
1311: case MAT_IGNORE_OFF_PROC_ENTRIES:
1312: case MAT_USE_HASH_TABLE:
1313: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1314: break;
1315: case MAT_SYMMETRIC:
1316: case MAT_STRUCTURALLY_SYMMETRIC:
1317: case MAT_HERMITIAN:
1318: case MAT_SYMMETRY_ETERNAL:
1319: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1320: break;
1321: default:
1322: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1323: }
1324: return(0);
1325: }
1329: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1330: {
1331: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1333: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1334: MatScalar *aa,*aa_i;
1335: PetscScalar *v_i;
1338: bs = A->rmap.bs;
1339: ai = a->i;
1340: aj = a->j;
1341: aa = a->a;
1342: bs2 = a->bs2;
1343:
1344: if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1345:
1346: bn = row/bs; /* Block number */
1347: bp = row % bs; /* Block Position */
1348: M = ai[bn+1] - ai[bn];
1349: *nz = bs*M;
1350:
1351: if (v) {
1352: *v = 0;
1353: if (*nz) {
1354: PetscMalloc((*nz)*sizeof(PetscScalar),v);
1355: for (i=0; i<M; i++) { /* for each block in the block row */
1356: v_i = *v + i*bs;
1357: aa_i = aa + bs2*(ai[bn] + i);
1358: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1359: }
1360: }
1361: }
1363: if (idx) {
1364: *idx = 0;
1365: if (*nz) {
1366: PetscMalloc((*nz)*sizeof(PetscInt),idx);
1367: for (i=0; i<M; i++) { /* for each block in the block row */
1368: idx_i = *idx + i*bs;
1369: itmp = bs*aj[ai[bn] + i];
1370: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1371: }
1372: }
1373: }
1374: return(0);
1375: }
1379: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1380: {
1384: if (idx) {PetscFree(*idx);}
1385: if (v) {PetscFree(*v);}
1386: return(0);
1387: }
1391: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1392: {
1393: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1394: Mat C;
1396: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1397: PetscInt *rows,*cols,bs2=a->bs2;
1398: PetscScalar *array;
1401: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1402: PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1403: PetscMemzero(col,(1+nbs)*sizeof(PetscInt));
1405: #if defined(PETSC_USE_MAT_SINGLE)
1406: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
1407: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1408: #else
1409: array = a->a;
1410: #endif
1412: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1413: MatCreate(((PetscObject)A)->comm,&C);
1414: MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);
1415: MatSetType(C,((PetscObject)A)->type_name);
1416: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1417: PetscFree(col);
1418: PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1419: cols = rows + bs;
1420: for (i=0; i<mbs; i++) {
1421: cols[0] = i*bs;
1422: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1423: len = ai[i+1] - ai[i];
1424: for (j=0; j<len; j++) {
1425: rows[0] = (*aj++)*bs;
1426: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1427: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1428: array += bs2;
1429: }
1430: }
1431: PetscFree(rows);
1432: #if defined(PETSC_USE_MAT_SINGLE)
1433: PetscFree(array);
1434: #endif
1435:
1436: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1437: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1438:
1439: if (B) {
1440: *B = C;
1441: } else {
1442: MatHeaderCopy(A,C);
1443: }
1444: return(0);
1445: }
1449: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1450: {
1451: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1453: PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1454: int fd;
1455: PetscScalar *aa;
1456: FILE *file;
1459: PetscViewerBinaryGetDescriptor(viewer,&fd);
1460: PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);
1461: col_lens[0] = MAT_FILE_COOKIE;
1463: col_lens[1] = A->rmap.N;
1464: col_lens[2] = A->cmap.n;
1465: col_lens[3] = a->nz*bs2;
1467: /* store lengths of each row and write (including header) to file */
1468: count = 0;
1469: for (i=0; i<a->mbs; i++) {
1470: for (j=0; j<bs; j++) {
1471: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1472: }
1473: }
1474: PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);
1475: PetscFree(col_lens);
1477: /* store column indices (zero start index) */
1478: PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1479: count = 0;
1480: for (i=0; i<a->mbs; i++) {
1481: for (j=0; j<bs; j++) {
1482: for (k=a->i[i]; k<a->i[i+1]; k++) {
1483: for (l=0; l<bs; l++) {
1484: jj[count++] = bs*a->j[k] + l;
1485: }
1486: }
1487: }
1488: }
1489: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1490: PetscFree(jj);
1492: /* store nonzero values */
1493: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1494: count = 0;
1495: for (i=0; i<a->mbs; i++) {
1496: for (j=0; j<bs; j++) {
1497: for (k=a->i[i]; k<a->i[i+1]; k++) {
1498: for (l=0; l<bs; l++) {
1499: aa[count++] = a->a[bs2*k + l*bs + j];
1500: }
1501: }
1502: }
1503: }
1504: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1505: PetscFree(aa);
1507: PetscViewerBinaryGetInfoPointer(viewer,&file);
1508: if (file) {
1509: fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1510: }
1511: return(0);
1512: }
1516: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1517: {
1518: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1519: PetscErrorCode ierr;
1520: PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1521: PetscViewerFormat format;
1524: PetscViewerGetFormat(viewer,&format);
1525: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1526: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1527: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1528: Mat aij;
1529: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1530: MatView(aij,viewer);
1531: MatDestroy(aij);
1532: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1533: return(0);
1534: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1535: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1536: for (i=0; i<a->mbs; i++) {
1537: for (j=0; j<bs; j++) {
1538: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1539: for (k=a->i[i]; k<a->i[i+1]; k++) {
1540: for (l=0; l<bs; l++) {
1541: #if defined(PETSC_USE_COMPLEX)
1542: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1543: PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1544: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1545: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1546: PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1547: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1548: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1549: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1550: }
1551: #else
1552: if (a->a[bs2*k + l*bs + j] != 0.0) {
1553: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1554: }
1555: #endif
1556: }
1557: }
1558: PetscViewerASCIIPrintf(viewer,"\n");
1559: }
1560: }
1561: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1562: } else {
1563: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1564: for (i=0; i<a->mbs; i++) {
1565: for (j=0; j<bs; j++) {
1566: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1567: for (k=a->i[i]; k<a->i[i+1]; k++) {
1568: for (l=0; l<bs; l++) {
1569: #if defined(PETSC_USE_COMPLEX)
1570: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1571: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1572: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1573: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1574: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1575: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1576: } else {
1577: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1578: }
1579: #else
1580: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1581: #endif
1582: }
1583: }
1584: PetscViewerASCIIPrintf(viewer,"\n");
1585: }
1586: }
1587: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1588: }
1589: PetscViewerFlush(viewer);
1590: return(0);
1591: }
1595: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1596: {
1597: Mat A = (Mat) Aa;
1598: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1600: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1601: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1602: MatScalar *aa;
1603: PetscViewer viewer;
1607: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1608: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1610: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1612: /* loop over matrix elements drawing boxes */
1613: color = PETSC_DRAW_BLUE;
1614: for (i=0,row=0; i<mbs; i++,row+=bs) {
1615: for (j=a->i[i]; j<a->i[i+1]; j++) {
1616: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1617: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1618: aa = a->a + j*bs2;
1619: for (k=0; k<bs; k++) {
1620: for (l=0; l<bs; l++) {
1621: if (PetscRealPart(*aa++) >= 0.) continue;
1622: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1623: }
1624: }
1625: }
1626: }
1627: color = PETSC_DRAW_CYAN;
1628: for (i=0,row=0; i<mbs; i++,row+=bs) {
1629: for (j=a->i[i]; j<a->i[i+1]; j++) {
1630: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1631: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1632: aa = a->a + j*bs2;
1633: for (k=0; k<bs; k++) {
1634: for (l=0; l<bs; l++) {
1635: if (PetscRealPart(*aa++) != 0.) continue;
1636: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1637: }
1638: }
1639: }
1640: }
1642: color = PETSC_DRAW_RED;
1643: for (i=0,row=0; i<mbs; i++,row+=bs) {
1644: for (j=a->i[i]; j<a->i[i+1]; j++) {
1645: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1646: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1647: aa = a->a + j*bs2;
1648: for (k=0; k<bs; k++) {
1649: for (l=0; l<bs; l++) {
1650: if (PetscRealPart(*aa++) <= 0.) continue;
1651: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1652: }
1653: }
1654: }
1655: }
1656: return(0);
1657: }
1661: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1662: {
1664: PetscReal xl,yl,xr,yr,w,h;
1665: PetscDraw draw;
1666: PetscTruth isnull;
1670: PetscViewerDrawGetDraw(viewer,0,&draw);
1671: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1673: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1674: xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1675: xr += w; yr += h; xl = -w; yl = -h;
1676: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1677: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1678: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1679: return(0);
1680: }
1684: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1685: {
1687: PetscTruth iascii,isbinary,isdraw;
1690: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1691: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1692: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1693: if (iascii){
1694: MatView_SeqBAIJ_ASCII(A,viewer);
1695: } else if (isbinary) {
1696: MatView_SeqBAIJ_Binary(A,viewer);
1697: } else if (isdraw) {
1698: MatView_SeqBAIJ_Draw(A,viewer);
1699: } else {
1700: Mat B;
1701: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1702: MatView(B,viewer);
1703: MatDestroy(B);
1704: }
1705: return(0);
1706: }
1711: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1712: {
1713: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1714: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1715: PetscInt *ai = a->i,*ailen = a->ilen;
1716: PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1717: MatScalar *ap,*aa = a->a;
1720: for (k=0; k<m; k++) { /* loop over rows */
1721: row = im[k]; brow = row/bs;
1722: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1723: if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1724: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1725: nrow = ailen[brow];
1726: for (l=0; l<n; l++) { /* loop over columns */
1727: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1728: if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1729: col = in[l] ;
1730: bcol = col/bs;
1731: cidx = col%bs;
1732: ridx = row%bs;
1733: high = nrow;
1734: low = 0; /* assume unsorted */
1735: while (high-low > 5) {
1736: t = (low+high)/2;
1737: if (rp[t] > bcol) high = t;
1738: else low = t;
1739: }
1740: for (i=low; i<high; i++) {
1741: if (rp[i] > bcol) break;
1742: if (rp[i] == bcol) {
1743: *v++ = ap[bs2*i+bs*cidx+ridx];
1744: goto finished;
1745: }
1746: }
1747: *v++ = 0.0;
1748: finished:;
1749: }
1750: }
1751: return(0);
1752: }
1754: #if defined(PETSC_USE_MAT_SINGLE)
1757: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1758: {
1759: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1761: PetscInt i,N = m*n*b->bs2;
1762: MatScalar *vsingle;
1765: if (N > b->setvalueslen) {
1766: PetscFree(b->setvaluescopy);
1767: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1768: b->setvalueslen = N;
1769: }
1770: vsingle = b->setvaluescopy;
1771: for (i=0; i<N; i++) {
1772: vsingle[i] = v[i];
1773: }
1774: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1775: return(0);
1776: }
1777: #endif
1782: PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1783: {
1784: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1785: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1786: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1787: PetscErrorCode ierr;
1788: PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1789: PetscTruth roworiented=a->roworiented;
1790: const MatScalar *value = v;
1791: MatScalar *ap,*aa = a->a,*bap;
1794: if (roworiented) {
1795: stepval = (n-1)*bs;
1796: } else {
1797: stepval = (m-1)*bs;
1798: }
1799: for (k=0; k<m; k++) { /* loop over added rows */
1800: row = im[k];
1801: if (row < 0) continue;
1802: #if defined(PETSC_USE_DEBUG)
1803: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1804: #endif
1805: rp = aj + ai[row];
1806: ap = aa + bs2*ai[row];
1807: rmax = imax[row];
1808: nrow = ailen[row];
1809: low = 0;
1810: high = nrow;
1811: for (l=0; l<n; l++) { /* loop over added columns */
1812: if (in[l] < 0) continue;
1813: #if defined(PETSC_USE_DEBUG)
1814: if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1815: #endif
1816: col = in[l];
1817: if (roworiented) {
1818: value = v + k*(stepval+bs)*bs + l*bs;
1819: } else {
1820: value = v + l*(stepval+bs)*bs + k*bs;
1821: }
1822: if (col <= lastcol) low = 0; else high = nrow;
1823: lastcol = col;
1824: while (high-low > 7) {
1825: t = (low+high)/2;
1826: if (rp[t] > col) high = t;
1827: else low = t;
1828: }
1829: for (i=low; i<high; i++) {
1830: if (rp[i] > col) break;
1831: if (rp[i] == col) {
1832: bap = ap + bs2*i;
1833: if (roworiented) {
1834: if (is == ADD_VALUES) {
1835: for (ii=0; ii<bs; ii++,value+=stepval) {
1836: for (jj=ii; jj<bs2; jj+=bs) {
1837: bap[jj] += *value++;
1838: }
1839: }
1840: } else {
1841: for (ii=0; ii<bs; ii++,value+=stepval) {
1842: for (jj=ii; jj<bs2; jj+=bs) {
1843: bap[jj] = *value++;
1844: }
1845: }
1846: }
1847: } else {
1848: if (is == ADD_VALUES) {
1849: for (ii=0; ii<bs; ii++,value+=stepval) {
1850: for (jj=0; jj<bs; jj++) {
1851: *bap++ += *value++;
1852: }
1853: }
1854: } else {
1855: for (ii=0; ii<bs; ii++,value+=stepval) {
1856: for (jj=0; jj<bs; jj++) {
1857: *bap++ = *value++;
1858: }
1859: }
1860: }
1861: }
1862: goto noinsert2;
1863: }
1864: }
1865: if (nonew == 1) goto noinsert2;
1866: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1867: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1868: N = nrow++ - 1; high++;
1869: /* shift up all the later entries in this row */
1870: for (ii=N; ii>=i; ii--) {
1871: rp[ii+1] = rp[ii];
1872: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1873: }
1874: if (N >= i) {
1875: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1876: }
1877: rp[i] = col;
1878: bap = ap + bs2*i;
1879: if (roworiented) {
1880: for (ii=0; ii<bs; ii++,value+=stepval) {
1881: for (jj=ii; jj<bs2; jj+=bs) {
1882: bap[jj] = *value++;
1883: }
1884: }
1885: } else {
1886: for (ii=0; ii<bs; ii++,value+=stepval) {
1887: for (jj=0; jj<bs; jj++) {
1888: *bap++ = *value++;
1889: }
1890: }
1891: }
1892: noinsert2:;
1893: low = i;
1894: }
1895: ailen[row] = nrow;
1896: }
1897: return(0);
1898: }
1902: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1903: {
1904: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1905: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1906: PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen;
1908: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1909: MatScalar *aa = a->a,*ap;
1910: PetscReal ratio=0.6;
1913: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1915: if (m) rmax = ailen[0];
1916: for (i=1; i<mbs; i++) {
1917: /* move each row back by the amount of empty slots (fshift) before it*/
1918: fshift += imax[i-1] - ailen[i-1];
1919: rmax = PetscMax(rmax,ailen[i]);
1920: if (fshift) {
1921: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1922: N = ailen[i];
1923: for (j=0; j<N; j++) {
1924: ip[j-fshift] = ip[j];
1925: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1926: }
1927: }
1928: ai[i] = ai[i-1] + ailen[i-1];
1929: }
1930: if (mbs) {
1931: fshift += imax[mbs-1] - ailen[mbs-1];
1932: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1933: }
1934: /* reset ilen and imax for each row */
1935: for (i=0; i<mbs; i++) {
1936: ailen[i] = imax[i] = ai[i+1] - ai[i];
1937: }
1938: a->nz = ai[mbs];
1940: /* diagonals may have moved, so kill the diagonal pointers */
1941: a->idiagvalid = PETSC_FALSE;
1942: if (fshift && a->diag) {
1943: PetscFree(a->diag);
1944: PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1945: a->diag = 0;
1946: }
1947: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);
1948: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1949: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1950: a->reallocs = 0;
1951: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1953: /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1954: if (a->compressedrow.use){
1955: Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1956: }
1958: A->same_nonzero = PETSC_TRUE;
1959: return(0);
1960: }
1962: /*
1963: This function returns an array of flags which indicate the locations of contiguous
1964: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1965: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1966: Assume: sizes should be long enough to hold all the values.
1967: */
1970: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1971: {
1972: PetscInt i,j,k,row;
1973: PetscTruth flg;
1976: for (i=0,j=0; i<n; j++) {
1977: row = idx[i];
1978: if (row%bs!=0) { /* Not the begining of a block */
1979: sizes[j] = 1;
1980: i++;
1981: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1982: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1983: i++;
1984: } else { /* Begining of the block, so check if the complete block exists */
1985: flg = PETSC_TRUE;
1986: for (k=1; k<bs; k++) {
1987: if (row+k != idx[i+k]) { /* break in the block */
1988: flg = PETSC_FALSE;
1989: break;
1990: }
1991: }
1992: if (flg) { /* No break in the bs */
1993: sizes[j] = bs;
1994: i+= bs;
1995: } else {
1996: sizes[j] = 1;
1997: i++;
1998: }
1999: }
2000: }
2001: *bs_max = j;
2002: return(0);
2003: }
2004:
2007: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2008: {
2009: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2011: PetscInt i,j,k,count,*rows;
2012: PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
2013: PetscScalar zero = 0.0;
2014: MatScalar *aa;
2017: /* Make a copy of the IS and sort it */
2018: /* allocate memory for rows,sizes */
2019: PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
2020: sizes = rows + is_n;
2022: /* copy IS values to rows, and sort them */
2023: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2024: PetscSortInt(is_n,rows);
2025: if (baij->keepzeroedrows) {
2026: for (i=0; i<is_n; i++) { sizes[i] = 1; }
2027: bs_max = is_n;
2028: A->same_nonzero = PETSC_TRUE;
2029: } else {
2030: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2031: A->same_nonzero = PETSC_FALSE;
2032: }
2034: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2035: row = rows[j];
2036: if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2037: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2038: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2039: if (sizes[i] == bs && !baij->keepzeroedrows) {
2040: if (diag != 0.0) {
2041: if (baij->ilen[row/bs] > 0) {
2042: baij->ilen[row/bs] = 1;
2043: baij->j[baij->i[row/bs]] = row/bs;
2044: PetscMemzero(aa,count*bs*sizeof(MatScalar));
2045: }
2046: /* Now insert all the diagonal values for this bs */
2047: for (k=0; k<bs; k++) {
2048: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2049: }
2050: } else { /* (diag == 0.0) */
2051: baij->ilen[row/bs] = 0;
2052: } /* end (diag == 0.0) */
2053: } else { /* (sizes[i] != bs) */
2054: #if defined (PETSC_USE_DEBUG)
2055: if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2056: #endif
2057: for (k=0; k<count; k++) {
2058: aa[0] = zero;
2059: aa += bs;
2060: }
2061: if (diag != 0.0) {
2062: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2063: }
2064: }
2065: }
2067: PetscFree(rows);
2068: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2069: return(0);
2070: }
2074: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2075: {
2076: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2077: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2078: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2079: PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
2081: PetscInt ridx,cidx,bs2=a->bs2;
2082: PetscTruth roworiented=a->roworiented;
2083: MatScalar *ap,value,*aa=a->a,*bap;
2086: for (k=0; k<m; k++) { /* loop over added rows */
2087: row = im[k];
2088: brow = row/bs;
2089: if (row < 0) continue;
2090: #if defined(PETSC_USE_DEBUG)
2091: if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
2092: #endif
2093: rp = aj + ai[brow];
2094: ap = aa + bs2*ai[brow];
2095: rmax = imax[brow];
2096: nrow = ailen[brow];
2097: low = 0;
2098: high = nrow;
2099: for (l=0; l<n; l++) { /* loop over added columns */
2100: if (in[l] < 0) continue;
2101: #if defined(PETSC_USE_DEBUG)
2102: if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
2103: #endif
2104: col = in[l]; bcol = col/bs;
2105: ridx = row % bs; cidx = col % bs;
2106: if (roworiented) {
2107: value = v[l + k*n];
2108: } else {
2109: value = v[k + l*m];
2110: }
2111: if (col <= lastcol) low = 0; else high = nrow;
2112: lastcol = col;
2113: while (high-low > 7) {
2114: t = (low+high)/2;
2115: if (rp[t] > bcol) high = t;
2116: else low = t;
2117: }
2118: for (i=low; i<high; i++) {
2119: if (rp[i] > bcol) break;
2120: if (rp[i] == bcol) {
2121: bap = ap + bs2*i + bs*cidx + ridx;
2122: if (is == ADD_VALUES) *bap += value;
2123: else *bap = value;
2124: goto noinsert1;
2125: }
2126: }
2127: if (nonew == 1) goto noinsert1;
2128: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2129: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2130: N = nrow++ - 1; high++;
2131: /* shift up all the later entries in this row */
2132: for (ii=N; ii>=i; ii--) {
2133: rp[ii+1] = rp[ii];
2134: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2135: }
2136: if (N>=i) {
2137: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2138: }
2139: rp[i] = bcol;
2140: ap[bs2*i + bs*cidx + ridx] = value;
2141: a->nz++;
2142: noinsert1:;
2143: low = i;
2144: }
2145: ailen[brow] = nrow;
2146: }
2147: A->same_nonzero = PETSC_FALSE;
2148: return(0);
2149: }
2154: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
2155: {
2156: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2157: Mat outA;
2159: PetscTruth row_identity,col_identity;
2162: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2163: ISIdentity(row,&row_identity);
2164: ISIdentity(col,&col_identity);
2165: if (!row_identity || !col_identity) {
2166: SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2167: }
2169: outA = inA;
2170: inA->factor = FACTOR_LU;
2172: MatMarkDiagonal_SeqBAIJ(inA);
2174: PetscObjectReference((PetscObject)row);
2175: if (a->row) { ISDestroy(a->row); }
2176: a->row = row;
2177: PetscObjectReference((PetscObject)col);
2178: if (a->col) { ISDestroy(a->col); }
2179: a->col = col;
2180:
2181: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2182: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2183: PetscLogObjectParent(inA,a->icol);
2184:
2185: /*
2186: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
2187: for ILU(0) factorization with natural ordering
2188: */
2189: if (inA->rmap.bs < 8) {
2190: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
2191: } else {
2192: if (!a->solve_work) {
2193: PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);
2194: PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));
2195: }
2196: }
2198: MatLUFactorNumeric(inA,info,&outA);
2200: return(0);
2201: }
2206: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2207: {
2208: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2209: PetscInt i,nz,nbs;
2212: nz = baij->maxnz/baij->bs2;
2213: nbs = baij->nbs;
2214: for (i=0; i<nz; i++) {
2215: baij->j[i] = indices[i];
2216: }
2217: baij->nz = nz;
2218: for (i=0; i<nbs; i++) {
2219: baij->ilen[i] = baij->imax[i];
2220: }
2222: return(0);
2223: }
2228: /*@
2229: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2230: in the matrix.
2232: Input Parameters:
2233: + mat - the SeqBAIJ matrix
2234: - indices - the column indices
2236: Level: advanced
2238: Notes:
2239: This can be called if you have precomputed the nonzero structure of the
2240: matrix and want to provide it to the matrix object to improve the performance
2241: of the MatSetValues() operation.
2243: You MUST have set the correct numbers of nonzeros per row in the call to
2244: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2246: MUST be called before any calls to MatSetValues();
2248: @*/
2249: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2250: {
2251: PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2256: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
2257: if (f) {
2258: (*f)(mat,indices);
2259: } else {
2260: SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2261: }
2262: return(0);
2263: }
2267: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2268: {
2269: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2271: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2272: PetscReal atmp;
2273: PetscScalar *x,zero = 0.0;
2274: MatScalar *aa;
2275: PetscInt ncols,brow,krow,kcol;
2278: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2279: bs = A->rmap.bs;
2280: aa = a->a;
2281: ai = a->i;
2282: aj = a->j;
2283: mbs = a->mbs;
2285: VecSet(v,zero);
2286: if (idx) {
2287: for (i=0; i<A->rmap.n;i++) idx[i] = 0;
2288: }
2289: VecGetArray(v,&x);
2290: VecGetLocalSize(v,&n);
2291: if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2292: for (i=0; i<mbs; i++) {
2293: ncols = ai[1] - ai[0]; ai++;
2294: brow = bs*i;
2295: for (j=0; j<ncols; j++){
2296: for (kcol=0; kcol<bs; kcol++){
2297: for (krow=0; krow<bs; krow++){
2298: atmp = PetscAbsScalar(*aa);aa++;
2299: row = brow + krow; /* row index */
2300: /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2301: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2302: }
2303: }
2304: aj++;
2305: }
2306: }
2307: VecRestoreArray(v,&x);
2308: return(0);
2309: }
2313: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2314: {
2318: /* If the two matrices have the same copy implementation, use fast copy. */
2319: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2320: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2321: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2323: if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
2324: SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2325: }
2326: PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));
2327: } else {
2328: MatCopy_Basic(A,B,str);
2329: }
2330: return(0);
2331: }
2335: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2336: {
2340: MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);
2341: return(0);
2342: }
2346: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2347: {
2348: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2350: *array = a->a;
2351: return(0);
2352: }
2356: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2357: {
2359: return(0);
2360: }
2362: #include petscblaslapack.h
2365: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2366: {
2367: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2369: PetscInt i,bs=Y->rmap.bs,j,bs2;
2370: PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz;
2373: if (str == SAME_NONZERO_PATTERN) {
2374: PetscScalar alpha = a;
2375: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2376: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2377: if (y->xtoy && y->XtoY != X) {
2378: PetscFree(y->xtoy);
2379: MatDestroy(y->XtoY);
2380: }
2381: if (!y->xtoy) { /* get xtoy */
2382: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2383: y->XtoY = X;
2384: }
2385: bs2 = bs*bs;
2386: for (i=0; i<x->nz; i++) {
2387: j = 0;
2388: while (j < bs2){
2389: y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2390: j++;
2391: }
2392: }
2393: PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2394: } else {
2395: MatAXPY_Basic(Y,a,X,str);
2396: }
2397: return(0);
2398: }
2402: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2403: {
2404: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2405: PetscInt i,nz = a->bs2*a->i[a->mbs];
2406: PetscScalar *aa = a->a;
2409: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2410: return(0);
2411: }
2415: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2416: {
2417: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2418: PetscInt i,nz = a->bs2*a->i[a->mbs];
2419: PetscScalar *aa = a->a;
2422: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2423: return(0);
2424: }
2427: /* -------------------------------------------------------------------*/
2428: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2429: MatGetRow_SeqBAIJ,
2430: MatRestoreRow_SeqBAIJ,
2431: MatMult_SeqBAIJ_N,
2432: /* 4*/ MatMultAdd_SeqBAIJ_N,
2433: MatMultTranspose_SeqBAIJ,
2434: MatMultTransposeAdd_SeqBAIJ,
2435: MatSolve_SeqBAIJ_N,
2436: 0,
2437: 0,
2438: /*10*/ 0,
2439: MatLUFactor_SeqBAIJ,
2440: 0,
2441: 0,
2442: MatTranspose_SeqBAIJ,
2443: /*15*/ MatGetInfo_SeqBAIJ,
2444: MatEqual_SeqBAIJ,
2445: MatGetDiagonal_SeqBAIJ,
2446: MatDiagonalScale_SeqBAIJ,
2447: MatNorm_SeqBAIJ,
2448: /*20*/ 0,
2449: MatAssemblyEnd_SeqBAIJ,
2450: 0,
2451: MatSetOption_SeqBAIJ,
2452: MatZeroEntries_SeqBAIJ,
2453: /*25*/ MatZeroRows_SeqBAIJ,
2454: MatLUFactorSymbolic_SeqBAIJ,
2455: MatLUFactorNumeric_SeqBAIJ_N,
2456: MatCholeskyFactorSymbolic_SeqBAIJ,
2457: MatCholeskyFactorNumeric_SeqBAIJ_N,
2458: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2459: MatILUFactorSymbolic_SeqBAIJ,
2460: MatICCFactorSymbolic_SeqBAIJ,
2461: MatGetArray_SeqBAIJ,
2462: MatRestoreArray_SeqBAIJ,
2463: /*35*/ MatDuplicate_SeqBAIJ,
2464: 0,
2465: 0,
2466: MatILUFactor_SeqBAIJ,
2467: 0,
2468: /*40*/ MatAXPY_SeqBAIJ,
2469: MatGetSubMatrices_SeqBAIJ,
2470: MatIncreaseOverlap_SeqBAIJ,
2471: MatGetValues_SeqBAIJ,
2472: MatCopy_SeqBAIJ,
2473: /*45*/ 0,
2474: MatScale_SeqBAIJ,
2475: 0,
2476: 0,
2477: 0,
2478: /*50*/ 0,
2479: MatGetRowIJ_SeqBAIJ,
2480: MatRestoreRowIJ_SeqBAIJ,
2481: 0,
2482: 0,
2483: /*55*/ 0,
2484: 0,
2485: 0,
2486: 0,
2487: MatSetValuesBlocked_SeqBAIJ,
2488: /*60*/ MatGetSubMatrix_SeqBAIJ,
2489: MatDestroy_SeqBAIJ,
2490: MatView_SeqBAIJ,
2491: 0,
2492: 0,
2493: /*65*/ 0,
2494: 0,
2495: 0,
2496: 0,
2497: 0,
2498: /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2499: MatConvert_Basic,
2500: 0,
2501: 0,
2502: 0,
2503: /*75*/ 0,
2504: 0,
2505: 0,
2506: 0,
2507: 0,
2508: /*80*/ 0,
2509: 0,
2510: 0,
2511: 0,
2512: MatLoad_SeqBAIJ,
2513: /*85*/ 0,
2514: 0,
2515: 0,
2516: 0,
2517: 0,
2518: /*90*/ 0,
2519: 0,
2520: 0,
2521: 0,
2522: 0,
2523: /*95*/ 0,
2524: 0,
2525: 0,
2526: 0,
2527: 0,
2528: /*100*/0,
2529: 0,
2530: 0,
2531: 0,
2532: 0,
2533: /*105*/0,
2534: MatRealPart_SeqBAIJ,
2535: MatImaginaryPart_SeqBAIJ,
2536: 0,
2537: 0,
2538: /*110*/0,
2539: 0,
2540: 0,
2541: 0,
2542: MatMissingDiagonal_SeqBAIJ
2543: /*115*/
2544: };
2549: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2550: {
2551: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2552: PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2556: if (aij->nonew != 1) {
2557: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2558: }
2560: /* allocate space for values if not already there */
2561: if (!aij->saved_values) {
2562: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2563: }
2565: /* copy values over */
2566: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2567: return(0);
2568: }
2574: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2575: {
2576: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2578: PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2581: if (aij->nonew != 1) {
2582: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2583: }
2584: if (!aij->saved_values) {
2585: SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2586: }
2588: /* copy values over */
2589: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2590: return(0);
2591: }
2602: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2603: {
2604: Mat_SeqBAIJ *b;
2606: PetscInt i,mbs,nbs,bs2,newbs = bs;
2607: PetscTruth flg,skipallocation = PETSC_FALSE;
2611: if (nz == MAT_SKIP_ALLOCATION) {
2612: skipallocation = PETSC_TRUE;
2613: nz = 0;
2614: }
2616: PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2617: PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);
2618: PetscOptionsEnd();
2620: if (nnz && newbs != bs) {
2621: SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2622: }
2623: bs = newbs;
2625: B->rmap.bs = B->cmap.bs = bs;
2626: PetscMapSetUp(&B->rmap);
2627: PetscMapSetUp(&B->cmap);
2629: B->preallocated = PETSC_TRUE;
2631: mbs = B->rmap.n/bs;
2632: nbs = B->cmap.n/bs;
2633: bs2 = bs*bs;
2635: if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2636: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2637: }
2639: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2640: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2641: if (nnz) {
2642: for (i=0; i<mbs; i++) {
2643: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2644: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2645: }
2646: }
2648: b = (Mat_SeqBAIJ*)B->data;
2649: PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2650: PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2651: PetscOptionsEnd();
2653: B->ops->solve = MatSolve_SeqBAIJ_Update;
2654: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
2655: if (!flg) {
2656: switch (bs) {
2657: case 1:
2658: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2659: B->ops->mult = MatMult_SeqBAIJ_1;
2660: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2661: B->ops->pbrelax = MatPBRelax_SeqBAIJ_1;
2662: break;
2663: case 2:
2664: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2665: B->ops->mult = MatMult_SeqBAIJ_2;
2666: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2667: B->ops->pbrelax = MatPBRelax_SeqBAIJ_2;
2668: break;
2669: case 3:
2670: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2671: B->ops->mult = MatMult_SeqBAIJ_3;
2672: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2673: B->ops->pbrelax = MatPBRelax_SeqBAIJ_3;
2674: break;
2675: case 4:
2676: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2677: B->ops->mult = MatMult_SeqBAIJ_4;
2678: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2679: B->ops->pbrelax = MatPBRelax_SeqBAIJ_4;
2680: break;
2681: case 5:
2682: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2683: B->ops->mult = MatMult_SeqBAIJ_5;
2684: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2685: B->ops->pbrelax = MatPBRelax_SeqBAIJ_5;
2686: break;
2687: case 6:
2688: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2689: B->ops->mult = MatMult_SeqBAIJ_6;
2690: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2691: B->ops->pbrelax = MatPBRelax_SeqBAIJ_6;
2692: break;
2693: case 7:
2694: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2695: B->ops->mult = MatMult_SeqBAIJ_7;
2696: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2697: B->ops->pbrelax = MatPBRelax_SeqBAIJ_7;
2698: break;
2699: default:
2700: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2701: B->ops->mult = MatMult_SeqBAIJ_N;
2702: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2703: break;
2704: }
2705: }
2706: B->rmap.bs = bs;
2707: b->mbs = mbs;
2708: b->nbs = nbs;
2709: if (!skipallocation) {
2710: if (!b->imax) {
2711: PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2712: }
2713: /* b->ilen will count nonzeros in each block row so far. */
2714: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2715: if (!nnz) {
2716: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2717: else if (nz <= 0) nz = 1;
2718: for (i=0; i<mbs; i++) b->imax[i] = nz;
2719: nz = nz*mbs;
2720: } else {
2721: nz = 0;
2722: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2723: }
2725: /* allocate the matrix space */
2726: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2727: PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);
2728: PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2729: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2730: PetscMemzero(b->j,nz*sizeof(PetscInt));
2731: b->singlemalloc = PETSC_TRUE;
2732: b->i[0] = 0;
2733: for (i=1; i<mbs+1; i++) {
2734: b->i[i] = b->i[i-1] + b->imax[i-1];
2735: }
2736: b->free_a = PETSC_TRUE;
2737: b->free_ij = PETSC_TRUE;
2738: } else {
2739: b->free_a = PETSC_FALSE;
2740: b->free_ij = PETSC_FALSE;
2741: }
2743: B->rmap.bs = bs;
2744: b->bs2 = bs2;
2745: b->mbs = mbs;
2746: b->nz = 0;
2747: b->maxnz = nz*bs2;
2748: B->info.nz_unneeded = (PetscReal)b->maxnz;
2749: return(0);
2750: }
2753: /*MC
2754: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2755: block sparse compressed row format.
2757: Options Database Keys:
2758: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2760: Level: beginner
2762: .seealso: MatCreateSeqBAIJ()
2763: M*/
2768: PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2769: {
2771: PetscMPIInt size;
2772: Mat_SeqBAIJ *b;
2775: MPI_Comm_size(((PetscObject)B)->comm,&size);
2776: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2778: PetscNewLog(B,Mat_SeqBAIJ,&b);
2779: B->data = (void*)b;
2780: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2781: B->factor = 0;
2782: B->mapping = 0;
2783: b->row = 0;
2784: b->col = 0;
2785: b->icol = 0;
2786: b->reallocs = 0;
2787: b->saved_values = 0;
2788: #if defined(PETSC_USE_MAT_SINGLE)
2789: b->setvalueslen = 0;
2790: b->setvaluescopy = PETSC_NULL;
2791: #endif
2793: b->roworiented = PETSC_TRUE;
2794: b->nonew = 0;
2795: b->diag = 0;
2796: b->solve_work = 0;
2797: b->mult_work = 0;
2798: B->spptr = 0;
2799: B->info.nz_unneeded = (PetscReal)b->maxnz;
2800: b->keepzeroedrows = PETSC_FALSE;
2801: b->xtoy = 0;
2802: b->XtoY = 0;
2803: b->compressedrow.use = PETSC_FALSE;
2804: b->compressedrow.nrows = 0;
2805: b->compressedrow.i = PETSC_NULL;
2806: b->compressedrow.rindex = PETSC_NULL;
2807: b->compressedrow.checked = PETSC_FALSE;
2808: B->same_nonzero = PETSC_FALSE;
2810: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2811: "MatInvertBlockDiagonal_SeqBAIJ",
2812: MatInvertBlockDiagonal_SeqBAIJ);
2813: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2814: "MatStoreValues_SeqBAIJ",
2815: MatStoreValues_SeqBAIJ);
2816: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2817: "MatRetrieveValues_SeqBAIJ",
2818: MatRetrieveValues_SeqBAIJ);
2819: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2820: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2821: MatSeqBAIJSetColumnIndices_SeqBAIJ);
2822: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2823: "MatConvert_SeqBAIJ_SeqAIJ",
2824: MatConvert_SeqBAIJ_SeqAIJ);
2825: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2826: "MatConvert_SeqBAIJ_SeqSBAIJ",
2827: MatConvert_SeqBAIJ_SeqSBAIJ);
2828: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2829: "MatSeqBAIJSetPreallocation_SeqBAIJ",
2830: MatSeqBAIJSetPreallocation_SeqBAIJ);
2831: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
2832: return(0);
2833: }
2838: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2839: {
2840: Mat C;
2841: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2843: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2846: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2848: *B = 0;
2849: MatCreate(((PetscObject)A)->comm,&C);
2850: MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);
2851: MatSetType(C,((PetscObject)A)->type_name);
2852: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2853: c = (Mat_SeqBAIJ*)C->data;
2855: C->rmap.N = A->rmap.N;
2856: C->cmap.N = A->cmap.N;
2857: C->rmap.bs = A->rmap.bs;
2858: c->bs2 = a->bs2;
2859: c->mbs = a->mbs;
2860: c->nbs = a->nbs;
2862: PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2863: for (i=0; i<mbs; i++) {
2864: c->imax[i] = a->imax[i];
2865: c->ilen[i] = a->ilen[i];
2866: }
2868: /* allocate the matrix space */
2869: PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2870: c->singlemalloc = PETSC_TRUE;
2871: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2872: if (mbs > 0) {
2873: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2874: if (cpvalues == MAT_COPY_VALUES) {
2875: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2876: } else {
2877: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2878: }
2879: }
2880: c->roworiented = a->roworiented;
2881: c->nonew = a->nonew;
2883: if (a->diag) {
2884: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2885: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2886: for (i=0; i<mbs; i++) {
2887: c->diag[i] = a->diag[i];
2888: }
2889: } else c->diag = 0;
2890: c->nz = a->nz;
2891: c->maxnz = a->maxnz;
2892: c->solve_work = 0;
2893: c->mult_work = 0;
2894: c->free_a = PETSC_TRUE;
2895: c->free_ij = PETSC_TRUE;
2896: C->preallocated = PETSC_TRUE;
2897: C->assembled = PETSC_TRUE;
2899: c->compressedrow.use = a->compressedrow.use;
2900: c->compressedrow.nrows = a->compressedrow.nrows;
2901: c->compressedrow.checked = a->compressedrow.checked;
2902: if ( a->compressedrow.checked && a->compressedrow.use){
2903: i = a->compressedrow.nrows;
2904: PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2905: c->compressedrow.rindex = c->compressedrow.i + i + 1;
2906: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2907: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2908: } else {
2909: c->compressedrow.use = PETSC_FALSE;
2910: c->compressedrow.i = PETSC_NULL;
2911: c->compressedrow.rindex = PETSC_NULL;
2912: }
2913: C->same_nonzero = A->same_nonzero;
2914: *B = C;
2915: PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2916: return(0);
2917: }
2921: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2922: {
2923: Mat_SeqBAIJ *a;
2924: Mat B;
2926: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
2927: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2928: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows;
2929: PetscInt *masked,nmask,tmp,bs2,ishift;
2930: PetscMPIInt size;
2931: int fd;
2932: PetscScalar *aa;
2933: MPI_Comm comm = ((PetscObject)viewer)->comm;
2936: PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
2937: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2938: PetscOptionsEnd();
2939: bs2 = bs*bs;
2941: MPI_Comm_size(comm,&size);
2942: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2943: PetscViewerBinaryGetDescriptor(viewer,&fd);
2944: PetscBinaryRead(fd,header,4,PETSC_INT);
2945: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2946: M = header[1]; N = header[2]; nz = header[3];
2948: if (header[3] < 0) {
2949: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2950: }
2952: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2954: /*
2955: This code adds extra rows to make sure the number of rows is
2956: divisible by the blocksize
2957: */
2958: mbs = M/bs;
2959: extra_rows = bs - M + bs*(mbs);
2960: if (extra_rows == bs) extra_rows = 0;
2961: else mbs++;
2962: if (extra_rows) {
2963: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2964: }
2966: /* read in row lengths */
2967: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2968: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2969: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2971: /* read in column indices */
2972: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2973: PetscBinaryRead(fd,jj,nz,PETSC_INT);
2974: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2976: /* loop over row lengths determining block row lengths */
2977: PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2978: PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2979: PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2980: PetscMemzero(mask,mbs*sizeof(PetscInt));
2981: masked = mask + mbs;
2982: rowcount = 0; nzcount = 0;
2983: for (i=0; i<mbs; i++) {
2984: nmask = 0;
2985: for (j=0; j<bs; j++) {
2986: kmax = rowlengths[rowcount];
2987: for (k=0; k<kmax; k++) {
2988: tmp = jj[nzcount++]/bs;
2989: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2990: }
2991: rowcount++;
2992: }
2993: browlengths[i] += nmask;
2994: /* zero out the mask elements we set */
2995: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2996: }
2998: /* create our matrix */
2999: MatCreate(comm,&B);
3000: MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3001: MatSetType(B,type);
3002: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
3003: a = (Mat_SeqBAIJ*)B->data;
3005: /* set matrix "i" values */
3006: a->i[0] = 0;
3007: for (i=1; i<= mbs; i++) {
3008: a->i[i] = a->i[i-1] + browlengths[i-1];
3009: a->ilen[i-1] = browlengths[i-1];
3010: }
3011: a->nz = 0;
3012: for (i=0; i<mbs; i++) a->nz += browlengths[i];
3014: /* read in nonzero values */
3015: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3016: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3017: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3019: /* set "a" and "j" values into matrix */
3020: nzcount = 0; jcount = 0;
3021: for (i=0; i<mbs; i++) {
3022: nzcountb = nzcount;
3023: nmask = 0;
3024: for (j=0; j<bs; j++) {
3025: kmax = rowlengths[i*bs+j];
3026: for (k=0; k<kmax; k++) {
3027: tmp = jj[nzcount++]/bs;
3028: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3029: }
3030: }
3031: /* sort the masked values */
3032: PetscSortInt(nmask,masked);
3034: /* set "j" values into matrix */
3035: maskcount = 1;
3036: for (j=0; j<nmask; j++) {
3037: a->j[jcount++] = masked[j];
3038: mask[masked[j]] = maskcount++;
3039: }
3040: /* set "a" values into matrix */
3041: ishift = bs2*a->i[i];
3042: for (j=0; j<bs; j++) {
3043: kmax = rowlengths[i*bs+j];
3044: for (k=0; k<kmax; k++) {
3045: tmp = jj[nzcountb]/bs ;
3046: block = mask[tmp] - 1;
3047: point = jj[nzcountb] - bs*tmp;
3048: idx = ishift + bs2*block + j + bs*point;
3049: a->a[idx] = (MatScalar)aa[nzcountb++];
3050: }
3051: }
3052: /* zero out the mask elements we set */
3053: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3054: }
3055: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3057: PetscFree(rowlengths);
3058: PetscFree(browlengths);
3059: PetscFree(aa);
3060: PetscFree(jj);
3061: PetscFree(mask);
3063: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3064: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3065: MatView_Private(B);
3067: *A = B;
3068: return(0);
3069: }
3073: /*@C
3074: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3075: compressed row) format. For good matrix assembly performance the
3076: user should preallocate the matrix storage by setting the parameter nz
3077: (or the array nnz). By setting these parameters accurately, performance
3078: during matrix assembly can be increased by more than a factor of 50.
3080: Collective on MPI_Comm
3082: Input Parameters:
3083: + comm - MPI communicator, set to PETSC_COMM_SELF
3084: . bs - size of block
3085: . m - number of rows
3086: . n - number of columns
3087: . nz - number of nonzero blocks per block row (same for all rows)
3088: - nnz - array containing the number of nonzero blocks in the various block rows
3089: (possibly different for each block row) or PETSC_NULL
3091: Output Parameter:
3092: . A - the matrix
3094: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3095: MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3096: true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3097: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3099: Options Database Keys:
3100: . -mat_no_unroll - uses code that does not unroll the loops in the
3101: block calculations (much slower)
3102: . -mat_block_size - size of the blocks to use
3104: Level: intermediate
3106: Notes:
3107: The number of rows and columns must be divisible by blocksize.
3109: If the nnz parameter is given then the nz parameter is ignored
3111: A nonzero block is any block that as 1 or more nonzeros in it
3113: The block AIJ format is fully compatible with standard Fortran 77
3114: storage. That is, the stored row and column indices can begin at
3115: either one (as in Fortran) or zero. See the users' manual for details.
3117: Specify the preallocated storage with either nz or nnz (not both).
3118: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3119: allocation. For additional details, see the users manual chapter on
3120: matrices.
3122: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3123: @*/
3124: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3125: {
3127:
3129: MatCreate(comm,A);
3130: MatSetSizes(*A,m,n,m,n);
3131: MatSetType(*A,MATSEQBAIJ);
3132: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3133: return(0);
3134: }
3138: /*@C
3139: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3140: per row in the matrix. For good matrix assembly performance the
3141: user should preallocate the matrix storage by setting the parameter nz
3142: (or the array nnz). By setting these parameters accurately, performance
3143: during matrix assembly can be increased by more than a factor of 50.
3145: Collective on MPI_Comm
3147: Input Parameters:
3148: + A - the matrix
3149: . bs - size of block
3150: . nz - number of block nonzeros per block row (same for all rows)
3151: - nnz - array containing the number of block nonzeros in the various block rows
3152: (possibly different for each block row) or PETSC_NULL
3154: Options Database Keys:
3155: . -mat_no_unroll - uses code that does not unroll the loops in the
3156: block calculations (much slower)
3157: . -mat_block_size - size of the blocks to use
3159: Level: intermediate
3161: Notes:
3162: If the nnz parameter is given then the nz parameter is ignored
3164: You can call MatGetInfo() to get information on how effective the preallocation was;
3165: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3166: You can also run with the option -info and look for messages with the string
3167: malloc in them to see if additional memory allocation was needed.
3169: The block AIJ format is fully compatible with standard Fortran 77
3170: storage. That is, the stored row and column indices can begin at
3171: either one (as in Fortran) or zero. See the users' manual for details.
3173: Specify the preallocated storage with either nz or nnz (not both).
3174: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3175: allocation. For additional details, see the users manual chapter on
3176: matrices.
3178: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3179: @*/
3180: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3181: {
3182: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3185: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
3186: if (f) {
3187: (*f)(B,bs,nz,nnz);
3188: }
3189: return(0);
3190: }
3194: /*@
3195: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3196: (upper triangular entries in CSR format) provided by the user.
3198: Collective on MPI_Comm
3200: Input Parameters:
3201: + comm - must be an MPI communicator of size 1
3202: . bs - size of block
3203: . m - number of rows
3204: . n - number of columns
3205: . i - row indices
3206: . j - column indices
3207: - a - matrix values
3209: Output Parameter:
3210: . mat - the matrix
3212: Level: intermediate
3214: Notes:
3215: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3216: once the matrix is destroyed
3218: You cannot set new nonzero locations into this matrix, that will generate an error.
3220: The i and j indices are 0 based
3222: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3224: @*/
3225: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3226: {
3228: PetscInt ii;
3229: Mat_SeqBAIJ *baij;
3232: if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3233: if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3234:
3235: MatCreate(comm,mat);
3236: MatSetSizes(*mat,m,n,m,n);
3237: MatSetType(*mat,MATSEQBAIJ);
3238: MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3239: baij = (Mat_SeqBAIJ*)(*mat)->data;
3240: PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
3242: baij->i = i;
3243: baij->j = j;
3244: baij->a = a;
3245: baij->singlemalloc = PETSC_FALSE;
3246: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3247: baij->free_a = PETSC_FALSE;
3248: baij->free_ij = PETSC_FALSE;
3250: for (ii=0; ii<m; ii++) {
3251: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3252: #if defined(PETSC_USE_DEBUG)
3253: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3254: #endif
3255: }
3256: #if defined(PETSC_USE_DEBUG)
3257: for (ii=0; ii<baij->i[m]; ii++) {
3258: if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3259: if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3260: }
3261: #endif
3263: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3264: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3265: return(0);
3266: }