Actual source code: baij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/inline/spops.h
9: #include petscsys.h
11: #include src/inline/ilu.h
15: /*@
16: MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
18: Collective on Mat
20: Input Parameters:
21: . mat - the matrix
23: Level: advanced
24: @*/
25: PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat)
26: {
27: PetscErrorCode ierr,(*f)(Mat);
31: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
34: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
35: if (f) {
36: (*f)(mat);
37: } else {
38: SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39: }
40: return(0);
41: }
46: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47: {
48: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
50: PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51: PetscScalar *v = a->a,*odiag,*diag,*mdiag;
54: if (a->idiagvalid) return(0);
55: MatMarkDiagonal_SeqBAIJ(A);
56: diag_offset = a->diag;
57: if (!a->idiag) {
58: PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
59: }
60: diag = a->idiag;
61: mdiag = a->idiag+bs*bs*mbs;
62: /* factor and invert each block */
63: switch (bs){
64: case 2:
65: for (i=0; i<mbs; i++) {
66: odiag = v + 4*diag_offset[i];
67: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69: Kernel_A_gets_inverse_A_2(diag);
70: diag += 4;
71: mdiag += 4;
72: }
73: break;
74: case 3:
75: for (i=0; i<mbs; i++) {
76: odiag = v + 9*diag_offset[i];
77: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79: diag[8] = odiag[8];
80: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82: mdiag[8] = odiag[8];
83: Kernel_A_gets_inverse_A_3(diag);
84: diag += 9;
85: mdiag += 9;
86: }
87: break;
88: case 4:
89: for (i=0; i<mbs; i++) {
90: odiag = v + 16*diag_offset[i];
91: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
92: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
93: Kernel_A_gets_inverse_A_4(diag);
94: diag += 16;
95: mdiag += 16;
96: }
97: break;
98: case 5:
99: for (i=0; i<mbs; i++) {
100: odiag = v + 25*diag_offset[i];
101: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
102: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
103: Kernel_A_gets_inverse_A_5(diag);
104: diag += 25;
105: mdiag += 25;
106: }
107: break;
108: default:
109: SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110: }
111: a->idiagvalid = PETSC_TRUE;
112: return(0);
113: }
118: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119: {
120: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
121: PetscScalar *x,x1,x2,s1,s2;
122: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
123: PetscErrorCode ierr;
124: PetscInt m = a->mbs,i,i2,nz,idx;
125: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
128: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129: its = its*lits;
130: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
134: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
136: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
138: diag = a->diag;
139: idiag = a->idiag;
140: VecGetArray(xx,&x);
141: VecGetArray(bb,(PetscScalar**)&b);
143: if (flag & SOR_ZERO_INITIAL_GUESS) {
144: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145: x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146: x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147: i2 = 2;
148: idiag += 4;
149: for (i=1; i<m; i++) {
150: v = aa + 4*ai[i];
151: vi = aj + ai[i];
152: nz = diag[i] - ai[i];
153: s1 = b[i2]; s2 = b[i2+1];
154: while (nz--) {
155: idx = 2*(*vi++);
156: x1 = x[idx]; x2 = x[1+idx];
157: s1 -= v[0]*x1 + v[2]*x2;
158: s2 -= v[1]*x1 + v[3]*x2;
159: v += 4;
160: }
161: x[i2] = idiag[0]*s1 + idiag[2]*s2;
162: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163: idiag += 4;
164: i2 += 2;
165: }
166: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167: PetscLogFlops(4*(a->nz));
168: }
169: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171: i2 = 0;
172: mdiag = a->idiag+4*a->mbs;
173: for (i=0; i<m; i++) {
174: x1 = x[i2]; x2 = x[i2+1];
175: x[i2] = mdiag[0]*x1 + mdiag[2]*x2;
176: x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177: mdiag += 4;
178: i2 += 2;
179: }
180: PetscLogFlops(6*m);
181: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
183: }
184: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185: idiag = a->idiag+4*a->mbs - 4;
186: i2 = 2*m - 2;
187: x1 = x[i2]; x2 = x[i2+1];
188: x[i2] = idiag[0]*x1 + idiag[2]*x2;
189: x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190: idiag -= 4;
191: i2 -= 2;
192: for (i=m-2; i>=0; i--) {
193: v = aa + 4*(diag[i]+1);
194: vi = aj + diag[i] + 1;
195: nz = ai[i+1] - diag[i] - 1;
196: s1 = x[i2]; s2 = x[i2+1];
197: while (nz--) {
198: idx = 2*(*vi++);
199: x1 = x[idx]; x2 = x[1+idx];
200: s1 -= v[0]*x1 + v[2]*x2;
201: s2 -= v[1]*x1 + v[3]*x2;
202: v += 4;
203: }
204: x[i2] = idiag[0]*s1 + idiag[2]*s2;
205: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206: idiag -= 4;
207: i2 -= 2;
208: }
209: PetscLogFlops(4*(a->nz));
210: }
211: } else {
212: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213: }
214: VecRestoreArray(xx,&x);
215: VecRestoreArray(bb,(PetscScalar**)&b);
216: return(0);
217: }
221: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222: {
223: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
224: PetscScalar *x,x1,x2,x3,s1,s2,s3;
225: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
226: PetscErrorCode ierr;
227: PetscInt m = a->mbs,i,i2,nz,idx;
228: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
231: its = its*lits;
232: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
233: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
239: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
241: diag = a->diag;
242: idiag = a->idiag;
243: VecGetArray(xx,&x);
244: VecGetArray(bb,(PetscScalar**)&b);
246: if (flag & SOR_ZERO_INITIAL_GUESS) {
247: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248: x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
249: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
250: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
251: i2 = 3;
252: idiag += 9;
253: for (i=1; i<m; i++) {
254: v = aa + 9*ai[i];
255: vi = aj + ai[i];
256: nz = diag[i] - ai[i];
257: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
258: while (nz--) {
259: idx = 3*(*vi++);
260: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
261: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
262: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
263: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
264: v += 9;
265: }
266: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
267: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
268: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
269: idiag += 9;
270: i2 += 3;
271: }
272: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
273: PetscLogFlops(9*(a->nz));
274: }
275: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
276: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
277: i2 = 0;
278: mdiag = a->idiag+9*a->mbs;
279: for (i=0; i<m; i++) {
280: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
281: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
282: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
283: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
284: mdiag += 9;
285: i2 += 3;
286: }
287: PetscLogFlops(15*m);
288: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
289: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
290: }
291: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
292: idiag = a->idiag+9*a->mbs - 9;
293: i2 = 3*m - 3;
294: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
295: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
296: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
297: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
298: idiag -= 9;
299: i2 -= 3;
300: for (i=m-2; i>=0; i--) {
301: v = aa + 9*(diag[i]+1);
302: vi = aj + diag[i] + 1;
303: nz = ai[i+1] - diag[i] - 1;
304: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
305: while (nz--) {
306: idx = 3*(*vi++);
307: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
308: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
309: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
310: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
311: v += 9;
312: }
313: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
314: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
315: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
316: idiag -= 9;
317: i2 -= 3;
318: }
319: PetscLogFlops(9*(a->nz));
320: }
321: } else {
322: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
323: }
324: VecRestoreArray(xx,&x);
325: VecRestoreArray(bb,(PetscScalar**)&b);
326: return(0);
327: }
331: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
332: {
333: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
334: PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4;
335: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
336: PetscErrorCode ierr;
337: PetscInt m = a->mbs,i,i2,nz,idx;
338: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
341: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
342: its = its*lits;
343: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
344: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
345: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
346: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
347: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
349: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
351: diag = a->diag;
352: idiag = a->idiag;
353: VecGetArray(xx,&x);
354: VecGetArray(bb,(PetscScalar**)&b);
356: if (flag & SOR_ZERO_INITIAL_GUESS) {
357: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
358: x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12];
359: x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13];
360: x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
361: x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
362: i2 = 4;
363: idiag += 16;
364: for (i=1; i<m; i++) {
365: v = aa + 16*ai[i];
366: vi = aj + ai[i];
367: nz = diag[i] - ai[i];
368: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
369: while (nz--) {
370: idx = 4*(*vi++);
371: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
372: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
373: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
374: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
375: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
376: v += 16;
377: }
378: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
379: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
380: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
381: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
382: idiag += 16;
383: i2 += 4;
384: }
385: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
386: PetscLogFlops(16*(a->nz));
387: }
388: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
389: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
390: i2 = 0;
391: mdiag = a->idiag+16*a->mbs;
392: for (i=0; i<m; i++) {
393: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
394: x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4;
395: x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4;
396: x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
397: x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
398: mdiag += 16;
399: i2 += 4;
400: }
401: PetscLogFlops(28*m);
402: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
403: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
404: }
405: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
406: idiag = a->idiag+16*a->mbs - 16;
407: i2 = 4*m - 4;
408: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
409: x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4;
410: x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4;
411: x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
412: x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
413: idiag -= 16;
414: i2 -= 4;
415: for (i=m-2; i>=0; i--) {
416: v = aa + 16*(diag[i]+1);
417: vi = aj + diag[i] + 1;
418: nz = ai[i+1] - diag[i] - 1;
419: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
420: while (nz--) {
421: idx = 4*(*vi++);
422: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
423: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
424: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
425: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
426: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
427: v += 16;
428: }
429: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
430: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
431: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
432: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
433: idiag -= 16;
434: i2 -= 4;
435: }
436: PetscLogFlops(16*(a->nz));
437: }
438: } else {
439: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
440: }
441: VecRestoreArray(xx,&x);
442: VecRestoreArray(bb,(PetscScalar**)&b);
443: return(0);
444: }
448: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
449: {
450: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
451: PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
452: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
453: PetscErrorCode ierr;
454: PetscInt m = a->mbs,i,i2,nz,idx;
455: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
458: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
459: its = its*lits;
460: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
461: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
462: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
463: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
464: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
466: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
468: diag = a->diag;
469: idiag = a->idiag;
470: VecGetArray(xx,&x);
471: VecGetArray(bb,(PetscScalar**)&b);
473: if (flag & SOR_ZERO_INITIAL_GUESS) {
474: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
475: x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
476: x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
477: x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
478: x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
479: x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
480: i2 = 5;
481: idiag += 25;
482: for (i=1; i<m; i++) {
483: v = aa + 25*ai[i];
484: vi = aj + ai[i];
485: nz = diag[i] - ai[i];
486: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
487: while (nz--) {
488: idx = 5*(*vi++);
489: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
490: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
491: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
492: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
493: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
494: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
495: v += 25;
496: }
497: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
498: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
499: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
500: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
501: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
502: idiag += 25;
503: i2 += 5;
504: }
505: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
506: PetscLogFlops(25*(a->nz));
507: }
508: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
509: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
510: i2 = 0;
511: mdiag = a->idiag+25*a->mbs;
512: for (i=0; i<m; i++) {
513: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
514: x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
515: x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
516: x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
517: x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
518: x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
519: mdiag += 25;
520: i2 += 5;
521: }
522: PetscLogFlops(45*m);
523: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
524: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
525: }
526: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
527: idiag = a->idiag+25*a->mbs - 25;
528: i2 = 5*m - 5;
529: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
530: x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
531: x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
532: x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
533: x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
534: x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
535: idiag -= 25;
536: i2 -= 5;
537: for (i=m-2; i>=0; i--) {
538: v = aa + 25*(diag[i]+1);
539: vi = aj + diag[i] + 1;
540: nz = ai[i+1] - diag[i] - 1;
541: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
542: while (nz--) {
543: idx = 5*(*vi++);
544: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
545: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
546: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
547: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
548: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
549: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
550: v += 25;
551: }
552: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
553: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
554: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
555: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
556: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
557: idiag -= 25;
558: i2 -= 5;
559: }
560: PetscLogFlops(25*(a->nz));
561: }
562: } else {
563: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
564: }
565: VecRestoreArray(xx,&x);
566: VecRestoreArray(bb,(PetscScalar**)&b);
567: return(0);
568: }
570: /*
571: Special version for direct calls from Fortran (Used in PETSc-fun3d)
572: */
573: #if defined(PETSC_HAVE_FORTRAN_CAPS)
574: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
575: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
576: #define matsetvaluesblocked4_ matsetvaluesblocked4
577: #endif
582: void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
583: {
584: Mat A = *AA;
585: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
586: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
587: PetscInt *ai=a->i,*ailen=a->ilen;
588: PetscInt *aj=a->j,stepval,lastcol = -1;
589: const PetscScalar *value = v;
590: MatScalar *ap,*aa = a->a,*bap;
593: if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
594: stepval = (n-1)*4;
595: for (k=0; k<m; k++) { /* loop over added rows */
596: row = im[k];
597: rp = aj + ai[row];
598: ap = aa + 16*ai[row];
599: nrow = ailen[row];
600: low = 0;
601: high = nrow;
602: for (l=0; l<n; l++) { /* loop over added columns */
603: col = in[l];
604: if (col <= lastcol) low = 0; else high = nrow;
605: lastcol = col;
606: value = v + k*(stepval+4 + l)*4;
607: while (high-low > 7) {
608: t = (low+high)/2;
609: if (rp[t] > col) high = t;
610: else low = t;
611: }
612: for (i=low; i<high; i++) {
613: if (rp[i] > col) break;
614: if (rp[i] == col) {
615: bap = ap + 16*i;
616: for (ii=0; ii<4; ii++,value+=stepval) {
617: for (jj=ii; jj<16; jj+=4) {
618: bap[jj] += *value++;
619: }
620: }
621: goto noinsert2;
622: }
623: }
624: N = nrow++ - 1;
625: high++; /* added new column index thus must search to one higher than before */
626: /* shift up all the later entries in this row */
627: for (ii=N; ii>=i; ii--) {
628: rp[ii+1] = rp[ii];
629: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
630: }
631: if (N >= i) {
632: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
633: }
634: rp[i] = col;
635: bap = ap + 16*i;
636: for (ii=0; ii<4; ii++,value+=stepval) {
637: for (jj=ii; jj<16; jj+=4) {
638: bap[jj] = *value++;
639: }
640: }
641: noinsert2:;
642: low = i;
643: }
644: ailen[row] = nrow;
645: }
646: PetscFunctionReturnVoid();
647: }
650: #if defined(PETSC_HAVE_FORTRAN_CAPS)
651: #define matsetvalues4_ MATSETVALUES4
652: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
653: #define matsetvalues4_ matsetvalues4
654: #endif
659: void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
660: {
661: Mat A = *AA;
662: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
663: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
664: PetscInt *ai=a->i,*ailen=a->ilen;
665: PetscInt *aj=a->j,brow,bcol;
666: PetscInt ridx,cidx,lastcol = -1;
667: MatScalar *ap,value,*aa=a->a,*bap;
668:
670: for (k=0; k<m; k++) { /* loop over added rows */
671: row = im[k]; brow = row/4;
672: rp = aj + ai[brow];
673: ap = aa + 16*ai[brow];
674: nrow = ailen[brow];
675: low = 0;
676: high = nrow;
677: for (l=0; l<n; l++) { /* loop over added columns */
678: col = in[l]; bcol = col/4;
679: ridx = row % 4; cidx = col % 4;
680: value = v[l + k*n];
681: if (col <= lastcol) low = 0; else high = nrow;
682: lastcol = col;
683: while (high-low > 7) {
684: t = (low+high)/2;
685: if (rp[t] > bcol) high = t;
686: else low = t;
687: }
688: for (i=low; i<high; i++) {
689: if (rp[i] > bcol) break;
690: if (rp[i] == bcol) {
691: bap = ap + 16*i + 4*cidx + ridx;
692: *bap += value;
693: goto noinsert1;
694: }
695: }
696: N = nrow++ - 1;
697: high++; /* added new column thus must search to one higher than before */
698: /* shift up all the later entries in this row */
699: for (ii=N; ii>=i; ii--) {
700: rp[ii+1] = rp[ii];
701: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
702: }
703: if (N>=i) {
704: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
705: }
706: rp[i] = bcol;
707: ap[16*i + 4*cidx + ridx] = value;
708: noinsert1:;
709: low = i;
710: }
711: ailen[brow] = nrow;
712: }
713: PetscFunctionReturnVoid();
714: }
717: /* UGLY, ugly, ugly
718: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
719: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
720: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
721: converts the entries into single precision and then calls ..._MatScalar() to put them
722: into the single precision data structures.
723: */
724: #if defined(PETSC_USE_MAT_SINGLE)
725: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
726: #else
727: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
728: #endif
730: #define CHUNKSIZE 10
732: /*
733: Checks for missing diagonals
734: */
737: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
738: {
739: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
741: PetscInt *diag,*jj = a->j,i;
744: MatMarkDiagonal_SeqBAIJ(A);
745: *missing = PETSC_FALSE;
746: diag = a->diag;
747: for (i=0; i<a->mbs; i++) {
748: if (jj[diag[i]] != i) {
749: *missing = PETSC_TRUE;
750: if (d) *d = i;
751: }
752: }
753: return(0);
754: }
758: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
759: {
760: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
762: PetscInt i,j,m = a->mbs;
765: if (!a->diag) {
766: PetscMalloc(m*sizeof(PetscInt),&a->diag);
767: }
768: for (i=0; i<m; i++) {
769: a->diag[i] = a->i[i+1];
770: for (j=a->i[i]; j<a->i[i+1]; j++) {
771: if (a->j[j] == i) {
772: a->diag[i] = j;
773: break;
774: }
775: }
776: }
777: return(0);
778: }
781: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
785: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
786: {
787: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
789: PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
790: PetscInt *tia, *tja;
793: *nn = n;
794: if (!ia) return(0);
795: if (symmetric) {
796: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
797: } else {
798: tia = a->i; tja = a->j;
799: }
800:
801: if (!blockcompressed && bs > 1) {
802: (*nn) *= bs;
803: nbs = bs;
804: /* malloc & create the natural set of indices */
805: PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
806: if (n) {
807: (*ia)[0] = 0;
808: for (j=1; j<bs; j++) {
809: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
810: }
811: }
813: for (i=1; i<n; i++) {
814: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
815: for (j=1; j<bs; j++) {
816: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
817: }
818: }
819: if (n) {
820: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
821: }
823: if (ja) {
824: PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
825: cnt = 0;
826: for (i=0; i<n; i++) {
827: for (j=0; j<bs; j++) {
828: for (k=tia[i]; k<tia[i+1]; k++) {
829: for (l=0; l<bs; l++) {
830: (*ja)[cnt++] = bs*tja[k] + l;
831: }
832: }
833: }
834: }
835: }
837: n *= bs;
838: nz *= bs*bs;
839: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
840: PetscFree(tia);
841: PetscFree(tja);
842: }
843: } else {
844: *ia = tia;
845: if (ja) *ja = tja;
846: }
847: if (oshift == 1) {
848: for (i=0; i<n+nbs; i++) (*ia)[i]++;
849: if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
850: }
851: return(0);
852: }
856: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
857: {
858: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
860: PetscInt i,n = a->mbs,nz = a->i[n];
863: if (!ia) return(0);
864: if (!blockcompressed && A->rmap.bs > 1) {
865: PetscFree(*ia);
866: if (ja) {PetscFree(*ja);}
867: } else if (symmetric) {
868: PetscFree(*ia);
869: if (ja) {PetscFree(*ja);}
870: } else if (oshift == 1) { /* blockcompressed */
871: for (i=0; i<n+1; i++) a->i[i]--;
872: if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
873: }
874: return(0);
875: }
879: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
880: {
881: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
885: #if defined(PETSC_USE_LOG)
886: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
887: #endif
888: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
889: if (a->row) {
890: ISDestroy(a->row);
891: }
892: if (a->col) {
893: ISDestroy(a->col);
894: }
895: PetscFree(a->diag);
896: PetscFree(a->idiag);
897: PetscFree2(a->imax,a->ilen);
898: PetscFree(a->solve_work);
899: PetscFree(a->mult_work);
900: if (a->icol) {ISDestroy(a->icol);}
901: PetscFree(a->saved_values);
902: #if defined(PETSC_USE_MAT_SINGLE)
903: PetscFree(a->setvaluescopy);
904: #endif
905: PetscFree(a->xtoy);
906: if (a->compressedrow.use){PetscFree(a->compressedrow.i);}
908: PetscFree(a);
910: PetscObjectChangeTypeName((PetscObject)A,0);
911: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
912: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
913: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
914: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
915: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
916: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
917: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
918: return(0);
919: }
923: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
924: {
925: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
929: switch (op) {
930: case MAT_ROW_ORIENTED:
931: a->roworiented = flg;
932: break;
933: case MAT_KEEP_ZEROED_ROWS:
934: a->keepzeroedrows = flg;
935: break;
936: case MAT_NEW_NONZERO_LOCATIONS:
937: a->nonew = (flg ? 0 : 1);
938: break;
939: case MAT_NEW_NONZERO_LOCATION_ERR:
940: a->nonew = (flg ? -1 : 0);
941: break;
942: case MAT_NEW_NONZERO_ALLOCATION_ERR:
943: a->nonew = (flg ? -2 : 0);
944: break;
945: case MAT_NEW_DIAGONALS:
946: case MAT_IGNORE_OFF_PROC_ENTRIES:
947: case MAT_USE_HASH_TABLE:
948: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
949: break;
950: case MAT_SYMMETRIC:
951: case MAT_STRUCTURALLY_SYMMETRIC:
952: case MAT_HERMITIAN:
953: case MAT_SYMMETRY_ETERNAL:
954: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
955: break;
956: default:
957: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
958: }
959: return(0);
960: }
964: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
965: {
966: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
968: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
969: MatScalar *aa,*aa_i;
970: PetscScalar *v_i;
973: bs = A->rmap.bs;
974: ai = a->i;
975: aj = a->j;
976: aa = a->a;
977: bs2 = a->bs2;
978:
979: if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
980:
981: bn = row/bs; /* Block number */
982: bp = row % bs; /* Block Position */
983: M = ai[bn+1] - ai[bn];
984: *nz = bs*M;
985:
986: if (v) {
987: *v = 0;
988: if (*nz) {
989: PetscMalloc((*nz)*sizeof(PetscScalar),v);
990: for (i=0; i<M; i++) { /* for each block in the block row */
991: v_i = *v + i*bs;
992: aa_i = aa + bs2*(ai[bn] + i);
993: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
994: }
995: }
996: }
998: if (idx) {
999: *idx = 0;
1000: if (*nz) {
1001: PetscMalloc((*nz)*sizeof(PetscInt),idx);
1002: for (i=0; i<M; i++) { /* for each block in the block row */
1003: idx_i = *idx + i*bs;
1004: itmp = bs*aj[ai[bn] + i];
1005: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1006: }
1007: }
1008: }
1009: return(0);
1010: }
1014: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1015: {
1019: if (idx) {PetscFree(*idx);}
1020: if (v) {PetscFree(*v);}
1021: return(0);
1022: }
1026: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1027: {
1028: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1029: Mat C;
1031: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1032: PetscInt *rows,*cols,bs2=a->bs2;
1033: PetscScalar *array;
1036: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1037: PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1038: PetscMemzero(col,(1+nbs)*sizeof(PetscInt));
1040: #if defined(PETSC_USE_MAT_SINGLE)
1041: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
1042: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1043: #else
1044: array = a->a;
1045: #endif
1047: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1048: MatCreate(((PetscObject)A)->comm,&C);
1049: MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);
1050: MatSetType(C,((PetscObject)A)->type_name);
1051: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1052: PetscFree(col);
1053: PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1054: cols = rows + bs;
1055: for (i=0; i<mbs; i++) {
1056: cols[0] = i*bs;
1057: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1058: len = ai[i+1] - ai[i];
1059: for (j=0; j<len; j++) {
1060: rows[0] = (*aj++)*bs;
1061: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1062: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1063: array += bs2;
1064: }
1065: }
1066: PetscFree(rows);
1067: #if defined(PETSC_USE_MAT_SINGLE)
1068: PetscFree(array);
1069: #endif
1070:
1071: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1072: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1073:
1074: if (B) {
1075: *B = C;
1076: } else {
1077: MatHeaderCopy(A,C);
1078: }
1079: return(0);
1080: }
1084: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1085: {
1086: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1088: PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1089: int fd;
1090: PetscScalar *aa;
1091: FILE *file;
1094: PetscViewerBinaryGetDescriptor(viewer,&fd);
1095: PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);
1096: col_lens[0] = MAT_FILE_COOKIE;
1098: col_lens[1] = A->rmap.N;
1099: col_lens[2] = A->cmap.n;
1100: col_lens[3] = a->nz*bs2;
1102: /* store lengths of each row and write (including header) to file */
1103: count = 0;
1104: for (i=0; i<a->mbs; i++) {
1105: for (j=0; j<bs; j++) {
1106: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1107: }
1108: }
1109: PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);
1110: PetscFree(col_lens);
1112: /* store column indices (zero start index) */
1113: PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1114: count = 0;
1115: for (i=0; i<a->mbs; i++) {
1116: for (j=0; j<bs; j++) {
1117: for (k=a->i[i]; k<a->i[i+1]; k++) {
1118: for (l=0; l<bs; l++) {
1119: jj[count++] = bs*a->j[k] + l;
1120: }
1121: }
1122: }
1123: }
1124: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1125: PetscFree(jj);
1127: /* store nonzero values */
1128: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1129: count = 0;
1130: for (i=0; i<a->mbs; i++) {
1131: for (j=0; j<bs; j++) {
1132: for (k=a->i[i]; k<a->i[i+1]; k++) {
1133: for (l=0; l<bs; l++) {
1134: aa[count++] = a->a[bs2*k + l*bs + j];
1135: }
1136: }
1137: }
1138: }
1139: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1140: PetscFree(aa);
1142: PetscViewerBinaryGetInfoPointer(viewer,&file);
1143: if (file) {
1144: fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1145: }
1146: return(0);
1147: }
1151: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1152: {
1153: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1154: PetscErrorCode ierr;
1155: PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1156: PetscViewerFormat format;
1159: PetscViewerGetFormat(viewer,&format);
1160: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1161: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1162: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1163: Mat aij;
1164: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1165: MatView(aij,viewer);
1166: MatDestroy(aij);
1167: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1168: return(0);
1169: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1170: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1171: for (i=0; i<a->mbs; i++) {
1172: for (j=0; j<bs; j++) {
1173: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1174: for (k=a->i[i]; k<a->i[i+1]; k++) {
1175: for (l=0; l<bs; l++) {
1176: #if defined(PETSC_USE_COMPLEX)
1177: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1178: PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1179: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1180: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1181: PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1182: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1183: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1184: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1185: }
1186: #else
1187: if (a->a[bs2*k + l*bs + j] != 0.0) {
1188: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1189: }
1190: #endif
1191: }
1192: }
1193: PetscViewerASCIIPrintf(viewer,"\n");
1194: }
1195: }
1196: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1197: } else {
1198: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1199: for (i=0; i<a->mbs; i++) {
1200: for (j=0; j<bs; j++) {
1201: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1202: for (k=a->i[i]; k<a->i[i+1]; k++) {
1203: for (l=0; l<bs; l++) {
1204: #if defined(PETSC_USE_COMPLEX)
1205: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1206: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1207: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1208: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1209: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1210: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1211: } else {
1212: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1213: }
1214: #else
1215: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1216: #endif
1217: }
1218: }
1219: PetscViewerASCIIPrintf(viewer,"\n");
1220: }
1221: }
1222: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1223: }
1224: PetscViewerFlush(viewer);
1225: return(0);
1226: }
1230: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1231: {
1232: Mat A = (Mat) Aa;
1233: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1235: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1236: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1237: MatScalar *aa;
1238: PetscViewer viewer;
1242: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1243: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1245: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1247: /* loop over matrix elements drawing boxes */
1248: color = PETSC_DRAW_BLUE;
1249: for (i=0,row=0; i<mbs; i++,row+=bs) {
1250: for (j=a->i[i]; j<a->i[i+1]; j++) {
1251: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1252: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1253: aa = a->a + j*bs2;
1254: for (k=0; k<bs; k++) {
1255: for (l=0; l<bs; l++) {
1256: if (PetscRealPart(*aa++) >= 0.) continue;
1257: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1258: }
1259: }
1260: }
1261: }
1262: color = PETSC_DRAW_CYAN;
1263: for (i=0,row=0; i<mbs; i++,row+=bs) {
1264: for (j=a->i[i]; j<a->i[i+1]; j++) {
1265: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1266: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1267: aa = a->a + j*bs2;
1268: for (k=0; k<bs; k++) {
1269: for (l=0; l<bs; l++) {
1270: if (PetscRealPart(*aa++) != 0.) continue;
1271: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1272: }
1273: }
1274: }
1275: }
1277: color = PETSC_DRAW_RED;
1278: for (i=0,row=0; i<mbs; i++,row+=bs) {
1279: for (j=a->i[i]; j<a->i[i+1]; j++) {
1280: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1281: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1282: aa = a->a + j*bs2;
1283: for (k=0; k<bs; k++) {
1284: for (l=0; l<bs; l++) {
1285: if (PetscRealPart(*aa++) <= 0.) continue;
1286: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1287: }
1288: }
1289: }
1290: }
1291: return(0);
1292: }
1296: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1297: {
1299: PetscReal xl,yl,xr,yr,w,h;
1300: PetscDraw draw;
1301: PetscTruth isnull;
1305: PetscViewerDrawGetDraw(viewer,0,&draw);
1306: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1308: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1309: xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1310: xr += w; yr += h; xl = -w; yl = -h;
1311: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1312: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1313: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1314: return(0);
1315: }
1319: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1320: {
1322: PetscTruth iascii,isbinary,isdraw;
1325: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1326: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1327: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1328: if (iascii){
1329: MatView_SeqBAIJ_ASCII(A,viewer);
1330: } else if (isbinary) {
1331: MatView_SeqBAIJ_Binary(A,viewer);
1332: } else if (isdraw) {
1333: MatView_SeqBAIJ_Draw(A,viewer);
1334: } else {
1335: Mat B;
1336: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1337: MatView(B,viewer);
1338: MatDestroy(B);
1339: }
1340: return(0);
1341: }
1346: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1347: {
1348: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1349: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1350: PetscInt *ai = a->i,*ailen = a->ilen;
1351: PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1352: MatScalar *ap,*aa = a->a;
1355: for (k=0; k<m; k++) { /* loop over rows */
1356: row = im[k]; brow = row/bs;
1357: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1358: if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1359: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1360: nrow = ailen[brow];
1361: for (l=0; l<n; l++) { /* loop over columns */
1362: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1363: if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1364: col = in[l] ;
1365: bcol = col/bs;
1366: cidx = col%bs;
1367: ridx = row%bs;
1368: high = nrow;
1369: low = 0; /* assume unsorted */
1370: while (high-low > 5) {
1371: t = (low+high)/2;
1372: if (rp[t] > bcol) high = t;
1373: else low = t;
1374: }
1375: for (i=low; i<high; i++) {
1376: if (rp[i] > bcol) break;
1377: if (rp[i] == bcol) {
1378: *v++ = ap[bs2*i+bs*cidx+ridx];
1379: goto finished;
1380: }
1381: }
1382: *v++ = 0.0;
1383: finished:;
1384: }
1385: }
1386: return(0);
1387: }
1389: #if defined(PETSC_USE_MAT_SINGLE)
1392: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1393: {
1394: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1396: PetscInt i,N = m*n*b->bs2;
1397: MatScalar *vsingle;
1400: if (N > b->setvalueslen) {
1401: PetscFree(b->setvaluescopy);
1402: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1403: b->setvalueslen = N;
1404: }
1405: vsingle = b->setvaluescopy;
1406: for (i=0; i<N; i++) {
1407: vsingle[i] = v[i];
1408: }
1409: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1410: return(0);
1411: }
1412: #endif
1417: PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1418: {
1419: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1420: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1421: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1422: PetscErrorCode ierr;
1423: PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1424: PetscTruth roworiented=a->roworiented;
1425: const MatScalar *value = v;
1426: MatScalar *ap,*aa = a->a,*bap;
1429: if (roworiented) {
1430: stepval = (n-1)*bs;
1431: } else {
1432: stepval = (m-1)*bs;
1433: }
1434: for (k=0; k<m; k++) { /* loop over added rows */
1435: row = im[k];
1436: if (row < 0) continue;
1437: #if defined(PETSC_USE_DEBUG)
1438: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1439: #endif
1440: rp = aj + ai[row];
1441: ap = aa + bs2*ai[row];
1442: rmax = imax[row];
1443: nrow = ailen[row];
1444: low = 0;
1445: high = nrow;
1446: for (l=0; l<n; l++) { /* loop over added columns */
1447: if (in[l] < 0) continue;
1448: #if defined(PETSC_USE_DEBUG)
1449: if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1450: #endif
1451: col = in[l];
1452: if (roworiented) {
1453: value = v + k*(stepval+bs)*bs + l*bs;
1454: } else {
1455: value = v + l*(stepval+bs)*bs + k*bs;
1456: }
1457: if (col <= lastcol) low = 0; else high = nrow;
1458: lastcol = col;
1459: while (high-low > 7) {
1460: t = (low+high)/2;
1461: if (rp[t] > col) high = t;
1462: else low = t;
1463: }
1464: for (i=low; i<high; i++) {
1465: if (rp[i] > col) break;
1466: if (rp[i] == col) {
1467: bap = ap + bs2*i;
1468: if (roworiented) {
1469: if (is == ADD_VALUES) {
1470: for (ii=0; ii<bs; ii++,value+=stepval) {
1471: for (jj=ii; jj<bs2; jj+=bs) {
1472: bap[jj] += *value++;
1473: }
1474: }
1475: } else {
1476: for (ii=0; ii<bs; ii++,value+=stepval) {
1477: for (jj=ii; jj<bs2; jj+=bs) {
1478: bap[jj] = *value++;
1479: }
1480: }
1481: }
1482: } else {
1483: if (is == ADD_VALUES) {
1484: for (ii=0; ii<bs; ii++,value+=stepval) {
1485: for (jj=0; jj<bs; jj++) {
1486: *bap++ += *value++;
1487: }
1488: }
1489: } else {
1490: for (ii=0; ii<bs; ii++,value+=stepval) {
1491: for (jj=0; jj<bs; jj++) {
1492: *bap++ = *value++;
1493: }
1494: }
1495: }
1496: }
1497: goto noinsert2;
1498: }
1499: }
1500: if (nonew == 1) goto noinsert2;
1501: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1502: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1503: N = nrow++ - 1; high++;
1504: /* shift up all the later entries in this row */
1505: for (ii=N; ii>=i; ii--) {
1506: rp[ii+1] = rp[ii];
1507: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1508: }
1509: if (N >= i) {
1510: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1511: }
1512: rp[i] = col;
1513: bap = ap + bs2*i;
1514: if (roworiented) {
1515: for (ii=0; ii<bs; ii++,value+=stepval) {
1516: for (jj=ii; jj<bs2; jj+=bs) {
1517: bap[jj] = *value++;
1518: }
1519: }
1520: } else {
1521: for (ii=0; ii<bs; ii++,value+=stepval) {
1522: for (jj=0; jj<bs; jj++) {
1523: *bap++ = *value++;
1524: }
1525: }
1526: }
1527: noinsert2:;
1528: low = i;
1529: }
1530: ailen[row] = nrow;
1531: }
1532: return(0);
1533: }
1537: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1538: {
1539: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1540: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1541: PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen;
1543: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1544: MatScalar *aa = a->a,*ap;
1545: PetscReal ratio=0.6;
1548: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1550: if (m) rmax = ailen[0];
1551: for (i=1; i<mbs; i++) {
1552: /* move each row back by the amount of empty slots (fshift) before it*/
1553: fshift += imax[i-1] - ailen[i-1];
1554: rmax = PetscMax(rmax,ailen[i]);
1555: if (fshift) {
1556: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1557: N = ailen[i];
1558: for (j=0; j<N; j++) {
1559: ip[j-fshift] = ip[j];
1560: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1561: }
1562: }
1563: ai[i] = ai[i-1] + ailen[i-1];
1564: }
1565: if (mbs) {
1566: fshift += imax[mbs-1] - ailen[mbs-1];
1567: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1568: }
1569: /* reset ilen and imax for each row */
1570: for (i=0; i<mbs; i++) {
1571: ailen[i] = imax[i] = ai[i+1] - ai[i];
1572: }
1573: a->nz = ai[mbs];
1575: /* diagonals may have moved, so kill the diagonal pointers */
1576: a->idiagvalid = PETSC_FALSE;
1577: if (fshift && a->diag) {
1578: PetscFree(a->diag);
1579: PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1580: a->diag = 0;
1581: }
1582: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);
1583: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1584: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1585: a->reallocs = 0;
1586: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1588: /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1589: if (a->compressedrow.use){
1590: Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1591: }
1593: A->same_nonzero = PETSC_TRUE;
1594: return(0);
1595: }
1597: /*
1598: This function returns an array of flags which indicate the locations of contiguous
1599: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1600: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1601: Assume: sizes should be long enough to hold all the values.
1602: */
1605: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1606: {
1607: PetscInt i,j,k,row;
1608: PetscTruth flg;
1611: for (i=0,j=0; i<n; j++) {
1612: row = idx[i];
1613: if (row%bs!=0) { /* Not the begining of a block */
1614: sizes[j] = 1;
1615: i++;
1616: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1617: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1618: i++;
1619: } else { /* Begining of the block, so check if the complete block exists */
1620: flg = PETSC_TRUE;
1621: for (k=1; k<bs; k++) {
1622: if (row+k != idx[i+k]) { /* break in the block */
1623: flg = PETSC_FALSE;
1624: break;
1625: }
1626: }
1627: if (flg) { /* No break in the bs */
1628: sizes[j] = bs;
1629: i+= bs;
1630: } else {
1631: sizes[j] = 1;
1632: i++;
1633: }
1634: }
1635: }
1636: *bs_max = j;
1637: return(0);
1638: }
1639:
1642: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1643: {
1644: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1646: PetscInt i,j,k,count,*rows;
1647: PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1648: PetscScalar zero = 0.0;
1649: MatScalar *aa;
1652: /* Make a copy of the IS and sort it */
1653: /* allocate memory for rows,sizes */
1654: PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1655: sizes = rows + is_n;
1657: /* copy IS values to rows, and sort them */
1658: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1659: PetscSortInt(is_n,rows);
1660: if (baij->keepzeroedrows) {
1661: for (i=0; i<is_n; i++) { sizes[i] = 1; }
1662: bs_max = is_n;
1663: A->same_nonzero = PETSC_TRUE;
1664: } else {
1665: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1666: A->same_nonzero = PETSC_FALSE;
1667: }
1669: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1670: row = rows[j];
1671: if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1672: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1673: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1674: if (sizes[i] == bs && !baij->keepzeroedrows) {
1675: if (diag != 0.0) {
1676: if (baij->ilen[row/bs] > 0) {
1677: baij->ilen[row/bs] = 1;
1678: baij->j[baij->i[row/bs]] = row/bs;
1679: PetscMemzero(aa,count*bs*sizeof(MatScalar));
1680: }
1681: /* Now insert all the diagonal values for this bs */
1682: for (k=0; k<bs; k++) {
1683: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
1684: }
1685: } else { /* (diag == 0.0) */
1686: baij->ilen[row/bs] = 0;
1687: } /* end (diag == 0.0) */
1688: } else { /* (sizes[i] != bs) */
1689: #if defined (PETSC_USE_DEBUG)
1690: if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1691: #endif
1692: for (k=0; k<count; k++) {
1693: aa[0] = zero;
1694: aa += bs;
1695: }
1696: if (diag != 0.0) {
1697: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
1698: }
1699: }
1700: }
1702: PetscFree(rows);
1703: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1704: return(0);
1705: }
1709: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1710: {
1711: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1712: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1713: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1714: PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1716: PetscInt ridx,cidx,bs2=a->bs2;
1717: PetscTruth roworiented=a->roworiented;
1718: MatScalar *ap,value,*aa=a->a,*bap;
1721: for (k=0; k<m; k++) { /* loop over added rows */
1722: row = im[k];
1723: brow = row/bs;
1724: if (row < 0) continue;
1725: #if defined(PETSC_USE_DEBUG)
1726: if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1727: #endif
1728: rp = aj + ai[brow];
1729: ap = aa + bs2*ai[brow];
1730: rmax = imax[brow];
1731: nrow = ailen[brow];
1732: low = 0;
1733: high = nrow;
1734: for (l=0; l<n; l++) { /* loop over added columns */
1735: if (in[l] < 0) continue;
1736: #if defined(PETSC_USE_DEBUG)
1737: if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1738: #endif
1739: col = in[l]; bcol = col/bs;
1740: ridx = row % bs; cidx = col % bs;
1741: if (roworiented) {
1742: value = v[l + k*n];
1743: } else {
1744: value = v[k + l*m];
1745: }
1746: if (col <= lastcol) low = 0; else high = nrow;
1747: lastcol = col;
1748: while (high-low > 7) {
1749: t = (low+high)/2;
1750: if (rp[t] > bcol) high = t;
1751: else low = t;
1752: }
1753: for (i=low; i<high; i++) {
1754: if (rp[i] > bcol) break;
1755: if (rp[i] == bcol) {
1756: bap = ap + bs2*i + bs*cidx + ridx;
1757: if (is == ADD_VALUES) *bap += value;
1758: else *bap = value;
1759: goto noinsert1;
1760: }
1761: }
1762: if (nonew == 1) goto noinsert1;
1763: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1764: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1765: N = nrow++ - 1; high++;
1766: /* shift up all the later entries in this row */
1767: for (ii=N; ii>=i; ii--) {
1768: rp[ii+1] = rp[ii];
1769: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1770: }
1771: if (N>=i) {
1772: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1773: }
1774: rp[i] = bcol;
1775: ap[bs2*i + bs*cidx + ridx] = value;
1776: a->nz++;
1777: noinsert1:;
1778: low = i;
1779: }
1780: ailen[brow] = nrow;
1781: }
1782: A->same_nonzero = PETSC_FALSE;
1783: return(0);
1784: }
1789: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1790: {
1791: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1792: Mat outA;
1794: PetscTruth row_identity,col_identity;
1797: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1798: ISIdentity(row,&row_identity);
1799: ISIdentity(col,&col_identity);
1800: if (!row_identity || !col_identity) {
1801: SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1802: }
1804: outA = inA;
1805: inA->factor = FACTOR_LU;
1807: MatMarkDiagonal_SeqBAIJ(inA);
1809: PetscObjectReference((PetscObject)row);
1810: if (a->row) { ISDestroy(a->row); }
1811: a->row = row;
1812: PetscObjectReference((PetscObject)col);
1813: if (a->col) { ISDestroy(a->col); }
1814: a->col = col;
1815:
1816: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1817: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1818: PetscLogObjectParent(inA,a->icol);
1819:
1820: /*
1821: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1822: for ILU(0) factorization with natural ordering
1823: */
1824: if (inA->rmap.bs < 8) {
1825: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1826: } else {
1827: if (!a->solve_work) {
1828: PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);
1829: PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));
1830: }
1831: }
1833: MatLUFactorNumeric(inA,info,&outA);
1835: return(0);
1836: }
1841: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1842: {
1843: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1844: PetscInt i,nz,nbs;
1847: nz = baij->maxnz/baij->bs2;
1848: nbs = baij->nbs;
1849: for (i=0; i<nz; i++) {
1850: baij->j[i] = indices[i];
1851: }
1852: baij->nz = nz;
1853: for (i=0; i<nbs; i++) {
1854: baij->ilen[i] = baij->imax[i];
1855: }
1857: return(0);
1858: }
1863: /*@
1864: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1865: in the matrix.
1867: Input Parameters:
1868: + mat - the SeqBAIJ matrix
1869: - indices - the column indices
1871: Level: advanced
1873: Notes:
1874: This can be called if you have precomputed the nonzero structure of the
1875: matrix and want to provide it to the matrix object to improve the performance
1876: of the MatSetValues() operation.
1878: You MUST have set the correct numbers of nonzeros per row in the call to
1879: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1881: MUST be called before any calls to MatSetValues();
1883: @*/
1884: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1885: {
1886: PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1891: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1892: if (f) {
1893: (*f)(mat,indices);
1894: } else {
1895: SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1896: }
1897: return(0);
1898: }
1902: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1903: {
1904: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1906: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
1907: PetscReal atmp;
1908: PetscScalar *x,zero = 0.0;
1909: MatScalar *aa;
1910: PetscInt ncols,brow,krow,kcol;
1913: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1914: bs = A->rmap.bs;
1915: aa = a->a;
1916: ai = a->i;
1917: aj = a->j;
1918: mbs = a->mbs;
1920: VecSet(v,zero);
1921: if (idx) {
1922: for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1923: }
1924: VecGetArray(v,&x);
1925: VecGetLocalSize(v,&n);
1926: if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1927: for (i=0; i<mbs; i++) {
1928: ncols = ai[1] - ai[0]; ai++;
1929: brow = bs*i;
1930: for (j=0; j<ncols; j++){
1931: for (kcol=0; kcol<bs; kcol++){
1932: for (krow=0; krow<bs; krow++){
1933: atmp = PetscAbsScalar(*aa);aa++;
1934: row = brow + krow; /* row index */
1935: /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1936: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1937: }
1938: }
1939: aj++;
1940: }
1941: }
1942: VecRestoreArray(v,&x);
1943: return(0);
1944: }
1948: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1949: {
1953: /* If the two matrices have the same copy implementation, use fast copy. */
1954: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1955: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1956: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1958: if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1959: SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1960: }
1961: PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));
1962: } else {
1963: MatCopy_Basic(A,B,str);
1964: }
1965: return(0);
1966: }
1970: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1971: {
1975: MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);
1976: return(0);
1977: }
1981: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1982: {
1983: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1985: *array = a->a;
1986: return(0);
1987: }
1991: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1992: {
1994: return(0);
1995: }
1997: #include petscblaslapack.h
2000: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2001: {
2002: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2004: PetscInt i,bs=Y->rmap.bs,j,bs2;
2005: PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz;
2008: if (str == SAME_NONZERO_PATTERN) {
2009: PetscScalar alpha = a;
2010: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2011: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2012: if (y->xtoy && y->XtoY != X) {
2013: PetscFree(y->xtoy);
2014: MatDestroy(y->XtoY);
2015: }
2016: if (!y->xtoy) { /* get xtoy */
2017: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2018: y->XtoY = X;
2019: }
2020: bs2 = bs*bs;
2021: for (i=0; i<x->nz; i++) {
2022: j = 0;
2023: while (j < bs2){
2024: y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2025: j++;
2026: }
2027: }
2028: PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2029: } else {
2030: MatAXPY_Basic(Y,a,X,str);
2031: }
2032: return(0);
2033: }
2037: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2038: {
2039: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2040: PetscInt i,nz = a->bs2*a->i[a->mbs];
2041: PetscScalar *aa = a->a;
2044: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2045: return(0);
2046: }
2050: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2051: {
2052: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2053: PetscInt i,nz = a->bs2*a->i[a->mbs];
2054: PetscScalar *aa = a->a;
2057: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2058: return(0);
2059: }
2062: /* -------------------------------------------------------------------*/
2063: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2064: MatGetRow_SeqBAIJ,
2065: MatRestoreRow_SeqBAIJ,
2066: MatMult_SeqBAIJ_N,
2067: /* 4*/ MatMultAdd_SeqBAIJ_N,
2068: MatMultTranspose_SeqBAIJ,
2069: MatMultTransposeAdd_SeqBAIJ,
2070: MatSolve_SeqBAIJ_N,
2071: 0,
2072: 0,
2073: /*10*/ 0,
2074: MatLUFactor_SeqBAIJ,
2075: 0,
2076: 0,
2077: MatTranspose_SeqBAIJ,
2078: /*15*/ MatGetInfo_SeqBAIJ,
2079: MatEqual_SeqBAIJ,
2080: MatGetDiagonal_SeqBAIJ,
2081: MatDiagonalScale_SeqBAIJ,
2082: MatNorm_SeqBAIJ,
2083: /*20*/ 0,
2084: MatAssemblyEnd_SeqBAIJ,
2085: 0,
2086: MatSetOption_SeqBAIJ,
2087: MatZeroEntries_SeqBAIJ,
2088: /*25*/ MatZeroRows_SeqBAIJ,
2089: MatLUFactorSymbolic_SeqBAIJ,
2090: MatLUFactorNumeric_SeqBAIJ_N,
2091: MatCholeskyFactorSymbolic_SeqBAIJ,
2092: MatCholeskyFactorNumeric_SeqBAIJ_N,
2093: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2094: MatILUFactorSymbolic_SeqBAIJ,
2095: MatICCFactorSymbolic_SeqBAIJ,
2096: MatGetArray_SeqBAIJ,
2097: MatRestoreArray_SeqBAIJ,
2098: /*35*/ MatDuplicate_SeqBAIJ,
2099: 0,
2100: 0,
2101: MatILUFactor_SeqBAIJ,
2102: 0,
2103: /*40*/ MatAXPY_SeqBAIJ,
2104: MatGetSubMatrices_SeqBAIJ,
2105: MatIncreaseOverlap_SeqBAIJ,
2106: MatGetValues_SeqBAIJ,
2107: MatCopy_SeqBAIJ,
2108: /*45*/ 0,
2109: MatScale_SeqBAIJ,
2110: 0,
2111: 0,
2112: 0,
2113: /*50*/ 0,
2114: MatGetRowIJ_SeqBAIJ,
2115: MatRestoreRowIJ_SeqBAIJ,
2116: 0,
2117: 0,
2118: /*55*/ 0,
2119: 0,
2120: 0,
2121: 0,
2122: MatSetValuesBlocked_SeqBAIJ,
2123: /*60*/ MatGetSubMatrix_SeqBAIJ,
2124: MatDestroy_SeqBAIJ,
2125: MatView_SeqBAIJ,
2126: 0,
2127: 0,
2128: /*65*/ 0,
2129: 0,
2130: 0,
2131: 0,
2132: 0,
2133: /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2134: MatConvert_Basic,
2135: 0,
2136: 0,
2137: 0,
2138: /*75*/ 0,
2139: 0,
2140: 0,
2141: 0,
2142: 0,
2143: /*80*/ 0,
2144: 0,
2145: 0,
2146: 0,
2147: MatLoad_SeqBAIJ,
2148: /*85*/ 0,
2149: 0,
2150: 0,
2151: 0,
2152: 0,
2153: /*90*/ 0,
2154: 0,
2155: 0,
2156: 0,
2157: 0,
2158: /*95*/ 0,
2159: 0,
2160: 0,
2161: 0,
2162: 0,
2163: /*100*/0,
2164: 0,
2165: 0,
2166: 0,
2167: 0,
2168: /*105*/0,
2169: MatRealPart_SeqBAIJ,
2170: MatImaginaryPart_SeqBAIJ,
2171: 0,
2172: 0,
2173: /*110*/0,
2174: 0,
2175: 0,
2176: 0,
2177: MatMissingDiagonal_SeqBAIJ
2178: /*115*/
2179: };
2184: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2185: {
2186: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2187: PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2191: if (aij->nonew != 1) {
2192: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2193: }
2195: /* allocate space for values if not already there */
2196: if (!aij->saved_values) {
2197: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2198: }
2200: /* copy values over */
2201: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2202: return(0);
2203: }
2209: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2210: {
2211: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2213: PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2216: if (aij->nonew != 1) {
2217: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2218: }
2219: if (!aij->saved_values) {
2220: SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2221: }
2223: /* copy values over */
2224: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2225: return(0);
2226: }
2237: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2238: {
2239: Mat_SeqBAIJ *b;
2241: PetscInt i,mbs,nbs,bs2,newbs = bs;
2242: PetscTruth flg,skipallocation = PETSC_FALSE;
2246: if (nz == MAT_SKIP_ALLOCATION) {
2247: skipallocation = PETSC_TRUE;
2248: nz = 0;
2249: }
2251: PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2252: PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);
2253: PetscOptionsEnd();
2255: if (nnz && newbs != bs) {
2256: SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2257: }
2258: bs = newbs;
2260: B->rmap.bs = B->cmap.bs = bs;
2261: PetscMapSetUp(&B->rmap);
2262: PetscMapSetUp(&B->cmap);
2264: B->preallocated = PETSC_TRUE;
2266: mbs = B->rmap.n/bs;
2267: nbs = B->cmap.n/bs;
2268: bs2 = bs*bs;
2270: if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2271: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2272: }
2274: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2275: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2276: if (nnz) {
2277: for (i=0; i<mbs; i++) {
2278: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2279: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2280: }
2281: }
2283: b = (Mat_SeqBAIJ*)B->data;
2284: PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2285: PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2286: PetscOptionsEnd();
2288: B->ops->solve = MatSolve_SeqBAIJ_Update;
2289: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
2290: if (!flg) {
2291: switch (bs) {
2292: case 1:
2293: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2294: B->ops->mult = MatMult_SeqBAIJ_1;
2295: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2296: break;
2297: case 2:
2298: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2299: B->ops->mult = MatMult_SeqBAIJ_2;
2300: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2301: B->ops->pbrelax = MatPBRelax_SeqBAIJ_2;
2302: break;
2303: case 3:
2304: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2305: B->ops->mult = MatMult_SeqBAIJ_3;
2306: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2307: B->ops->pbrelax = MatPBRelax_SeqBAIJ_3;
2308: break;
2309: case 4:
2310: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2311: B->ops->mult = MatMult_SeqBAIJ_4;
2312: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2313: B->ops->pbrelax = MatPBRelax_SeqBAIJ_4;
2314: break;
2315: case 5:
2316: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2317: B->ops->mult = MatMult_SeqBAIJ_5;
2318: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2319: B->ops->pbrelax = MatPBRelax_SeqBAIJ_5;
2320: break;
2321: case 6:
2322: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2323: B->ops->mult = MatMult_SeqBAIJ_6;
2324: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2325: break;
2326: case 7:
2327: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2328: B->ops->mult = MatMult_SeqBAIJ_7;
2329: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2330: break;
2331: default:
2332: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2333: B->ops->mult = MatMult_SeqBAIJ_N;
2334: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2335: break;
2336: }
2337: }
2338: B->rmap.bs = bs;
2339: b->mbs = mbs;
2340: b->nbs = nbs;
2341: if (!skipallocation) {
2342: if (!b->imax) {
2343: PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2344: }
2345: /* b->ilen will count nonzeros in each block row so far. */
2346: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2347: if (!nnz) {
2348: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2349: else if (nz <= 0) nz = 1;
2350: for (i=0; i<mbs; i++) b->imax[i] = nz;
2351: nz = nz*mbs;
2352: } else {
2353: nz = 0;
2354: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2355: }
2357: /* allocate the matrix space */
2358: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2359: PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);
2360: PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2361: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2362: PetscMemzero(b->j,nz*sizeof(PetscInt));
2363: b->singlemalloc = PETSC_TRUE;
2364: b->i[0] = 0;
2365: for (i=1; i<mbs+1; i++) {
2366: b->i[i] = b->i[i-1] + b->imax[i-1];
2367: }
2368: b->free_a = PETSC_TRUE;
2369: b->free_ij = PETSC_TRUE;
2370: } else {
2371: b->free_a = PETSC_FALSE;
2372: b->free_ij = PETSC_FALSE;
2373: }
2375: B->rmap.bs = bs;
2376: b->bs2 = bs2;
2377: b->mbs = mbs;
2378: b->nz = 0;
2379: b->maxnz = nz*bs2;
2380: B->info.nz_unneeded = (PetscReal)b->maxnz;
2381: return(0);
2382: }
2385: /*MC
2386: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2387: block sparse compressed row format.
2389: Options Database Keys:
2390: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2392: Level: beginner
2394: .seealso: MatCreateSeqBAIJ()
2395: M*/
2400: PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2401: {
2403: PetscMPIInt size;
2404: Mat_SeqBAIJ *b;
2407: MPI_Comm_size(((PetscObject)B)->comm,&size);
2408: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2410: PetscNewLog(B,Mat_SeqBAIJ,&b);
2411: B->data = (void*)b;
2412: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2413: B->factor = 0;
2414: B->mapping = 0;
2415: b->row = 0;
2416: b->col = 0;
2417: b->icol = 0;
2418: b->reallocs = 0;
2419: b->saved_values = 0;
2420: #if defined(PETSC_USE_MAT_SINGLE)
2421: b->setvalueslen = 0;
2422: b->setvaluescopy = PETSC_NULL;
2423: #endif
2425: b->roworiented = PETSC_TRUE;
2426: b->nonew = 0;
2427: b->diag = 0;
2428: b->solve_work = 0;
2429: b->mult_work = 0;
2430: B->spptr = 0;
2431: B->info.nz_unneeded = (PetscReal)b->maxnz;
2432: b->keepzeroedrows = PETSC_FALSE;
2433: b->xtoy = 0;
2434: b->XtoY = 0;
2435: b->compressedrow.use = PETSC_FALSE;
2436: b->compressedrow.nrows = 0;
2437: b->compressedrow.i = PETSC_NULL;
2438: b->compressedrow.rindex = PETSC_NULL;
2439: b->compressedrow.checked = PETSC_FALSE;
2440: B->same_nonzero = PETSC_FALSE;
2442: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2443: "MatInvertBlockDiagonal_SeqBAIJ",
2444: MatInvertBlockDiagonal_SeqBAIJ);
2445: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2446: "MatStoreValues_SeqBAIJ",
2447: MatStoreValues_SeqBAIJ);
2448: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2449: "MatRetrieveValues_SeqBAIJ",
2450: MatRetrieveValues_SeqBAIJ);
2451: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2452: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2453: MatSeqBAIJSetColumnIndices_SeqBAIJ);
2454: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2455: "MatConvert_SeqBAIJ_SeqAIJ",
2456: MatConvert_SeqBAIJ_SeqAIJ);
2457: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2458: "MatConvert_SeqBAIJ_SeqSBAIJ",
2459: MatConvert_SeqBAIJ_SeqSBAIJ);
2460: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2461: "MatSeqBAIJSetPreallocation_SeqBAIJ",
2462: MatSeqBAIJSetPreallocation_SeqBAIJ);
2463: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
2464: return(0);
2465: }
2470: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2471: {
2472: Mat C;
2473: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2475: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2478: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2480: *B = 0;
2481: MatCreate(((PetscObject)A)->comm,&C);
2482: MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);
2483: MatSetType(C,((PetscObject)A)->type_name);
2484: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2485: c = (Mat_SeqBAIJ*)C->data;
2487: C->rmap.N = A->rmap.N;
2488: C->cmap.N = A->cmap.N;
2489: C->rmap.bs = A->rmap.bs;
2490: c->bs2 = a->bs2;
2491: c->mbs = a->mbs;
2492: c->nbs = a->nbs;
2494: PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2495: for (i=0; i<mbs; i++) {
2496: c->imax[i] = a->imax[i];
2497: c->ilen[i] = a->ilen[i];
2498: }
2500: /* allocate the matrix space */
2501: PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2502: c->singlemalloc = PETSC_TRUE;
2503: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2504: if (mbs > 0) {
2505: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2506: if (cpvalues == MAT_COPY_VALUES) {
2507: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2508: } else {
2509: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2510: }
2511: }
2512: c->roworiented = a->roworiented;
2513: c->nonew = a->nonew;
2515: if (a->diag) {
2516: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2517: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2518: for (i=0; i<mbs; i++) {
2519: c->diag[i] = a->diag[i];
2520: }
2521: } else c->diag = 0;
2522: c->nz = a->nz;
2523: c->maxnz = a->maxnz;
2524: c->solve_work = 0;
2525: c->mult_work = 0;
2526: c->free_a = PETSC_TRUE;
2527: c->free_ij = PETSC_TRUE;
2528: C->preallocated = PETSC_TRUE;
2529: C->assembled = PETSC_TRUE;
2531: c->compressedrow.use = a->compressedrow.use;
2532: c->compressedrow.nrows = a->compressedrow.nrows;
2533: c->compressedrow.checked = a->compressedrow.checked;
2534: if ( a->compressedrow.checked && a->compressedrow.use){
2535: i = a->compressedrow.nrows;
2536: PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2537: c->compressedrow.rindex = c->compressedrow.i + i + 1;
2538: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2539: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2540: } else {
2541: c->compressedrow.use = PETSC_FALSE;
2542: c->compressedrow.i = PETSC_NULL;
2543: c->compressedrow.rindex = PETSC_NULL;
2544: }
2545: C->same_nonzero = A->same_nonzero;
2546: *B = C;
2547: PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2548: return(0);
2549: }
2553: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2554: {
2555: Mat_SeqBAIJ *a;
2556: Mat B;
2558: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
2559: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2560: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows;
2561: PetscInt *masked,nmask,tmp,bs2,ishift;
2562: PetscMPIInt size;
2563: int fd;
2564: PetscScalar *aa;
2565: MPI_Comm comm = ((PetscObject)viewer)->comm;
2568: PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
2569: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2570: PetscOptionsEnd();
2571: bs2 = bs*bs;
2573: MPI_Comm_size(comm,&size);
2574: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2575: PetscViewerBinaryGetDescriptor(viewer,&fd);
2576: PetscBinaryRead(fd,header,4,PETSC_INT);
2577: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2578: M = header[1]; N = header[2]; nz = header[3];
2580: if (header[3] < 0) {
2581: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2582: }
2584: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2586: /*
2587: This code adds extra rows to make sure the number of rows is
2588: divisible by the blocksize
2589: */
2590: mbs = M/bs;
2591: extra_rows = bs - M + bs*(mbs);
2592: if (extra_rows == bs) extra_rows = 0;
2593: else mbs++;
2594: if (extra_rows) {
2595: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2596: }
2598: /* read in row lengths */
2599: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2600: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2601: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2603: /* read in column indices */
2604: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2605: PetscBinaryRead(fd,jj,nz,PETSC_INT);
2606: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2608: /* loop over row lengths determining block row lengths */
2609: PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2610: PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2611: PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2612: PetscMemzero(mask,mbs*sizeof(PetscInt));
2613: masked = mask + mbs;
2614: rowcount = 0; nzcount = 0;
2615: for (i=0; i<mbs; i++) {
2616: nmask = 0;
2617: for (j=0; j<bs; j++) {
2618: kmax = rowlengths[rowcount];
2619: for (k=0; k<kmax; k++) {
2620: tmp = jj[nzcount++]/bs;
2621: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2622: }
2623: rowcount++;
2624: }
2625: browlengths[i] += nmask;
2626: /* zero out the mask elements we set */
2627: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2628: }
2630: /* create our matrix */
2631: MatCreate(comm,&B);
2632: MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2633: MatSetType(B,type);
2634: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
2635: a = (Mat_SeqBAIJ*)B->data;
2637: /* set matrix "i" values */
2638: a->i[0] = 0;
2639: for (i=1; i<= mbs; i++) {
2640: a->i[i] = a->i[i-1] + browlengths[i-1];
2641: a->ilen[i-1] = browlengths[i-1];
2642: }
2643: a->nz = 0;
2644: for (i=0; i<mbs; i++) a->nz += browlengths[i];
2646: /* read in nonzero values */
2647: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2648: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2649: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2651: /* set "a" and "j" values into matrix */
2652: nzcount = 0; jcount = 0;
2653: for (i=0; i<mbs; i++) {
2654: nzcountb = nzcount;
2655: nmask = 0;
2656: for (j=0; j<bs; j++) {
2657: kmax = rowlengths[i*bs+j];
2658: for (k=0; k<kmax; k++) {
2659: tmp = jj[nzcount++]/bs;
2660: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2661: }
2662: }
2663: /* sort the masked values */
2664: PetscSortInt(nmask,masked);
2666: /* set "j" values into matrix */
2667: maskcount = 1;
2668: for (j=0; j<nmask; j++) {
2669: a->j[jcount++] = masked[j];
2670: mask[masked[j]] = maskcount++;
2671: }
2672: /* set "a" values into matrix */
2673: ishift = bs2*a->i[i];
2674: for (j=0; j<bs; j++) {
2675: kmax = rowlengths[i*bs+j];
2676: for (k=0; k<kmax; k++) {
2677: tmp = jj[nzcountb]/bs ;
2678: block = mask[tmp] - 1;
2679: point = jj[nzcountb] - bs*tmp;
2680: idx = ishift + bs2*block + j + bs*point;
2681: a->a[idx] = (MatScalar)aa[nzcountb++];
2682: }
2683: }
2684: /* zero out the mask elements we set */
2685: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2686: }
2687: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2689: PetscFree(rowlengths);
2690: PetscFree(browlengths);
2691: PetscFree(aa);
2692: PetscFree(jj);
2693: PetscFree(mask);
2695: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2696: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2697: MatView_Private(B);
2699: *A = B;
2700: return(0);
2701: }
2705: /*@C
2706: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2707: compressed row) format. For good matrix assembly performance the
2708: user should preallocate the matrix storage by setting the parameter nz
2709: (or the array nnz). By setting these parameters accurately, performance
2710: during matrix assembly can be increased by more than a factor of 50.
2712: Collective on MPI_Comm
2714: Input Parameters:
2715: + comm - MPI communicator, set to PETSC_COMM_SELF
2716: . bs - size of block
2717: . m - number of rows
2718: . n - number of columns
2719: . nz - number of nonzero blocks per block row (same for all rows)
2720: - nnz - array containing the number of nonzero blocks in the various block rows
2721: (possibly different for each block row) or PETSC_NULL
2723: Output Parameter:
2724: . A - the matrix
2726: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2727: MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
2728: true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2729: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2731: Options Database Keys:
2732: . -mat_no_unroll - uses code that does not unroll the loops in the
2733: block calculations (much slower)
2734: . -mat_block_size - size of the blocks to use
2736: Level: intermediate
2738: Notes:
2739: The number of rows and columns must be divisible by blocksize.
2741: If the nnz parameter is given then the nz parameter is ignored
2743: A nonzero block is any block that as 1 or more nonzeros in it
2745: The block AIJ format is fully compatible with standard Fortran 77
2746: storage. That is, the stored row and column indices can begin at
2747: either one (as in Fortran) or zero. See the users' manual for details.
2749: Specify the preallocated storage with either nz or nnz (not both).
2750: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2751: allocation. For additional details, see the users manual chapter on
2752: matrices.
2754: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2755: @*/
2756: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2757: {
2759:
2761: MatCreate(comm,A);
2762: MatSetSizes(*A,m,n,m,n);
2763: MatSetType(*A,MATSEQBAIJ);
2764: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
2765: return(0);
2766: }
2770: /*@C
2771: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2772: per row in the matrix. For good matrix assembly performance the
2773: user should preallocate the matrix storage by setting the parameter nz
2774: (or the array nnz). By setting these parameters accurately, performance
2775: during matrix assembly can be increased by more than a factor of 50.
2777: Collective on MPI_Comm
2779: Input Parameters:
2780: + A - the matrix
2781: . bs - size of block
2782: . nz - number of block nonzeros per block row (same for all rows)
2783: - nnz - array containing the number of block nonzeros in the various block rows
2784: (possibly different for each block row) or PETSC_NULL
2786: Options Database Keys:
2787: . -mat_no_unroll - uses code that does not unroll the loops in the
2788: block calculations (much slower)
2789: . -mat_block_size - size of the blocks to use
2791: Level: intermediate
2793: Notes:
2794: If the nnz parameter is given then the nz parameter is ignored
2796: You can call MatGetInfo() to get information on how effective the preallocation was;
2797: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2798: You can also run with the option -info and look for messages with the string
2799: malloc in them to see if additional memory allocation was needed.
2801: The block AIJ format is fully compatible with standard Fortran 77
2802: storage. That is, the stored row and column indices can begin at
2803: either one (as in Fortran) or zero. See the users' manual for details.
2805: Specify the preallocated storage with either nz or nnz (not both).
2806: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2807: allocation. For additional details, see the users manual chapter on
2808: matrices.
2810: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
2811: @*/
2812: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2813: {
2814: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2817: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2818: if (f) {
2819: (*f)(B,bs,nz,nnz);
2820: }
2821: return(0);
2822: }
2826: /*@
2827: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2828: (upper triangular entries in CSR format) provided by the user.
2830: Collective on MPI_Comm
2832: Input Parameters:
2833: + comm - must be an MPI communicator of size 1
2834: . bs - size of block
2835: . m - number of rows
2836: . n - number of columns
2837: . i - row indices
2838: . j - column indices
2839: - a - matrix values
2841: Output Parameter:
2842: . mat - the matrix
2844: Level: intermediate
2846: Notes:
2847: The i, j, and a arrays are not copied by this routine, the user must free these arrays
2848: once the matrix is destroyed
2850: You cannot set new nonzero locations into this matrix, that will generate an error.
2852: The i and j indices are 0 based
2854: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2856: @*/
2857: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2858: {
2860: PetscInt ii;
2861: Mat_SeqBAIJ *baij;
2864: if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2865: if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2866:
2867: MatCreate(comm,mat);
2868: MatSetSizes(*mat,m,n,m,n);
2869: MatSetType(*mat,MATSEQBAIJ);
2870: MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
2871: baij = (Mat_SeqBAIJ*)(*mat)->data;
2872: PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
2874: baij->i = i;
2875: baij->j = j;
2876: baij->a = a;
2877: baij->singlemalloc = PETSC_FALSE;
2878: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2879: baij->free_a = PETSC_FALSE;
2880: baij->free_ij = PETSC_FALSE;
2882: for (ii=0; ii<m; ii++) {
2883: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2884: #if defined(PETSC_USE_DEBUG)
2885: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2886: #endif
2887: }
2888: #if defined(PETSC_USE_DEBUG)
2889: for (ii=0; ii<baij->i[m]; ii++) {
2890: if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2891: if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2892: }
2893: #endif
2895: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
2896: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
2897: return(0);
2898: }