Actual source code: baij.c
1: /*
2: Defines the basic matrix operations for the BAIJ (compressed row)
3: matrix storage format.
4: */
5: #include src/mat/impls/baij/seq/baij.h
6: #include src/inline/spops.h
7: #include petscsys.h
9: #include src/inline/ilu.h
13: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
14: {
15: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
17: PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs;
18: PetscScalar *v = a->a,*odiag,*diag,*mdiag;
21: if (a->idiagvalid) return(0);
22: MatMarkDiagonal_SeqBAIJ(A);
23: diag_offset = a->diag;
24: if (!a->idiag) {
25: PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
26: }
27: diag = a->idiag;
28: mdiag = a->idiag+bs*bs*mbs;
29: /* factor and invert each block */
30: switch (bs){
31: case 2:
32: for (i=0; i<mbs; i++) {
33: odiag = v + 4*diag_offset[i];
34: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
35: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
36: Kernel_A_gets_inverse_A_2(diag);
37: diag += 4;
38: mdiag += 4;
39: }
40: break;
41: case 3:
42: for (i=0; i<mbs; i++) {
43: odiag = v + 9*diag_offset[i];
44: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
45: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
46: diag[8] = odiag[8];
47: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
48: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
49: mdiag[8] = odiag[8];
50: Kernel_A_gets_inverse_A_3(diag);
51: diag += 9;
52: mdiag += 9;
53: }
54: break;
55: case 4:
56: for (i=0; i<mbs; i++) {
57: odiag = v + 16*diag_offset[i];
58: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
59: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
60: Kernel_A_gets_inverse_A_4(diag);
61: diag += 16;
62: mdiag += 16;
63: }
64: break;
65: case 5:
66: for (i=0; i<mbs; i++) {
67: odiag = v + 25*diag_offset[i];
68: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
69: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
70: Kernel_A_gets_inverse_A_5(diag);
71: diag += 25;
72: mdiag += 25;
73: }
74: break;
75: default:
76: SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
77: }
78: a->idiagvalid = PETSC_TRUE;
79: return(0);
80: }
84: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
85: {
86: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
87: PetscScalar *x,x1,x2,s1,s2;
88: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
89: PetscErrorCode ierr;
90: PetscInt m = a->mbs,i,i2,nz,idx;
91: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
94: its = its*lits;
95: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
96: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
97: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
98: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
99: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
101: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
103: diag = a->diag;
104: idiag = a->idiag;
105: VecGetArray(xx,&x);
106: VecGetArray(bb,(PetscScalar**)&b);
108: if (flag & SOR_ZERO_INITIAL_GUESS) {
109: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110: x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111: x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112: i2 = 2;
113: idiag += 4;
114: for (i=1; i<m; i++) {
115: v = aa + 4*ai[i];
116: vi = aj + ai[i];
117: nz = diag[i] - ai[i];
118: s1 = b[i2]; s2 = b[i2+1];
119: while (nz--) {
120: idx = 2*(*vi++);
121: x1 = x[idx]; x2 = x[1+idx];
122: s1 -= v[0]*x1 + v[2]*x2;
123: s2 -= v[1]*x1 + v[3]*x2;
124: v += 4;
125: }
126: x[i2] = idiag[0]*s1 + idiag[2]*s2;
127: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128: idiag += 4;
129: i2 += 2;
130: }
131: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132: PetscLogFlops(4*(a->nz));
133: }
134: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136: i2 = 0;
137: mdiag = a->idiag+4*a->mbs;
138: for (i=0; i<m; i++) {
139: x1 = x[i2]; x2 = x[i2+1];
140: x[i2] = mdiag[0]*x1 + mdiag[2]*x2;
141: x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142: mdiag += 4;
143: i2 += 2;
144: }
145: PetscLogFlops(6*m);
146: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147: PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
148: }
149: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150: idiag = a->idiag+4*a->mbs - 4;
151: i2 = 2*m - 2;
152: x1 = x[i2]; x2 = x[i2+1];
153: x[i2] = idiag[0]*x1 + idiag[2]*x2;
154: x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155: idiag -= 4;
156: i2 -= 2;
157: for (i=m-2; i>=0; i--) {
158: v = aa + 4*(diag[i]+1);
159: vi = aj + diag[i] + 1;
160: nz = ai[i+1] - diag[i] - 1;
161: s1 = x[i2]; s2 = x[i2+1];
162: while (nz--) {
163: idx = 2*(*vi++);
164: x1 = x[idx]; x2 = x[1+idx];
165: s1 -= v[0]*x1 + v[2]*x2;
166: s2 -= v[1]*x1 + v[3]*x2;
167: v += 4;
168: }
169: x[i2] = idiag[0]*s1 + idiag[2]*s2;
170: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171: idiag -= 4;
172: i2 -= 2;
173: }
174: PetscLogFlops(4*(a->nz));
175: }
176: } else {
177: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
178: }
179: VecRestoreArray(xx,&x);
180: VecRestoreArray(bb,(PetscScalar**)&b);
181: return(0);
182: }
186: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
187: {
188: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
189: PetscScalar *x,x1,x2,x3,s1,s2,s3;
190: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
191: PetscErrorCode ierr;
192: PetscInt m = a->mbs,i,i2,nz,idx;
193: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
196: its = its*lits;
197: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
198: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
199: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
200: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
201: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
203: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
205: diag = a->diag;
206: idiag = a->idiag;
207: VecGetArray(xx,&x);
208: VecGetArray(bb,(PetscScalar**)&b);
210: if (flag & SOR_ZERO_INITIAL_GUESS) {
211: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
212: x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
213: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
214: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
215: i2 = 3;
216: idiag += 9;
217: for (i=1; i<m; i++) {
218: v = aa + 9*ai[i];
219: vi = aj + ai[i];
220: nz = diag[i] - ai[i];
221: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
222: while (nz--) {
223: idx = 3*(*vi++);
224: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
225: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
226: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
227: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
228: v += 9;
229: }
230: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
231: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
232: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
233: idiag += 9;
234: i2 += 3;
235: }
236: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
237: PetscLogFlops(9*(a->nz));
238: }
239: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
240: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
241: i2 = 0;
242: mdiag = a->idiag+9*a->mbs;
243: for (i=0; i<m; i++) {
244: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
245: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
246: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
247: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
248: mdiag += 9;
249: i2 += 3;
250: }
251: PetscLogFlops(15*m);
252: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
253: PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
254: }
255: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
256: idiag = a->idiag+9*a->mbs - 9;
257: i2 = 3*m - 3;
258: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
259: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
260: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
261: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
262: idiag -= 9;
263: i2 -= 3;
264: for (i=m-2; i>=0; i--) {
265: v = aa + 9*(diag[i]+1);
266: vi = aj + diag[i] + 1;
267: nz = ai[i+1] - diag[i] - 1;
268: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
269: while (nz--) {
270: idx = 3*(*vi++);
271: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
272: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
273: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
274: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
275: v += 9;
276: }
277: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
278: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
279: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
280: idiag -= 9;
281: i2 -= 3;
282: }
283: PetscLogFlops(9*(a->nz));
284: }
285: } else {
286: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
287: }
288: VecRestoreArray(xx,&x);
289: VecRestoreArray(bb,(PetscScalar**)&b);
290: return(0);
291: }
295: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
296: {
297: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
298: PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4;
299: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
300: PetscErrorCode ierr;
301: PetscInt m = a->mbs,i,i2,nz,idx;
302: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
305: its = its*lits;
306: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
307: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
308: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
309: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
310: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
312: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
314: diag = a->diag;
315: idiag = a->idiag;
316: VecGetArray(xx,&x);
317: VecGetArray(bb,(PetscScalar**)&b);
319: if (flag & SOR_ZERO_INITIAL_GUESS) {
320: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
321: x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12];
322: x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13];
323: x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
324: x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
325: i2 = 4;
326: idiag += 16;
327: for (i=1; i<m; i++) {
328: v = aa + 16*ai[i];
329: vi = aj + ai[i];
330: nz = diag[i] - ai[i];
331: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
332: while (nz--) {
333: idx = 4*(*vi++);
334: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
335: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
336: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
337: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
338: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
339: v += 16;
340: }
341: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
342: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
343: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
344: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
345: idiag += 16;
346: i2 += 4;
347: }
348: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
349: PetscLogFlops(16*(a->nz));
350: }
351: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
352: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
353: i2 = 0;
354: mdiag = a->idiag+16*a->mbs;
355: for (i=0; i<m; i++) {
356: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
357: x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4;
358: x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4;
359: x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
360: x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
361: mdiag += 16;
362: i2 += 4;
363: }
364: PetscLogFlops(28*m);
365: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
366: PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
367: }
368: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
369: idiag = a->idiag+16*a->mbs - 16;
370: i2 = 4*m - 4;
371: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
372: x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4;
373: x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4;
374: x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
375: x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
376: idiag -= 16;
377: i2 -= 4;
378: for (i=m-2; i>=0; i--) {
379: v = aa + 16*(diag[i]+1);
380: vi = aj + diag[i] + 1;
381: nz = ai[i+1] - diag[i] - 1;
382: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
383: while (nz--) {
384: idx = 4*(*vi++);
385: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
386: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
387: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
388: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
389: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
390: v += 16;
391: }
392: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
393: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
394: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
395: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
396: idiag -= 16;
397: i2 -= 4;
398: }
399: PetscLogFlops(16*(a->nz));
400: }
401: } else {
402: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
403: }
404: VecRestoreArray(xx,&x);
405: VecRestoreArray(bb,(PetscScalar**)&b);
406: return(0);
407: }
411: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
412: {
413: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
414: PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
415: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
416: PetscErrorCode ierr;
417: PetscInt m = a->mbs,i,i2,nz,idx;
418: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
421: its = its*lits;
422: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
423: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
424: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
425: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
426: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
428: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
430: diag = a->diag;
431: idiag = a->idiag;
432: VecGetArray(xx,&x);
433: VecGetArray(bb,(PetscScalar**)&b);
435: if (flag & SOR_ZERO_INITIAL_GUESS) {
436: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
437: x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
438: x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
439: x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
440: x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
441: x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
442: i2 = 5;
443: idiag += 25;
444: for (i=1; i<m; i++) {
445: v = aa + 25*ai[i];
446: vi = aj + ai[i];
447: nz = diag[i] - ai[i];
448: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
449: while (nz--) {
450: idx = 5*(*vi++);
451: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
452: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
453: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
454: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
455: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
456: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
457: v += 25;
458: }
459: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
460: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
461: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
462: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
463: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
464: idiag += 25;
465: i2 += 5;
466: }
467: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
468: PetscLogFlops(25*(a->nz));
469: }
470: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
471: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
472: i2 = 0;
473: mdiag = a->idiag+25*a->mbs;
474: for (i=0; i<m; i++) {
475: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
476: x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
477: x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
478: x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
479: x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
480: x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
481: mdiag += 25;
482: i2 += 5;
483: }
484: PetscLogFlops(45*m);
485: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
486: PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
487: }
488: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
489: idiag = a->idiag+25*a->mbs - 25;
490: i2 = 5*m - 5;
491: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
492: x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
493: x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
494: x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
495: x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
496: x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
497: idiag -= 25;
498: i2 -= 5;
499: for (i=m-2; i>=0; i--) {
500: v = aa + 25*(diag[i]+1);
501: vi = aj + diag[i] + 1;
502: nz = ai[i+1] - diag[i] - 1;
503: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
504: while (nz--) {
505: idx = 5*(*vi++);
506: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
507: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
508: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
509: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
510: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
511: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
512: v += 25;
513: }
514: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
515: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
516: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
517: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
518: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
519: idiag -= 25;
520: i2 -= 5;
521: }
522: PetscLogFlops(25*(a->nz));
523: }
524: } else {
525: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
526: }
527: VecRestoreArray(xx,&x);
528: VecRestoreArray(bb,(PetscScalar**)&b);
529: return(0);
530: }
532: /*
533: Special version for Fun3d sequential benchmark
534: */
535: #if defined(PETSC_HAVE_FORTRAN_CAPS)
536: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
537: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
538: #define matsetvaluesblocked4_ matsetvaluesblocked4
539: #endif
544: void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
545: {
546: Mat A = *AA;
547: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
548: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
549: PetscInt *ai=a->i,*ailen=a->ilen;
550: PetscInt *aj=a->j,stepval;
551: const PetscScalar *value = v;
552: MatScalar *ap,*aa = a->a,*bap;
555: stepval = (n-1)*4;
556: for (k=0; k<m; k++) { /* loop over added rows */
557: row = im[k];
558: rp = aj + ai[row];
559: ap = aa + 16*ai[row];
560: nrow = ailen[row];
561: low = 0;
562: for (l=0; l<n; l++) { /* loop over added columns */
563: col = in[l];
564: value = v + k*(stepval+4)*4 + l*4;
565: low = 0; high = nrow;
566: while (high-low > 7) {
567: t = (low+high)/2;
568: if (rp[t] > col) high = t;
569: else low = t;
570: }
571: for (i=low; i<high; i++) {
572: if (rp[i] > col) break;
573: if (rp[i] == col) {
574: bap = ap + 16*i;
575: for (ii=0; ii<4; ii++,value+=stepval) {
576: for (jj=ii; jj<16; jj+=4) {
577: bap[jj] += *value++;
578: }
579: }
580: goto noinsert2;
581: }
582: }
583: N = nrow++ - 1;
584: /* shift up all the later entries in this row */
585: for (ii=N; ii>=i; ii--) {
586: rp[ii+1] = rp[ii];
587: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
588: }
589: if (N >= i) {
590: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
591: }
592: rp[i] = col;
593: bap = ap + 16*i;
594: for (ii=0; ii<4; ii++,value+=stepval) {
595: for (jj=ii; jj<16; jj+=4) {
596: bap[jj] = *value++;
597: }
598: }
599: noinsert2:;
600: low = i;
601: }
602: ailen[row] = nrow;
603: }
604: }
607: #if defined(PETSC_HAVE_FORTRAN_CAPS)
608: #define matsetvalues4_ MATSETVALUES4
609: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
610: #define matsetvalues4_ matsetvalues4
611: #endif
616: void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
617: {
618: Mat A = *AA;
619: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
620: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
621: PetscInt *ai=a->i,*ailen=a->ilen;
622: PetscInt *aj=a->j,brow,bcol;
623: PetscInt ridx,cidx;
624: MatScalar *ap,value,*aa=a->a,*bap;
625:
627: for (k=0; k<m; k++) { /* loop over added rows */
628: row = im[k]; brow = row/4;
629: rp = aj + ai[brow];
630: ap = aa + 16*ai[brow];
631: nrow = ailen[brow];
632: low = 0;
633: for (l=0; l<n; l++) { /* loop over added columns */
634: col = in[l]; bcol = col/4;
635: ridx = row % 4; cidx = col % 4;
636: value = v[l + k*n];
637: low = 0; high = nrow;
638: while (high-low > 7) {
639: t = (low+high)/2;
640: if (rp[t] > bcol) high = t;
641: else low = t;
642: }
643: for (i=low; i<high; i++) {
644: if (rp[i] > bcol) break;
645: if (rp[i] == bcol) {
646: bap = ap + 16*i + 4*cidx + ridx;
647: *bap += value;
648: goto noinsert1;
649: }
650: }
651: N = nrow++ - 1;
652: /* shift up all the later entries in this row */
653: for (ii=N; ii>=i; ii--) {
654: rp[ii+1] = rp[ii];
655: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
656: }
657: if (N>=i) {
658: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
659: }
660: rp[i] = bcol;
661: ap[16*i + 4*cidx + ridx] = value;
662: noinsert1:;
663: low = i;
664: }
665: ailen[brow] = nrow;
666: }
667: }
670: /* UGLY, ugly, ugly
671: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
672: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
673: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
674: converts the entries into single precision and then calls ..._MatScalar() to put them
675: into the single precision data structures.
676: */
677: #if defined(PETSC_USE_MAT_SINGLE)
678: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
679: #else
680: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
681: #endif
683: #define CHUNKSIZE 10
685: /*
686: Checks for missing diagonals
687: */
690: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
691: {
692: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
694: PetscInt *diag,*jj = a->j,i;
697: MatMarkDiagonal_SeqBAIJ(A);
698: diag = a->diag;
699: for (i=0; i<a->mbs; i++) {
700: if (jj[diag[i]] != i) {
701: SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
702: }
703: }
704: return(0);
705: }
709: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
710: {
711: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
713: PetscInt i,j,*diag,m = a->mbs;
716: if (a->diag) return(0);
718: PetscMalloc((m+1)*sizeof(PetscInt),&diag);
719: PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));
720: for (i=0; i<m; i++) {
721: diag[i] = a->i[i+1];
722: for (j=a->i[i]; j<a->i[i+1]; j++) {
723: if (a->j[j] == i) {
724: diag[i] = j;
725: break;
726: }
727: }
728: }
729: a->diag = diag;
730: return(0);
731: }
734: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
738: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
739: {
740: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
742: PetscInt n = a->mbs,i;
745: *nn = n;
746: if (!ia) return(0);
747: if (symmetric) {
748: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
749: } else if (oshift == 1) {
750: /* temporarily add 1 to i and j indices */
751: PetscInt nz = a->i[n];
752: for (i=0; i<nz; i++) a->j[i]++;
753: for (i=0; i<n+1; i++) a->i[i]++;
754: *ia = a->i; *ja = a->j;
755: } else {
756: *ia = a->i; *ja = a->j;
757: }
759: return(0);
760: }
764: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
765: {
766: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
768: PetscInt i,n = a->mbs;
771: if (!ia) return(0);
772: if (symmetric) {
773: PetscFree(*ia);
774: PetscFree(*ja);
775: } else if (oshift == 1) {
776: PetscInt nz = a->i[n]-1;
777: for (i=0; i<nz; i++) a->j[i]--;
778: for (i=0; i<n+1; i++) a->i[i]--;
779: }
780: return(0);
781: }
785: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
786: {
787: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
791: #if defined(PETSC_USE_LOG)
792: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
793: #endif
794: PetscFree(a->a);
795: if (!a->singlemalloc) {
796: PetscFree(a->i);
797: PetscFree(a->j);
798: }
799: if (a->row) {
800: ISDestroy(a->row);
801: }
802: if (a->col) {
803: ISDestroy(a->col);
804: }
805: if (a->diag) {PetscFree(a->diag);}
806: if (a->idiag) {PetscFree(a->idiag);}
807: if (a->ilen) {PetscFree(a->ilen);}
808: if (a->imax) {PetscFree(a->imax);}
809: if (a->solve_work) {PetscFree(a->solve_work);}
810: if (a->mult_work) {PetscFree(a->mult_work);}
811: if (a->icol) {ISDestroy(a->icol);}
812: if (a->saved_values) {PetscFree(a->saved_values);}
813: #if defined(PETSC_USE_MAT_SINGLE)
814: if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
815: #endif
816: if (a->xtoy) {PetscFree(a->xtoy);}
818: PetscFree(a);
820: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
821: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
822: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
823: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
824: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
825: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
826: return(0);
827: }
831: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
832: {
833: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
836: switch (op) {
837: case MAT_ROW_ORIENTED:
838: a->roworiented = PETSC_TRUE;
839: break;
840: case MAT_COLUMN_ORIENTED:
841: a->roworiented = PETSC_FALSE;
842: break;
843: case MAT_COLUMNS_SORTED:
844: a->sorted = PETSC_TRUE;
845: break;
846: case MAT_COLUMNS_UNSORTED:
847: a->sorted = PETSC_FALSE;
848: break;
849: case MAT_KEEP_ZEROED_ROWS:
850: a->keepzeroedrows = PETSC_TRUE;
851: break;
852: case MAT_NO_NEW_NONZERO_LOCATIONS:
853: a->nonew = 1;
854: break;
855: case MAT_NEW_NONZERO_LOCATION_ERR:
856: a->nonew = -1;
857: break;
858: case MAT_NEW_NONZERO_ALLOCATION_ERR:
859: a->nonew = -2;
860: break;
861: case MAT_YES_NEW_NONZERO_LOCATIONS:
862: a->nonew = 0;
863: break;
864: case MAT_ROWS_SORTED:
865: case MAT_ROWS_UNSORTED:
866: case MAT_YES_NEW_DIAGONALS:
867: case MAT_IGNORE_OFF_PROC_ENTRIES:
868: case MAT_USE_HASH_TABLE:
869: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
870: break;
871: case MAT_NO_NEW_DIAGONALS:
872: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
873: case MAT_SYMMETRIC:
874: case MAT_STRUCTURALLY_SYMMETRIC:
875: case MAT_NOT_SYMMETRIC:
876: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
877: case MAT_HERMITIAN:
878: case MAT_NOT_HERMITIAN:
879: case MAT_SYMMETRY_ETERNAL:
880: case MAT_NOT_SYMMETRY_ETERNAL:
881: break;
882: default:
883: SETERRQ(PETSC_ERR_SUP,"unknown option");
884: }
885: return(0);
886: }
890: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
891: {
892: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
894: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
895: MatScalar *aa,*aa_i;
896: PetscScalar *v_i;
899: bs = A->bs;
900: ai = a->i;
901: aj = a->j;
902: aa = a->a;
903: bs2 = a->bs2;
904:
905: if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
906:
907: bn = row/bs; /* Block number */
908: bp = row % bs; /* Block Position */
909: M = ai[bn+1] - ai[bn];
910: *nz = bs*M;
911:
912: if (v) {
913: *v = 0;
914: if (*nz) {
915: PetscMalloc((*nz)*sizeof(PetscScalar),v);
916: for (i=0; i<M; i++) { /* for each block in the block row */
917: v_i = *v + i*bs;
918: aa_i = aa + bs2*(ai[bn] + i);
919: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
920: }
921: }
922: }
924: if (idx) {
925: *idx = 0;
926: if (*nz) {
927: PetscMalloc((*nz)*sizeof(PetscInt),idx);
928: for (i=0; i<M; i++) { /* for each block in the block row */
929: idx_i = *idx + i*bs;
930: itmp = bs*aj[ai[bn] + i];
931: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
932: }
933: }
934: }
935: return(0);
936: }
940: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
941: {
945: if (idx) {if (*idx) {PetscFree(*idx);}}
946: if (v) {if (*v) {PetscFree(*v);}}
947: return(0);
948: }
952: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
953: {
954: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
955: Mat C;
957: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
958: PetscInt *rows,*cols,bs2=a->bs2;
959: PetscScalar *array;
962: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
963: PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
964: PetscMemzero(col,(1+nbs)*sizeof(PetscInt));
966: #if defined(PETSC_USE_MAT_SINGLE)
967: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
968: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
969: #else
970: array = a->a;
971: #endif
973: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
974: MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);
975: MatSetType(C,A->type_name);
976: MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);
977: PetscFree(col);
978: PetscMalloc(2*bs*sizeof(PetscInt),&rows);
979: cols = rows + bs;
980: for (i=0; i<mbs; i++) {
981: cols[0] = i*bs;
982: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
983: len = ai[i+1] - ai[i];
984: for (j=0; j<len; j++) {
985: rows[0] = (*aj++)*bs;
986: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
987: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
988: array += bs2;
989: }
990: }
991: PetscFree(rows);
992: #if defined(PETSC_USE_MAT_SINGLE)
993: PetscFree(array);
994: #endif
995:
996: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
997: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
998:
999: if (B) {
1000: *B = C;
1001: } else {
1002: MatHeaderCopy(A,C);
1003: }
1004: return(0);
1005: }
1009: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1010: {
1011: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1013: PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1014: int fd;
1015: PetscScalar *aa;
1016: FILE *file;
1019: PetscViewerBinaryGetDescriptor(viewer,&fd);
1020: PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);
1021: col_lens[0] = MAT_FILE_COOKIE;
1023: col_lens[1] = A->m;
1024: col_lens[2] = A->n;
1025: col_lens[3] = a->nz*bs2;
1027: /* store lengths of each row and write (including header) to file */
1028: count = 0;
1029: for (i=0; i<a->mbs; i++) {
1030: for (j=0; j<bs; j++) {
1031: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1032: }
1033: }
1034: PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);
1035: PetscFree(col_lens);
1037: /* store column indices (zero start index) */
1038: PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1039: count = 0;
1040: for (i=0; i<a->mbs; i++) {
1041: for (j=0; j<bs; j++) {
1042: for (k=a->i[i]; k<a->i[i+1]; k++) {
1043: for (l=0; l<bs; l++) {
1044: jj[count++] = bs*a->j[k] + l;
1045: }
1046: }
1047: }
1048: }
1049: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1050: PetscFree(jj);
1052: /* store nonzero values */
1053: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1054: count = 0;
1055: for (i=0; i<a->mbs; i++) {
1056: for (j=0; j<bs; j++) {
1057: for (k=a->i[i]; k<a->i[i+1]; k++) {
1058: for (l=0; l<bs; l++) {
1059: aa[count++] = a->a[bs2*k + l*bs + j];
1060: }
1061: }
1062: }
1063: }
1064: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1065: PetscFree(aa);
1067: PetscViewerBinaryGetInfoPointer(viewer,&file);
1068: if (file) {
1069: fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1070: }
1071: return(0);
1072: }
1076: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1077: {
1078: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1079: PetscErrorCode ierr;
1080: PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2;
1081: PetscViewerFormat format;
1084: PetscViewerGetFormat(viewer,&format);
1085: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1086: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1087: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1088: Mat aij;
1089: MatConvert(A,MATSEQAIJ,&aij);
1090: MatView(aij,viewer);
1091: MatDestroy(aij);
1092: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1093: return(0);
1094: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1095: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1096: for (i=0; i<a->mbs; i++) {
1097: for (j=0; j<bs; j++) {
1098: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1099: for (k=a->i[i]; k<a->i[i+1]; k++) {
1100: for (l=0; l<bs; l++) {
1101: #if defined(PETSC_USE_COMPLEX)
1102: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1103: PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1104: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1105: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1106: PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1107: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1108: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1109: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1110: }
1111: #else
1112: if (a->a[bs2*k + l*bs + j] != 0.0) {
1113: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1114: }
1115: #endif
1116: }
1117: }
1118: PetscViewerASCIIPrintf(viewer,"\n");
1119: }
1120: }
1121: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1122: } else {
1123: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1124: for (i=0; i<a->mbs; i++) {
1125: for (j=0; j<bs; j++) {
1126: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1127: for (k=a->i[i]; k<a->i[i+1]; k++) {
1128: for (l=0; l<bs; l++) {
1129: #if defined(PETSC_USE_COMPLEX)
1130: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1131: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1132: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1133: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1134: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1135: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1136: } else {
1137: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1138: }
1139: #else
1140: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1141: #endif
1142: }
1143: }
1144: PetscViewerASCIIPrintf(viewer,"\n");
1145: }
1146: }
1147: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1148: }
1149: PetscViewerFlush(viewer);
1150: return(0);
1151: }
1155: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1156: {
1157: Mat A = (Mat) Aa;
1158: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1160: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1161: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1162: MatScalar *aa;
1163: PetscViewer viewer;
1167: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1168: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1170: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1172: /* loop over matrix elements drawing boxes */
1173: color = PETSC_DRAW_BLUE;
1174: for (i=0,row=0; i<mbs; i++,row+=bs) {
1175: for (j=a->i[i]; j<a->i[i+1]; j++) {
1176: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1177: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1178: aa = a->a + j*bs2;
1179: for (k=0; k<bs; k++) {
1180: for (l=0; l<bs; l++) {
1181: if (PetscRealPart(*aa++) >= 0.) continue;
1182: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1183: }
1184: }
1185: }
1186: }
1187: color = PETSC_DRAW_CYAN;
1188: for (i=0,row=0; i<mbs; i++,row+=bs) {
1189: for (j=a->i[i]; j<a->i[i+1]; j++) {
1190: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1191: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1192: aa = a->a + j*bs2;
1193: for (k=0; k<bs; k++) {
1194: for (l=0; l<bs; l++) {
1195: if (PetscRealPart(*aa++) != 0.) continue;
1196: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1197: }
1198: }
1199: }
1200: }
1202: color = PETSC_DRAW_RED;
1203: for (i=0,row=0; i<mbs; i++,row+=bs) {
1204: for (j=a->i[i]; j<a->i[i+1]; j++) {
1205: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1206: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1207: aa = a->a + j*bs2;
1208: for (k=0; k<bs; k++) {
1209: for (l=0; l<bs; l++) {
1210: if (PetscRealPart(*aa++) <= 0.) continue;
1211: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1212: }
1213: }
1214: }
1215: }
1216: return(0);
1217: }
1221: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1222: {
1224: PetscReal xl,yl,xr,yr,w,h;
1225: PetscDraw draw;
1226: PetscTruth isnull;
1230: PetscViewerDrawGetDraw(viewer,0,&draw);
1231: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1233: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1234: xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1235: xr += w; yr += h; xl = -w; yl = -h;
1236: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1237: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1238: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1239: return(0);
1240: }
1244: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1245: {
1247: PetscTruth iascii,isbinary,isdraw;
1250: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1251: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1252: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1253: if (iascii){
1254: MatView_SeqBAIJ_ASCII(A,viewer);
1255: } else if (isbinary) {
1256: MatView_SeqBAIJ_Binary(A,viewer);
1257: } else if (isdraw) {
1258: MatView_SeqBAIJ_Draw(A,viewer);
1259: } else {
1260: Mat B;
1261: MatConvert(A,MATSEQAIJ,&B);
1262: MatView(B,viewer);
1263: MatDestroy(B);
1264: }
1265: return(0);
1266: }
1271: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1272: {
1273: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1274: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1275: PetscInt *ai = a->i,*ailen = a->ilen;
1276: PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1277: MatScalar *ap,*aa = a->a,zero = 0.0;
1280: for (k=0; k<m; k++) { /* loop over rows */
1281: row = im[k]; brow = row/bs;
1282: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1283: if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1284: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1285: nrow = ailen[brow];
1286: for (l=0; l<n; l++) { /* loop over columns */
1287: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1288: if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1289: col = in[l] ;
1290: bcol = col/bs;
1291: cidx = col%bs;
1292: ridx = row%bs;
1293: high = nrow;
1294: low = 0; /* assume unsorted */
1295: while (high-low > 5) {
1296: t = (low+high)/2;
1297: if (rp[t] > bcol) high = t;
1298: else low = t;
1299: }
1300: for (i=low; i<high; i++) {
1301: if (rp[i] > bcol) break;
1302: if (rp[i] == bcol) {
1303: *v++ = ap[bs2*i+bs*cidx+ridx];
1304: goto finished;
1305: }
1306: }
1307: *v++ = zero;
1308: finished:;
1309: }
1310: }
1311: return(0);
1312: }
1314: #if defined(PETSC_USE_MAT_SINGLE)
1317: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1318: {
1319: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1321: PetscInt i,N = m*n*b->bs2;
1322: MatScalar *vsingle;
1325: if (N > b->setvalueslen) {
1326: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
1327: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1328: b->setvalueslen = N;
1329: }
1330: vsingle = b->setvaluescopy;
1331: for (i=0; i<N; i++) {
1332: vsingle[i] = v[i];
1333: }
1334: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1335: return(0);
1336: }
1337: #endif
1342: PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1343: {
1344: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1345: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1346: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1347: PetscErrorCode ierr;
1348: PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1349: PetscTruth roworiented=a->roworiented;
1350: const MatScalar *value = v;
1351: MatScalar *ap,*aa = a->a,*bap;
1354: if (roworiented) {
1355: stepval = (n-1)*bs;
1356: } else {
1357: stepval = (m-1)*bs;
1358: }
1359: for (k=0; k<m; k++) { /* loop over added rows */
1360: row = im[k];
1361: if (row < 0) continue;
1362: #if defined(PETSC_USE_BOPT_g)
1363: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1364: #endif
1365: rp = aj + ai[row];
1366: ap = aa + bs2*ai[row];
1367: rmax = imax[row];
1368: nrow = ailen[row];
1369: low = 0;
1370: for (l=0; l<n; l++) { /* loop over added columns */
1371: if (in[l] < 0) continue;
1372: #if defined(PETSC_USE_BOPT_g)
1373: if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1374: #endif
1375: col = in[l];
1376: if (roworiented) {
1377: value = v + k*(stepval+bs)*bs + l*bs;
1378: } else {
1379: value = v + l*(stepval+bs)*bs + k*bs;
1380: }
1381: if (!sorted) low = 0; high = nrow;
1382: while (high-low > 7) {
1383: t = (low+high)/2;
1384: if (rp[t] > col) high = t;
1385: else low = t;
1386: }
1387: for (i=low; i<high; i++) {
1388: if (rp[i] > col) break;
1389: if (rp[i] == col) {
1390: bap = ap + bs2*i;
1391: if (roworiented) {
1392: if (is == ADD_VALUES) {
1393: for (ii=0; ii<bs; ii++,value+=stepval) {
1394: for (jj=ii; jj<bs2; jj+=bs) {
1395: bap[jj] += *value++;
1396: }
1397: }
1398: } else {
1399: for (ii=0; ii<bs; ii++,value+=stepval) {
1400: for (jj=ii; jj<bs2; jj+=bs) {
1401: bap[jj] = *value++;
1402: }
1403: }
1404: }
1405: } else {
1406: if (is == ADD_VALUES) {
1407: for (ii=0; ii<bs; ii++,value+=stepval) {
1408: for (jj=0; jj<bs; jj++) {
1409: *bap++ += *value++;
1410: }
1411: }
1412: } else {
1413: for (ii=0; ii<bs; ii++,value+=stepval) {
1414: for (jj=0; jj<bs; jj++) {
1415: *bap++ = *value++;
1416: }
1417: }
1418: }
1419: }
1420: goto noinsert2;
1421: }
1422: }
1423: if (nonew == 1) goto noinsert2;
1424: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1425: if (nrow >= rmax) {
1426: /* there is no extra room in row, therefore enlarge */
1427: PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1428: MatScalar *new_a;
1430: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1432: /* malloc new storage space */
1433: len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1434: PetscMalloc(len,&new_a);
1435: new_j = (PetscInt*)(new_a + bs2*new_nz);
1436: new_i = new_j + new_nz;
1438: /* copy over old data into new slots */
1439: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1440: for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1441: PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));
1442: len = (new_nz - CHUNKSIZE - ai[row] - nrow);
1443: PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));
1444: PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
1445: PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1446: PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
1447: /* free up old matrix storage */
1448: PetscFree(a->a);
1449: if (!a->singlemalloc) {
1450: PetscFree(a->i);
1451: PetscFree(a->j);
1452: }
1453: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1454: a->singlemalloc = PETSC_TRUE;
1456: rp = aj + ai[row]; ap = aa + bs2*ai[row];
1457: rmax = imax[row] = imax[row] + CHUNKSIZE;
1458: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
1459: a->maxnz += bs2*CHUNKSIZE;
1460: a->reallocs++;
1461: a->nz++;
1462: }
1463: N = nrow++ - 1;
1464: /* shift up all the later entries in this row */
1465: for (ii=N; ii>=i; ii--) {
1466: rp[ii+1] = rp[ii];
1467: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1468: }
1469: if (N >= i) {
1470: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1471: }
1472: rp[i] = col;
1473: bap = ap + bs2*i;
1474: if (roworiented) {
1475: for (ii=0; ii<bs; ii++,value+=stepval) {
1476: for (jj=ii; jj<bs2; jj+=bs) {
1477: bap[jj] = *value++;
1478: }
1479: }
1480: } else {
1481: for (ii=0; ii<bs; ii++,value+=stepval) {
1482: for (jj=0; jj<bs; jj++) {
1483: *bap++ = *value++;
1484: }
1485: }
1486: }
1487: noinsert2:;
1488: low = i;
1489: }
1490: ailen[row] = nrow;
1491: }
1492: return(0);
1493: }
1497: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1498: {
1499: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1500: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1501: PetscInt m = A->m,*ip,N,*ailen = a->ilen;
1503: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1504: MatScalar *aa = a->a,*ap;
1507: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1509: if (m) rmax = ailen[0];
1510: for (i=1; i<mbs; i++) {
1511: /* move each row back by the amount of empty slots (fshift) before it*/
1512: fshift += imax[i-1] - ailen[i-1];
1513: rmax = PetscMax(rmax,ailen[i]);
1514: if (fshift) {
1515: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1516: N = ailen[i];
1517: for (j=0; j<N; j++) {
1518: ip[j-fshift] = ip[j];
1519: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1520: }
1521: }
1522: ai[i] = ai[i-1] + ailen[i-1];
1523: }
1524: if (mbs) {
1525: fshift += imax[mbs-1] - ailen[mbs-1];
1526: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1527: }
1528: /* reset ilen and imax for each row */
1529: for (i=0; i<mbs; i++) {
1530: ailen[i] = imax[i] = ai[i+1] - ai[i];
1531: }
1532: a->nz = ai[mbs];
1534: /* diagonals may have moved, so kill the diagonal pointers */
1535: a->idiagvalid = PETSC_FALSE;
1536: if (fshift && a->diag) {
1537: PetscFree(a->diag);
1538: PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1539: a->diag = 0;
1540: }
1541: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2);
1542: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs);
1543: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax);
1544: a->reallocs = 0;
1545: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1547: return(0);
1548: }
1552: /*
1553: This function returns an array of flags which indicate the locations of contiguous
1554: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1555: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1556: Assume: sizes should be long enough to hold all the values.
1557: */
1560: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1561: {
1562: PetscInt i,j,k,row;
1563: PetscTruth flg;
1566: for (i=0,j=0; i<n; j++) {
1567: row = idx[i];
1568: if (row%bs!=0) { /* Not the begining of a block */
1569: sizes[j] = 1;
1570: i++;
1571: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1572: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1573: i++;
1574: } else { /* Begining of the block, so check if the complete block exists */
1575: flg = PETSC_TRUE;
1576: for (k=1; k<bs; k++) {
1577: if (row+k != idx[i+k]) { /* break in the block */
1578: flg = PETSC_FALSE;
1579: break;
1580: }
1581: }
1582: if (flg == PETSC_TRUE) { /* No break in the bs */
1583: sizes[j] = bs;
1584: i+= bs;
1585: } else {
1586: sizes[j] = 1;
1587: i++;
1588: }
1589: }
1590: }
1591: *bs_max = j;
1592: return(0);
1593: }
1594:
1597: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1598: {
1599: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1601: PetscInt i,j,k,count,is_n,*is_idx,*rows;
1602: PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1603: PetscScalar zero = 0.0;
1604: MatScalar *aa;
1607: /* Make a copy of the IS and sort it */
1608: ISGetLocalSize(is,&is_n);
1609: ISGetIndices(is,&is_idx);
1611: /* allocate memory for rows,sizes */
1612: PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1613: sizes = rows + is_n;
1615: /* copy IS values to rows, and sort them */
1616: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1617: PetscSortInt(is_n,rows);
1618: if (baij->keepzeroedrows) {
1619: for (i=0; i<is_n; i++) { sizes[i] = 1; }
1620: bs_max = is_n;
1621: } else {
1622: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1623: }
1624: ISRestoreIndices(is,&is_idx);
1626: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1627: row = rows[j];
1628: if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1629: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1630: aa = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1631: if (sizes[i] == bs && !baij->keepzeroedrows) {
1632: if (diag) {
1633: if (baij->ilen[row/bs] > 0) {
1634: baij->ilen[row/bs] = 1;
1635: baij->j[baij->i[row/bs]] = row/bs;
1636: PetscMemzero(aa,count*bs*sizeof(MatScalar));
1637: }
1638: /* Now insert all the diagonal values for this bs */
1639: for (k=0; k<bs; k++) {
1640: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
1641: }
1642: } else { /* (!diag) */
1643: baij->ilen[row/bs] = 0;
1644: } /* end (!diag) */
1645: } else { /* (sizes[i] != bs) */
1646: #if defined (PETSC_USE_DEBUG)
1647: if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1648: #endif
1649: for (k=0; k<count; k++) {
1650: aa[0] = zero;
1651: aa += bs;
1652: }
1653: if (diag) {
1654: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
1655: }
1656: }
1657: }
1659: PetscFree(rows);
1660: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1661: return(0);
1662: }
1666: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1667: {
1668: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1669: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1670: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1671: PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1673: PetscInt ridx,cidx,bs2=a->bs2;
1674: PetscTruth roworiented=a->roworiented;
1675: MatScalar *ap,value,*aa=a->a,*bap;
1678: for (k=0; k<m; k++) { /* loop over added rows */
1679: row = im[k]; brow = row/bs;
1680: if (row < 0) continue;
1681: #if defined(PETSC_USE_BOPT_g)
1682: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1683: #endif
1684: rp = aj + ai[brow];
1685: ap = aa + bs2*ai[brow];
1686: rmax = imax[brow];
1687: nrow = ailen[brow];
1688: low = 0;
1689: for (l=0; l<n; l++) { /* loop over added columns */
1690: if (in[l] < 0) continue;
1691: #if defined(PETSC_USE_BOPT_g)
1692: if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1693: #endif
1694: col = in[l]; bcol = col/bs;
1695: ridx = row % bs; cidx = col % bs;
1696: if (roworiented) {
1697: value = v[l + k*n];
1698: } else {
1699: value = v[k + l*m];
1700: }
1701: if (!sorted) low = 0; high = nrow;
1702: while (high-low > 7) {
1703: t = (low+high)/2;
1704: if (rp[t] > bcol) high = t;
1705: else low = t;
1706: }
1707: for (i=low; i<high; i++) {
1708: if (rp[i] > bcol) break;
1709: if (rp[i] == bcol) {
1710: bap = ap + bs2*i + bs*cidx + ridx;
1711: if (is == ADD_VALUES) *bap += value;
1712: else *bap = value;
1713: goto noinsert1;
1714: }
1715: }
1716: if (nonew == 1) goto noinsert1;
1717: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1718: if (nrow >= rmax) {
1719: /* there is no extra room in row, therefore enlarge */
1720: PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1721: MatScalar *new_a;
1723: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1725: /* Malloc new storage space */
1726: len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
1727: PetscMalloc(len,&new_a);
1728: new_j = (PetscInt*)(new_a + bs2*new_nz);
1729: new_i = new_j + new_nz;
1731: /* copy over old data into new slots */
1732: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1733: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1734: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));
1735: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1736: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));
1737: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1738: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1739: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1740: /* free up old matrix storage */
1741: PetscFree(a->a);
1742: if (!a->singlemalloc) {
1743: PetscFree(a->i);
1744: PetscFree(a->j);
1745: }
1746: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1747: a->singlemalloc = PETSC_TRUE;
1749: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1750: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1751: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
1752: a->maxnz += bs2*CHUNKSIZE;
1753: a->reallocs++;
1754: a->nz++;
1755: }
1756: N = nrow++ - 1;
1757: /* shift up all the later entries in this row */
1758: for (ii=N; ii>=i; ii--) {
1759: rp[ii+1] = rp[ii];
1760: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1761: }
1762: if (N>=i) {
1763: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1764: }
1765: rp[i] = bcol;
1766: ap[bs2*i + bs*cidx + ridx] = value;
1767: noinsert1:;
1768: low = i;
1769: }
1770: ailen[brow] = nrow;
1771: }
1772: return(0);
1773: }
1778: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1779: {
1780: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1781: Mat outA;
1783: PetscTruth row_identity,col_identity;
1786: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1787: ISIdentity(row,&row_identity);
1788: ISIdentity(col,&col_identity);
1789: if (!row_identity || !col_identity) {
1790: SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1791: }
1793: outA = inA;
1794: inA->factor = FACTOR_LU;
1796: if (!a->diag) {
1797: MatMarkDiagonal_SeqBAIJ(inA);
1798: }
1800: a->row = row;
1801: a->col = col;
1802: PetscObjectReference((PetscObject)row);
1803: PetscObjectReference((PetscObject)col);
1804:
1805: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1806: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1807: PetscLogObjectParent(inA,a->icol);
1808:
1809: /*
1810: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1811: for ILU(0) factorization with natural ordering
1812: */
1813: if (inA->bs < 8) {
1814: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1815: } else {
1816: if (!a->solve_work) {
1817: PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);
1818: PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));
1819: }
1820: }
1822: MatLUFactorNumeric(inA,&outA);
1824: return(0);
1825: }
1828: PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1829: {
1830: static PetscTruth called = PETSC_FALSE;
1831: MPI_Comm comm = A->comm;
1832: PetscErrorCode ierr;
1835: if (called) {return(0);} else called = PETSC_TRUE;
1836: (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1837: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");
1838: return(0);
1839: }
1844: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1845: {
1846: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1847: PetscInt i,nz,nbs;
1850: nz = baij->maxnz/baij->bs2;
1851: nbs = baij->nbs;
1852: for (i=0; i<nz; i++) {
1853: baij->j[i] = indices[i];
1854: }
1855: baij->nz = nz;
1856: for (i=0; i<nbs; i++) {
1857: baij->ilen[i] = baij->imax[i];
1858: }
1860: return(0);
1861: }
1866: /*@
1867: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1868: in the matrix.
1870: Input Parameters:
1871: + mat - the SeqBAIJ matrix
1872: - indices - the column indices
1874: Level: advanced
1876: Notes:
1877: This can be called if you have precomputed the nonzero structure of the
1878: matrix and want to provide it to the matrix object to improve the performance
1879: of the MatSetValues() operation.
1881: You MUST have set the correct numbers of nonzeros per row in the call to
1882: MatCreateSeqBAIJ().
1884: MUST be called before any calls to MatSetValues();
1886: @*/
1887: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1888: {
1889: PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1894: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1895: if (f) {
1896: (*f)(mat,indices);
1897: } else {
1898: SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1899: }
1900: return(0);
1901: }
1905: PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1906: {
1907: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1909: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
1910: PetscReal atmp;
1911: PetscScalar *x,zero = 0.0;
1912: MatScalar *aa;
1913: PetscInt ncols,brow,krow,kcol;
1916: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1917: bs = A->bs;
1918: aa = a->a;
1919: ai = a->i;
1920: aj = a->j;
1921: mbs = a->mbs;
1923: VecSet(&zero,v);
1924: VecGetArray(v,&x);
1925: VecGetLocalSize(v,&n);
1926: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1927: for (i=0; i<mbs; i++) {
1928: ncols = ai[1] - ai[0]; ai++;
1929: brow = bs*i;
1930: for (j=0; j<ncols; j++){
1931: /* bcol = bs*(*aj); */
1932: for (kcol=0; kcol<bs; kcol++){
1933: for (krow=0; krow<bs; krow++){
1934: atmp = PetscAbsScalar(*aa); aa++;
1935: row = brow + krow; /* row index */
1936: /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1937: if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1938: }
1939: }
1940: aj++;
1941: }
1942: }
1943: VecRestoreArray(v,&x);
1944: return(0);
1945: }
1949: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1950: {
1954: MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1955: return(0);
1956: }
1960: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1961: {
1962: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1964: *array = a->a;
1965: return(0);
1966: }
1970: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1971: {
1973: return(0);
1974: }
1976: #include petscblaslapack.h
1979: PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1980: {
1981: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1983: PetscInt i,bs=Y->bs,j,bs2;
1984: PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz;
1987: if (str == SAME_NONZERO_PATTERN) {
1988: BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1989: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1990: if (y->xtoy && y->XtoY != X) {
1991: PetscFree(y->xtoy);
1992: MatDestroy(y->XtoY);
1993: }
1994: if (!y->xtoy) { /* get xtoy */
1995: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1996: y->XtoY = X;
1997: }
1998: bs2 = bs*bs;
1999: for (i=0; i<x->nz; i++) {
2000: j = 0;
2001: while (j < bs2){
2002: y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2003: j++;
2004: }
2005: }
2006: PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2007: } else {
2008: MatAXPY_Basic(a,X,Y,str);
2009: }
2010: return(0);
2011: }
2013: /* -------------------------------------------------------------------*/
2014: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2015: MatGetRow_SeqBAIJ,
2016: MatRestoreRow_SeqBAIJ,
2017: MatMult_SeqBAIJ_N,
2018: /* 4*/ MatMultAdd_SeqBAIJ_N,
2019: MatMultTranspose_SeqBAIJ,
2020: MatMultTransposeAdd_SeqBAIJ,
2021: MatSolve_SeqBAIJ_N,
2022: 0,
2023: 0,
2024: /*10*/ 0,
2025: MatLUFactor_SeqBAIJ,
2026: 0,
2027: 0,
2028: MatTranspose_SeqBAIJ,
2029: /*15*/ MatGetInfo_SeqBAIJ,
2030: MatEqual_SeqBAIJ,
2031: MatGetDiagonal_SeqBAIJ,
2032: MatDiagonalScale_SeqBAIJ,
2033: MatNorm_SeqBAIJ,
2034: /*20*/ 0,
2035: MatAssemblyEnd_SeqBAIJ,
2036: 0,
2037: MatSetOption_SeqBAIJ,
2038: MatZeroEntries_SeqBAIJ,
2039: /*25*/ MatZeroRows_SeqBAIJ,
2040: MatLUFactorSymbolic_SeqBAIJ,
2041: MatLUFactorNumeric_SeqBAIJ_N,
2042: 0,
2043: 0,
2044: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2045: MatILUFactorSymbolic_SeqBAIJ,
2046: 0,
2047: MatGetArray_SeqBAIJ,
2048: MatRestoreArray_SeqBAIJ,
2049: /*35*/ MatDuplicate_SeqBAIJ,
2050: 0,
2051: 0,
2052: MatILUFactor_SeqBAIJ,
2053: 0,
2054: /*40*/ MatAXPY_SeqBAIJ,
2055: MatGetSubMatrices_SeqBAIJ,
2056: MatIncreaseOverlap_SeqBAIJ,
2057: MatGetValues_SeqBAIJ,
2058: 0,
2059: /*45*/ MatPrintHelp_SeqBAIJ,
2060: MatScale_SeqBAIJ,
2061: 0,
2062: 0,
2063: 0,
2064: /*50*/ 0,
2065: MatGetRowIJ_SeqBAIJ,
2066: MatRestoreRowIJ_SeqBAIJ,
2067: 0,
2068: 0,
2069: /*55*/ 0,
2070: 0,
2071: 0,
2072: 0,
2073: MatSetValuesBlocked_SeqBAIJ,
2074: /*60*/ MatGetSubMatrix_SeqBAIJ,
2075: MatDestroy_SeqBAIJ,
2076: MatView_SeqBAIJ,
2077: MatGetPetscMaps_Petsc,
2078: 0,
2079: /*65*/ 0,
2080: 0,
2081: 0,
2082: 0,
2083: 0,
2084: /*70*/ MatGetRowMax_SeqBAIJ,
2085: MatConvert_Basic,
2086: 0,
2087: 0,
2088: 0,
2089: /*75*/ 0,
2090: 0,
2091: 0,
2092: 0,
2093: 0,
2094: /*80*/ 0,
2095: 0,
2096: 0,
2097: 0,
2098: MatLoad_SeqBAIJ,
2099: /*85*/ 0,
2100: 0,
2101: 0,
2102: 0,
2103: 0,
2104: /*90*/ 0,
2105: 0,
2106: 0,
2107: 0,
2108: 0,
2109: /*95*/ 0,
2110: 0,
2111: 0,
2112: 0};
2117: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2118: {
2119: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2120: PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2;
2124: if (aij->nonew != 1) {
2125: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2126: }
2128: /* allocate space for values if not already there */
2129: if (!aij->saved_values) {
2130: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2131: }
2133: /* copy values over */
2134: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2135: return(0);
2136: }
2142: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2143: {
2144: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2146: PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2;
2149: if (aij->nonew != 1) {
2150: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2151: }
2152: if (!aij->saved_values) {
2153: SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2154: }
2156: /* copy values over */
2157: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2158: return(0);
2159: }
2170: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2171: {
2172: Mat_SeqBAIJ *b;
2174: PetscInt i,len,mbs,nbs,bs2,newbs = bs;
2175: PetscTruth flg;
2179: B->preallocated = PETSC_TRUE;
2180: PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
2181: if (nnz && newbs != bs) {
2182: SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2183: }
2184: bs = newbs;
2186: mbs = B->m/bs;
2187: nbs = B->n/bs;
2188: bs2 = bs*bs;
2190: if (mbs*bs!=B->m || nbs*bs!=B->n) {
2191: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2192: }
2194: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2195: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2196: if (nnz) {
2197: for (i=0; i<mbs; i++) {
2198: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2199: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2200: }
2201: }
2203: b = (Mat_SeqBAIJ*)B->data;
2204: PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
2205: B->ops->solve = MatSolve_SeqBAIJ_Update;
2206: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
2207: if (!flg) {
2208: switch (bs) {
2209: case 1:
2210: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2211: B->ops->mult = MatMult_SeqBAIJ_1;
2212: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2213: break;
2214: case 2:
2215: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2216: B->ops->mult = MatMult_SeqBAIJ_2;
2217: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2218: B->ops->pbrelax = MatPBRelax_SeqBAIJ_2;
2219: break;
2220: case 3:
2221: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2222: B->ops->mult = MatMult_SeqBAIJ_3;
2223: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2224: B->ops->pbrelax = MatPBRelax_SeqBAIJ_3;
2225: break;
2226: case 4:
2227: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2228: B->ops->mult = MatMult_SeqBAIJ_4;
2229: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2230: B->ops->pbrelax = MatPBRelax_SeqBAIJ_4;
2231: break;
2232: case 5:
2233: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2234: B->ops->mult = MatMult_SeqBAIJ_5;
2235: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2236: B->ops->pbrelax = MatPBRelax_SeqBAIJ_5;
2237: break;
2238: case 6:
2239: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2240: B->ops->mult = MatMult_SeqBAIJ_6;
2241: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2242: break;
2243: case 7:
2244: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2245: B->ops->mult = MatMult_SeqBAIJ_7;
2246: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2247: break;
2248: default:
2249: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2250: B->ops->mult = MatMult_SeqBAIJ_N;
2251: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2252: break;
2253: }
2254: }
2255: B->bs = bs;
2256: b->mbs = mbs;
2257: b->nbs = nbs;
2258: PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);
2259: if (!nnz) {
2260: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2261: else if (nz <= 0) nz = 1;
2262: for (i=0; i<mbs; i++) b->imax[i] = nz;
2263: nz = nz*mbs;
2264: } else {
2265: nz = 0;
2266: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2267: }
2269: /* allocate the matrix space */
2270: len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
2271: PetscMalloc(len,&b->a);
2272: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2273: b->j = (PetscInt*)(b->a + nz*bs2);
2274: PetscMemzero(b->j,nz*sizeof(PetscInt));
2275: b->i = b->j + nz;
2276: b->singlemalloc = PETSC_TRUE;
2278: b->i[0] = 0;
2279: for (i=1; i<mbs+1; i++) {
2280: b->i[i] = b->i[i-1] + b->imax[i-1];
2281: }
2283: /* b->ilen will count nonzeros in each block row so far. */
2284: PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);
2285: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2286: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2288: B->bs = bs;
2289: b->bs2 = bs2;
2290: b->mbs = mbs;
2291: b->nz = 0;
2292: b->maxnz = nz*bs2;
2293: B->info.nz_unneeded = (PetscReal)b->maxnz;
2294: return(0);
2295: }
2298: /*MC
2299: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2300: block sparse compressed row format.
2302: Options Database Keys:
2303: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2305: Level: beginner
2307: .seealso: MatCreateSeqBAIJ
2308: M*/
2313: PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2314: {
2316: PetscMPIInt size;
2317: Mat_SeqBAIJ *b;
2320: MPI_Comm_size(B->comm,&size);
2321: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2323: B->m = B->M = PetscMax(B->m,B->M);
2324: B->n = B->N = PetscMax(B->n,B->N);
2325: PetscNew(Mat_SeqBAIJ,&b);
2326: B->data = (void*)b;
2327: PetscMemzero(b,sizeof(Mat_SeqBAIJ));
2328: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2329: B->factor = 0;
2330: B->lupivotthreshold = 1.0;
2331: B->mapping = 0;
2332: b->row = 0;
2333: b->col = 0;
2334: b->icol = 0;
2335: b->reallocs = 0;
2336: b->saved_values = 0;
2337: #if defined(PETSC_USE_MAT_SINGLE)
2338: b->setvalueslen = 0;
2339: b->setvaluescopy = PETSC_NULL;
2340: #endif
2342: PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
2343: PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);
2345: b->sorted = PETSC_FALSE;
2346: b->roworiented = PETSC_TRUE;
2347: b->nonew = 0;
2348: b->diag = 0;
2349: b->solve_work = 0;
2350: b->mult_work = 0;
2351: B->spptr = 0;
2352: B->info.nz_unneeded = (PetscReal)b->maxnz;
2353: b->keepzeroedrows = PETSC_FALSE;
2354: b->xtoy = 0;
2355: b->XtoY = 0;
2357: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2358: "MatStoreValues_SeqBAIJ",
2359: MatStoreValues_SeqBAIJ);
2360: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2361: "MatRetrieveValues_SeqBAIJ",
2362: MatRetrieveValues_SeqBAIJ);
2363: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2364: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2365: MatSeqBAIJSetColumnIndices_SeqBAIJ);
2366: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2367: "MatConvert_SeqBAIJ_SeqAIJ",
2368: MatConvert_SeqBAIJ_SeqAIJ);
2369: #if !defined(PETSC_USE_64BIT_INT)
2370: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2371: "MatConvert_SeqBAIJ_SeqSBAIJ",
2372: MatConvert_SeqBAIJ_SeqSBAIJ);
2373: #endif
2374: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2375: "MatSeqBAIJSetPreallocation_SeqBAIJ",
2376: MatSeqBAIJSetPreallocation_SeqBAIJ);
2377: return(0);
2378: }
2383: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2384: {
2385: Mat C;
2386: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2388: PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2391: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2393: *B = 0;
2394: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
2395: MatSetType(C,A->type_name);
2396: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2397: c = (Mat_SeqBAIJ*)C->data;
2399: C->M = A->M;
2400: C->N = A->N;
2401: C->bs = A->bs;
2402: c->bs2 = a->bs2;
2403: c->mbs = a->mbs;
2404: c->nbs = a->nbs;
2406: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);
2407: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);
2408: for (i=0; i<mbs; i++) {
2409: c->imax[i] = a->imax[i];
2410: c->ilen[i] = a->ilen[i];
2411: }
2413: /* allocate the matrix space */
2414: c->singlemalloc = PETSC_TRUE;
2415: len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
2416: PetscMalloc(len,&c->a);
2417: c->j = (PetscInt*)(c->a + nz*bs2);
2418: c->i = c->j + nz;
2419: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2420: if (mbs > 0) {
2421: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2422: if (cpvalues == MAT_COPY_VALUES) {
2423: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2424: } else {
2425: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2426: }
2427: }
2429: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2430: c->sorted = a->sorted;
2431: c->roworiented = a->roworiented;
2432: c->nonew = a->nonew;
2434: if (a->diag) {
2435: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2436: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2437: for (i=0; i<mbs; i++) {
2438: c->diag[i] = a->diag[i];
2439: }
2440: } else c->diag = 0;
2441: c->nz = a->nz;
2442: c->maxnz = a->maxnz;
2443: c->solve_work = 0;
2444: c->mult_work = 0;
2445: C->preallocated = PETSC_TRUE;
2446: C->assembled = PETSC_TRUE;
2447: *B = C;
2448: PetscFListDuplicate(A->qlist,&C->qlist);
2449: return(0);
2450: }
2454: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2455: {
2456: Mat_SeqBAIJ *a;
2457: Mat B;
2459: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
2460: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2461: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows;
2462: PetscInt *masked,nmask,tmp,bs2,ishift;
2463: PetscMPIInt size;
2464: int fd;
2465: PetscScalar *aa;
2466: MPI_Comm comm = ((PetscObject)viewer)->comm;
2469: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
2470: bs2 = bs*bs;
2472: MPI_Comm_size(comm,&size);
2473: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2474: PetscViewerBinaryGetDescriptor(viewer,&fd);
2475: PetscBinaryRead(fd,header,4,PETSC_INT);
2476: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2477: M = header[1]; N = header[2]; nz = header[3];
2479: if (header[3] < 0) {
2480: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2481: }
2483: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2485: /*
2486: This code adds extra rows to make sure the number of rows is
2487: divisible by the blocksize
2488: */
2489: mbs = M/bs;
2490: extra_rows = bs - M + bs*(mbs);
2491: if (extra_rows == bs) extra_rows = 0;
2492: else mbs++;
2493: if (extra_rows) {
2494: PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2495: }
2497: /* read in row lengths */
2498: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2499: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2500: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2502: /* read in column indices */
2503: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2504: PetscBinaryRead(fd,jj,nz,PETSC_INT);
2505: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2507: /* loop over row lengths determining block row lengths */
2508: PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2509: PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2510: PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2511: PetscMemzero(mask,mbs*sizeof(PetscInt));
2512: masked = mask + mbs;
2513: rowcount = 0; nzcount = 0;
2514: for (i=0; i<mbs; i++) {
2515: nmask = 0;
2516: for (j=0; j<bs; j++) {
2517: kmax = rowlengths[rowcount];
2518: for (k=0; k<kmax; k++) {
2519: tmp = jj[nzcount++]/bs;
2520: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2521: }
2522: rowcount++;
2523: }
2524: browlengths[i] += nmask;
2525: /* zero out the mask elements we set */
2526: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2527: }
2529: /* create our matrix */
2530: MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2531: MatSetType(B,type);
2532: MatSeqBAIJSetPreallocation(B,bs,0,browlengths);
2533: a = (Mat_SeqBAIJ*)B->data;
2535: /* set matrix "i" values */
2536: a->i[0] = 0;
2537: for (i=1; i<= mbs; i++) {
2538: a->i[i] = a->i[i-1] + browlengths[i-1];
2539: a->ilen[i-1] = browlengths[i-1];
2540: }
2541: a->nz = 0;
2542: for (i=0; i<mbs; i++) a->nz += browlengths[i];
2544: /* read in nonzero values */
2545: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2546: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2547: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2549: /* set "a" and "j" values into matrix */
2550: nzcount = 0; jcount = 0;
2551: for (i=0; i<mbs; i++) {
2552: nzcountb = nzcount;
2553: nmask = 0;
2554: for (j=0; j<bs; j++) {
2555: kmax = rowlengths[i*bs+j];
2556: for (k=0; k<kmax; k++) {
2557: tmp = jj[nzcount++]/bs;
2558: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2559: }
2560: }
2561: /* sort the masked values */
2562: PetscSortInt(nmask,masked);
2564: /* set "j" values into matrix */
2565: maskcount = 1;
2566: for (j=0; j<nmask; j++) {
2567: a->j[jcount++] = masked[j];
2568: mask[masked[j]] = maskcount++;
2569: }
2570: /* set "a" values into matrix */
2571: ishift = bs2*a->i[i];
2572: for (j=0; j<bs; j++) {
2573: kmax = rowlengths[i*bs+j];
2574: for (k=0; k<kmax; k++) {
2575: tmp = jj[nzcountb]/bs ;
2576: block = mask[tmp] - 1;
2577: point = jj[nzcountb] - bs*tmp;
2578: idx = ishift + bs2*block + j + bs*point;
2579: a->a[idx] = (MatScalar)aa[nzcountb++];
2580: }
2581: }
2582: /* zero out the mask elements we set */
2583: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2584: }
2585: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2587: PetscFree(rowlengths);
2588: PetscFree(browlengths);
2589: PetscFree(aa);
2590: PetscFree(jj);
2591: PetscFree(mask);
2593: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2594: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2595: MatView_Private(B);
2597: *A = B;
2598: return(0);
2599: }
2603: /*@C
2604: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2605: compressed row) format. For good matrix assembly performance the
2606: user should preallocate the matrix storage by setting the parameter nz
2607: (or the array nnz). By setting these parameters accurately, performance
2608: during matrix assembly can be increased by more than a factor of 50.
2610: Collective on MPI_Comm
2612: Input Parameters:
2613: + comm - MPI communicator, set to PETSC_COMM_SELF
2614: . bs - size of block
2615: . m - number of rows
2616: . n - number of columns
2617: . nz - number of nonzero blocks per block row (same for all rows)
2618: - nnz - array containing the number of nonzero blocks in the various block rows
2619: (possibly different for each block row) or PETSC_NULL
2621: Output Parameter:
2622: . A - the matrix
2624: Options Database Keys:
2625: . -mat_no_unroll - uses code that does not unroll the loops in the
2626: block calculations (much slower)
2627: . -mat_block_size - size of the blocks to use
2629: Level: intermediate
2631: Notes:
2632: If the nnz parameter is given then the nz parameter is ignored
2634: A nonzero block is any block that as 1 or more nonzeros in it
2636: The block AIJ format is fully compatible with standard Fortran 77
2637: storage. That is, the stored row and column indices can begin at
2638: either one (as in Fortran) or zero. See the users' manual for details.
2640: Specify the preallocated storage with either nz or nnz (not both).
2641: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2642: allocation. For additional details, see the users manual chapter on
2643: matrices.
2645: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2646: @*/
2647: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2648: {
2650:
2652: MatCreate(comm,m,n,m,n,A);
2653: MatSetType(*A,MATSEQBAIJ);
2654: MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
2655: return(0);
2656: }
2660: /*@C
2661: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2662: per row in the matrix. For good matrix assembly performance the
2663: user should preallocate the matrix storage by setting the parameter nz
2664: (or the array nnz). By setting these parameters accurately, performance
2665: during matrix assembly can be increased by more than a factor of 50.
2667: Collective on MPI_Comm
2669: Input Parameters:
2670: + A - the matrix
2671: . bs - size of block
2672: . nz - number of block nonzeros per block row (same for all rows)
2673: - nnz - array containing the number of block nonzeros in the various block rows
2674: (possibly different for each block row) or PETSC_NULL
2676: Options Database Keys:
2677: . -mat_no_unroll - uses code that does not unroll the loops in the
2678: block calculations (much slower)
2679: . -mat_block_size - size of the blocks to use
2681: Level: intermediate
2683: Notes:
2684: If the nnz parameter is given then the nz parameter is ignored
2686: The block AIJ format is fully compatible with standard Fortran 77
2687: storage. That is, the stored row and column indices can begin at
2688: either one (as in Fortran) or zero. See the users' manual for details.
2690: Specify the preallocated storage with either nz or nnz (not both).
2691: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2692: allocation. For additional details, see the users manual chapter on
2693: matrices.
2695: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2696: @*/
2697: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2698: {
2699: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2702: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2703: if (f) {
2704: (*f)(B,bs,nz,nnz);
2705: }
2706: return(0);
2707: }