Actual source code: pbjacobi.c
1: /*$Id: pbjacobi.c,v 1.4 2001/08/07 03:03:42 balay Exp $*/
3: /*
4: Include files needed for the PBJacobi preconditioner:
5: pcimpl.h - private include file intended for use by all preconditioners
6: */
8: #include src/sles/pc/pcimpl.h
10: /*
11: Private context (data structure) for the PBJacobi preconditioner.
12: */
13: typedef struct {
14: PetscScalar *diag;
15: int bs,mbs;
16: } PC_PBJacobi;
18: /*
19: Currently only implemented for baij matrices and directly access baij
20: data structures.
21: */
22: #include src/mat/impls/baij/mpi/mpibaij.h
23: #include src/inline/ilu.h
25: #undef __FUNCT__
27: static int PCApply_PBJacobi_2(PC pc,Vec x,Vec y)
28: {
29: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
30: int ierr,i,m = jac->mbs;
31: PetscScalar *diag = jac->diag,x0,x1,*xx,*yy;
32:
34: VecGetArray(x,&xx);
35: VecGetArray(y,&yy);
36: for (i=0; i<m; i++) {
37: x0 = xx[2*i]; x1 = xx[2*i+1];
38: yy[2*i] = diag[0]*x0 + diag[2]*x1;
39: yy[2*i+1] = diag[1]*x0 + diag[3]*x1;
40: diag += 4;
41: }
42: VecRestoreArray(x,&xx);
43: VecRestoreArray(y,&yy);
44: PetscLogFlops(6*m);
45: return(0);
46: }
47: #undef __FUNCT__
49: static int PCApply_PBJacobi_3(PC pc,Vec x,Vec y)
50: {
51: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
52: int ierr,i,m = jac->mbs;
53: PetscScalar *diag = jac->diag,x0,x1,x2,*xx,*yy;
54:
56: VecGetArray(x,&xx);
57: VecGetArray(y,&yy);
58: for (i=0; i<m; i++) {
59: x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2];
60: yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
61: yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
62: yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
63: diag += 9;
64: }
65: VecRestoreArray(x,&xx);
66: VecRestoreArray(y,&yy);
67: PetscLogFlops(15*m);
68: return(0);
69: }
70: #undef __FUNCT__
72: static int PCApply_PBJacobi_4(PC pc,Vec x,Vec y)
73: {
74: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
75: int ierr,i,m = jac->mbs;
76: PetscScalar *diag = jac->diag,x0,x1,x2,x3,*xx,*yy;
77:
79: VecGetArray(x,&xx);
80: VecGetArray(y,&yy);
81: for (i=0; i<m; i++) {
82: x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3];
83: yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
84: yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
85: yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
86: yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
87: diag += 16;
88: }
89: VecRestoreArray(x,&xx);
90: VecRestoreArray(y,&yy);
91: PetscLogFlops(28*m);
92: return(0);
93: }
94: #undef __FUNCT__
96: static int PCApply_PBJacobi_5(PC pc,Vec x,Vec y)
97: {
98: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
99: int ierr,i,m = jac->mbs;
100: PetscScalar *diag = jac->diag,x0,x1,x2,x3,x4,*xx,*yy;
101:
103: VecGetArray(x,&xx);
104: VecGetArray(y,&yy);
105: for (i=0; i<m; i++) {
106: x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4];
107: yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
108: yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
109: yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
110: yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
111: yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
112: diag += 25;
113: }
114: VecRestoreArray(x,&xx);
115: VecRestoreArray(y,&yy);
116: PetscLogFlops(45*m);
117: return(0);
118: }
119: /* -------------------------------------------------------------------------- */
120: #undef __FUNCT__
122: static int PCSetUp_PBJacobi(PC pc)
123: {
124: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
125: int ierr,i,*diag_offset,bs2;
126: PetscTruth seqbaij,mpibaij;
127: Mat A = pc->pmat;
128: PetscScalar *diag,*odiag,*v;
129: Mat_SeqBAIJ *a;
132: PetscTypeCompare((PetscObject)pc->pmat,MATSEQBAIJ,&seqbaij);
133: PetscTypeCompare((PetscObject)pc->pmat,MATMPIBAIJ,&mpibaij);
134: if (!seqbaij && !mpibaij) {
135: SETERRQ(1,"Currently only supports BAIJ matrices");
136: }
137: if (mpibaij) A = ((Mat_MPIBAIJ*)A->data)->A;
138: if (A->m != A->n) SETERRQ(1,"Supported only for square matrices and square storage");
139: ierr = MatMarkDiagonal_SeqBAIJ(A);
140: a = (Mat_SeqBAIJ*)A->data;
141: bs2 = a->bs*a->bs;
142: diag_offset = a->diag;
143: v = a->a;
144: ierr = PetscMalloc((bs2*a->mbs+1)*sizeof(PetscScalar),&diag);
145: PetscLogObjectMemory(pc,bs2*a->mbs*sizeof(PetscScalar));
146: jac->diag = diag;
147: jac->bs = a->bs;
148: jac->mbs = a->mbs;
150: /* factor and invert each block */
151: switch (a->bs){
152: case 2:
153: for (i=0; i<a->mbs; i++) {
154: odiag = v + 4*diag_offset[i];
155: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
156: ierr = Kernel_A_gets_inverse_A_2(diag);
157: diag += 4;
158: }
159: pc->ops->apply = PCApply_PBJacobi_2;
160: break;
161: case 3:
162: for (i=0; i<a->mbs; i++) {
163: odiag = v + 9*diag_offset[i];
164: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
165: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
166: diag[8] = odiag[8]; diag[9] = odiag[9];
167: ierr = Kernel_A_gets_inverse_A_3(diag);
168: diag += 9;
169: }
170: pc->ops->apply = PCApply_PBJacobi_3;
171: break;
172: case 4:
173: for (i=0; i<a->mbs; i++) {
174: odiag = v + 16*diag_offset[i];
175: ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
176: ierr = Kernel_A_gets_inverse_A_4(diag);
177: diag += 16;
178: }
179: pc->ops->apply = PCApply_PBJacobi_4;
180: break;
181: case 5:
182: for (i=0; i<a->mbs; i++) {
183: odiag = v + 25*diag_offset[i];
184: ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
185: ierr = Kernel_A_gets_inverse_A_5(diag);
186: diag += 25;
187: }
188: pc->ops->apply = PCApply_PBJacobi_5;
189: break;
190: default:
191: SETERRQ1(1,"not supported for block size %d",a->bs);
192: }
194: return(0);
195: }
196: /* -------------------------------------------------------------------------- */
197: #undef __FUNCT__
199: static int PCDestroy_PBJacobi(PC pc)
200: {
201: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
202: int ierr;
205: if (jac->diag) {PetscFree(jac->diag);}
207: /*
208: Free the private data structure that was hanging off the PC
209: */
210: PetscFree(jac);
211: return(0);
212: }
213: /* -------------------------------------------------------------------------- */
214: EXTERN_C_BEGIN
215: #undef __FUNCT__
217: int PCCreate_PBJacobi(PC pc)
218: {
219: PC_PBJacobi *jac;
220: int ierr;
224: /*
225: Creates the private data structure for this preconditioner and
226: attach it to the PC object.
227: */
228: ierr = PetscNew(PC_PBJacobi,&jac);
229: pc->data = (void*)jac;
231: /*
232: Logs the memory usage; this is not needed but allows PETSc to
233: monitor how much memory is being used for various purposes.
234: */
235: PetscLogObjectMemory(pc,sizeof(PC_PBJacobi));
237: /*
238: Initialize the pointers to vectors to ZERO; these will be used to store
239: diagonal entries of the matrix for fast preconditioner application.
240: */
241: jac->diag = 0;
243: /*
244: Set the pointers for the functions that are provided above.
245: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
246: are called, they will automatically call these functions. Note we
247: choose not to provide a couple of these functions since they are
248: not needed.
249: */
250: pc->ops->apply = 0; /*set depending on the block size */
251: pc->ops->applytranspose = 0;
252: pc->ops->setup = PCSetUp_PBJacobi;
253: pc->ops->destroy = PCDestroy_PBJacobi;
254: pc->ops->setfromoptions = 0;
255: pc->ops->view = 0;
256: pc->ops->applyrichardson = 0;
257: pc->ops->applysymmetricleft = 0;
258: pc->ops->applysymmetricright = 0;
259: return(0);
260: }
261: EXTERN_C_END