Actual source code: pbjacobi.c

  1: /*$Id: pbjacobi.c,v 1.4 2001/08/07 03:03:42 balay Exp $*/

  3: /* 
  4:    Include files needed for the PBJacobi preconditioner:
  5:      pcimpl.h - private include file intended for use by all preconditioners 
  6: */

 8:  #include src/sles/pc/pcimpl.h

 10: /* 
 11:    Private context (data structure) for the PBJacobi preconditioner.  
 12: */
 13: typedef struct {
 14:   PetscScalar *diag;
 15:   int    bs,mbs;
 16: } PC_PBJacobi;

 18: /*
 19:    Currently only implemented for baij matrices and directly access baij
 20:   data structures.
 21: */
 22:  #include src/mat/impls/baij/mpi/mpibaij.h
 23:  #include src/inline/ilu.h

 27: static int PCApply_PBJacobi_2(PC pc,Vec x,Vec y)
 28: {
 29:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
 30:   int         ierr,i,m = jac->mbs;
 31:   PetscScalar *diag = jac->diag,x0,x1,*xx,*yy;
 32: 
 34:   VecGetArray(x,&xx);
 35:   VecGetArray(y,&yy);
 36:   for (i=0; i<m; i++) {
 37:     x0 = xx[2*i]; x1 = xx[2*i+1];
 38:     yy[2*i]   = diag[0]*x0 + diag[2]*x1;
 39:     yy[2*i+1] = diag[1]*x0 + diag[3]*x1;
 40:     diag     += 4;
 41:   }
 42:   VecRestoreArray(x,&xx);
 43:   VecRestoreArray(y,&yy);
 44:   PetscLogFlops(6*m);
 45:   return(0);
 46: }
 49: static int PCApply_PBJacobi_3(PC pc,Vec x,Vec y)
 50: {
 51:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
 52:   int         ierr,i,m = jac->mbs;
 53:   PetscScalar *diag = jac->diag,x0,x1,x2,*xx,*yy;
 54: 
 56:   VecGetArray(x,&xx);
 57:   VecGetArray(y,&yy);
 58:   for (i=0; i<m; i++) {
 59:     x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2];
 60:     yy[3*i]   = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
 61:     yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
 62:     yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
 63:     diag     += 9;
 64:   }
 65:   VecRestoreArray(x,&xx);
 66:   VecRestoreArray(y,&yy);
 67:   PetscLogFlops(15*m);
 68:   return(0);
 69: }
 72: static int PCApply_PBJacobi_4(PC pc,Vec x,Vec y)
 73: {
 74:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
 75:   int         ierr,i,m = jac->mbs;
 76:   PetscScalar *diag = jac->diag,x0,x1,x2,x3,*xx,*yy;
 77: 
 79:   VecGetArray(x,&xx);
 80:   VecGetArray(y,&yy);
 81:   for (i=0; i<m; i++) {
 82:     x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3];
 83:     yy[4*i]   = diag[0]*x0 + diag[4]*x1 + diag[8]*x2  + diag[12]*x3;
 84:     yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2  + diag[13]*x3;
 85:     yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
 86:     yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
 87:     diag     += 16;
 88:   }
 89:   VecRestoreArray(x,&xx);
 90:   VecRestoreArray(y,&yy);
 91:   PetscLogFlops(28*m);
 92:   return(0);
 93: }
 96: static int PCApply_PBJacobi_5(PC pc,Vec x,Vec y)
 97: {
 98:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
 99:   int         ierr,i,m = jac->mbs;
100:   PetscScalar *diag = jac->diag,x0,x1,x2,x3,x4,*xx,*yy;
101: 
103:   VecGetArray(x,&xx);
104:   VecGetArray(y,&yy);
105:   for (i=0; i<m; i++) {
106:     x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4];
107:     yy[5*i]   = diag[0]*x0 + diag[5]*x1 + diag[10]*x2  + diag[15]*x3 + diag[20]*x4;
108:     yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2  + diag[16]*x3 + diag[21]*x4;
109:     yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
110:     yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
111:     yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
112:     diag     += 25;
113:   }
114:   VecRestoreArray(x,&xx);
115:   VecRestoreArray(y,&yy);
116:   PetscLogFlops(45*m);
117:   return(0);
118: }
119: /* -------------------------------------------------------------------------- */
122: static int PCSetUp_PBJacobi(PC pc)
123: {
124:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
125:   int         ierr,i,*diag_offset,bs2;
126:   PetscTruth  seqbaij,mpibaij;
127:   Mat         A = pc->pmat;
128:   PetscScalar *diag,*odiag,*v;
129:   Mat_SeqBAIJ *a;

132:   PetscTypeCompare((PetscObject)pc->pmat,MATSEQBAIJ,&seqbaij);
133:   PetscTypeCompare((PetscObject)pc->pmat,MATMPIBAIJ,&mpibaij);
134:   if (!seqbaij && !mpibaij) {
135:     SETERRQ(1,"Currently only supports BAIJ matrices");
136:   }
137:   if (mpibaij) A = ((Mat_MPIBAIJ*)A->data)->A;
138:   if (A->m != A->n) SETERRQ(1,"Supported only for square matrices and square storage");
139:   MatMarkDiagonal_SeqBAIJ(A);
140:   a           = (Mat_SeqBAIJ*)A->data;
141:   bs2         = a->bs*a->bs;
142:   diag_offset = a->diag;
143:   v           = a->a;
144:   PetscMalloc((bs2*a->mbs+1)*sizeof(PetscScalar),&diag);
145:   PetscLogObjectMemory(pc,bs2*a->mbs*sizeof(PetscScalar));
146:   jac->diag   = diag;
147:   jac->bs     = a->bs;
148:   jac->mbs    = a->mbs;

150:   /* factor and invert each block */
151:   switch (a->bs){
152:     case 2:
153:       for (i=0; i<a->mbs; i++) {
154:         odiag   = v + 4*diag_offset[i];
155:         diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
156:         Kernel_A_gets_inverse_A_2(diag);
157:         diag   += 4;
158:       }
159:       pc->ops->apply = PCApply_PBJacobi_2;
160:       break;
161:     case 3:
162:       for (i=0; i<a->mbs; i++) {
163:         odiag   = v + 9*diag_offset[i];
164:         diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
165:         diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
166:         diag[8] = odiag[8]; diag[9] = odiag[9];
167:         Kernel_A_gets_inverse_A_3(diag);
168:         diag   += 9;
169:       }
170:       pc->ops->apply = PCApply_PBJacobi_3;
171:       break;
172:     case 4:
173:       for (i=0; i<a->mbs; i++) {
174:         odiag = v + 16*diag_offset[i];
175:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
176:         Kernel_A_gets_inverse_A_4(diag);
177:         diag += 16;
178:       }
179:       pc->ops->apply = PCApply_PBJacobi_4;
180:       break;
181:     case 5:
182:       for (i=0; i<a->mbs; i++) {
183:         odiag = v + 25*diag_offset[i];
184:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
185:         Kernel_A_gets_inverse_A_5(diag);
186:         diag += 25;
187:       }
188:       pc->ops->apply = PCApply_PBJacobi_5;
189:       break;
190:     default:
191:       SETERRQ1(1,"not supported for block size %d",a->bs);
192:   }

194:   return(0);
195: }
196: /* -------------------------------------------------------------------------- */
199: static int PCDestroy_PBJacobi(PC pc)
200: {
201:   PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
202:   int         ierr;

205:   if (jac->diag)     {PetscFree(jac->diag);}

207:   /*
208:       Free the private data structure that was hanging off the PC
209:   */
210:   PetscFree(jac);
211:   return(0);
212: }
213: /* -------------------------------------------------------------------------- */
214: EXTERN_C_BEGIN
217: int PCCreate_PBJacobi(PC pc)
218: {
219:   PC_PBJacobi *jac;
220:   int       ierr;


224:   /*
225:      Creates the private data structure for this preconditioner and
226:      attach it to the PC object.
227:   */
228:   PetscNew(PC_PBJacobi,&jac);
229:   pc->data  = (void*)jac;

231:   /*
232:      Logs the memory usage; this is not needed but allows PETSc to 
233:      monitor how much memory is being used for various purposes.
234:   */
235:   PetscLogObjectMemory(pc,sizeof(PC_PBJacobi));

237:   /*
238:      Initialize the pointers to vectors to ZERO; these will be used to store
239:      diagonal entries of the matrix for fast preconditioner application.
240:   */
241:   jac->diag          = 0;

243:   /*
244:       Set the pointers for the functions that are provided above.
245:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
246:       are called, they will automatically call these functions.  Note we
247:       choose not to provide a couple of these functions since they are
248:       not needed.
249:   */
250:   pc->ops->apply               = 0; /*set depending on the block size */
251:   pc->ops->applytranspose      = 0;
252:   pc->ops->setup               = PCSetUp_PBJacobi;
253:   pc->ops->destroy             = PCDestroy_PBJacobi;
254:   pc->ops->setfromoptions      = 0;
255:   pc->ops->view                = 0;
256:   pc->ops->applyrichardson     = 0;
257:   pc->ops->applysymmetricleft  = 0;
258:   pc->ops->applysymmetricright = 0;
259:   return(0);
260: }
261: EXTERN_C_END