Actual source code: cholbs.c

#include "petsc.h"

/* We must define MLOG for BlockSolve logging */
#if defined(PETSC_USE_LOG)
#define MLOG
#endif
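/*
   With MLOG defined before the BlockSolve headers are pulled in below, BlockSolve's
   own flop counters are active, so the BSlocal_flops() calls in this file can report
   flop counts to PetscLogFlops() whenever PETSc logging is enabled.
*/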

#include "src/mat/impls/rowbs/mpi/mpirowbs.h"

PetscErrorCode MatCholeskyFactorNumeric_MPIRowbs(Mat mat,Mat *factp)
{
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif


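  /* Complete the BlockSolve-specific assembly if it has not been done yet */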
  if (!mbs->blocksolveassembly) {
    MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
  }

  /* Do prep work if same nonzero structure as previously factored matrix */
  if (mbs->factor == FACTOR_CHOLESKY) {
    /* Copy the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
  }
  /* Form incomplete Cholesky factor */
  mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
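  /* If the incomplete factorization breaks down, increase the diagonal shift alpha
     and retry until BlockSolve succeeds */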
  while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
    CHKERRBS(0); mbs->failures++;
    /* Copy only the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
    /* Increment the diagonal shift */
    mbs->alpha += 0.1;
    BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
    PetscLogInfo(mat,"MatCholeskyFactorNumeric_MPIRowbs:BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",
                 mbs->failures,mbs->ierr,mbs->alpha);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif

  mbs->factor = FACTOR_CHOLESKY;
  return(0);
}

PetscErrorCode MatLUFactorNumeric_MPIRowbs(Mat mat,Mat *factp)
{
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  /* Do prep work if same nonzero structure as previously factored matrix */
  if (mbs->factor == FACTOR_LU) {
    /* Copy the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
  }
  /* Form incomplete LU factor */
  mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
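  /* If the incomplete factorization breaks down, increase the diagonal shift alpha
     and retry until BlockSolve succeeds */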
  while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
    CHKERRBS(0); mbs->failures++;
    /* Copy only the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
    /* Increment the diagonal shift */
    mbs->alpha += 0.1;
    BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
    PetscLogInfo(mat,"MatLUFactorNumeric_MPIRowbs:BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",
                 mbs->failures,mbs->ierr,mbs->alpha);
  }
  mbs->factor = FACTOR_LU;
  (*factp)->assembled = PETSC_TRUE;
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
/* ------------------------------------------------------------------- */
PetscErrorCode MatSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat) mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xa,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

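  /* BlockSolve factors the permuted, symmetrically scaled matrix, so the right-hand
     side is permuted and scaled by D^{-1/2} before the triangular solves, and the
     solution is unscaled and unpermuted afterwards */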
  /* Permute and apply diagonal scaling to vector, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecGetArray(x,&xa);
    VecGetArray(mbs->xwork,&xworka);
    BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(x,&xa);
    VecRestoreArray(mbs->xwork,&xworka);
    VecPointwiseMult(mbs->diag,mbs->xwork,y);
  } else {
    VecCopy(x,y);
  }

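  /* Forward and backward triangular solves with the factored matrix; the "1"
     variants are the BlockSolve routines for the case without cliques/inodes */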
  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
    BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
    BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

  /* Apply diagonal scaling and unpermute, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecPointwiseMult(y,mbs->diag,mbs->xwork);
    VecGetArray(y,&ya);
    VecGetArray(mbs->xwork,&xworka);
    BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(y,&ya);
    VecRestoreArray(mbs->xwork,&xworka);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}

/* ------------------------------------------------------------------- */
PetscErrorCode MatForwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat) mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xa,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

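  /* Forward (lower-triangular) sweep only: the input is transformed as in
     MatSolve_MPIRowbs, but no backward solve or unscaling is done here */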
  /* Permute and apply diagonal scaling to vector, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecGetArray(x,&xa);
    VecGetArray(mbs->xwork,&xworka);
    BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(x,&xa);
    VecRestoreArray(mbs->xwork,&xworka);
    VecPointwiseMult(mbs->diag,mbs->xwork,y);
  } else {
    VecCopy(x,y);
  }

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif

  return(0);
}

/* ------------------------------------------------------------------- */
PetscErrorCode MatBackwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat) mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

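  /* Backward (upper-triangular) sweep only: the input is assumed to be the
     already permuted and scaled result of the forward solve, so only the final
     unscaling and unpermutation are applied here */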
  VecCopy(x,y);

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

  /* Apply diagonal scaling and unpermute, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecPointwiseMult(y,mbs->diag,mbs->xwork);
    VecGetArray(y,&ya);
    VecGetArray(mbs->xwork,&xworka);
    BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(y,&ya);
    VecRestoreArray(mbs->xwork,&xworka);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}


/*
    The logging variables required by BlockSolve.

    This is an ugly hack that allows PETSc to run properly with BlockSolve regardless
  of whether PETSc or BlockSolve is compiled with logging turned on.

    It is bad because it relies on BlockSolve's logging-related internals not changing,
  but we have no choice; besides, it is unlikely that BlockSolve will be developed
  further in the near future.
*/
double MLOG_flops;
double MLOG_event_flops;
double MLOG_time_stamp;
PetscErrorCode MLOG_sequence_num;
#if defined(MLOG_MAX_EVNTS)
MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
#else
typedef struct __MLOG_log_type {
  double time_stamp;
  double total_time;
  double flops;
  int    event_num;
} MLOG_log_type;
#define MLOG_MAX_EVNTS 1300
#define MLOG_MAX_ACCUM 75
MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
#endif