Actual source code: matmatmult.c

  1: /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
  2: /*
  3:   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
  4:           C = A * B
  5: */

 7:  #include src/mat/impls/aij/seq/aij.h
 8:  #include src/mat/utils/freespace.h

 10: static int logkey_matmatmult          = 0;
 11: static int logkey_matmatmult_symbolic = 0;
 12: static int logkey_matmatmult_numeric  = 0;

 16: /*@
 17:    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.

 19:    Collective on Mat

 21:    Input Parameters:
 22: +  A - the left matrix
 23: -  B - the right matrix

 25:    Output Parameters:
 26: .  C - the product matrix

 28:    Notes:
 29:    C will be created and must be destroyed by the user with MatDestroy().

 31:    This routine is currently only implemented for pairs of SeqAIJ matrices.

 33:    Level: intermediate

 35: .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
 36: @*/
 37: int MatMatMult(Mat A,Mat B, Mat *C) {
 38:   /* Perhaps this "interface" routine should be moved into the interface directory.*/
 39:   /* To facilitate implementations with varying types, QueryFunction is used.*/
 40:   /* It is assumed that implementations will be composed as "MatMatMult_<type of A><type of B>". */
 42:   char funct[80];
 43:   int (*mult)(Mat,Mat,Mat*);


 50:   MatPreallocated(A);
 51:   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
 52:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

 56:   MatPreallocated(B);
 57:   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
 58:   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

 60:   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);

 62:   PetscStrcpy(funct,"MatMatMult_");
 63:   PetscStrcat(funct,A->type_name);
 64:   PetscStrcat(funct,B->type_name);
 65:   PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&mult);
 66:   if (!mult) SETERRQ2(PETSC_ERR_SUP,
 67:                          "C=A*B not implemented for A of type %s and B of type %s",
 68:                          A->type_name,B->type_name);
 69:   (*mult)(A,B,C);
 70:   return(0);
 71: }

 75: int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B, Mat *C) {
 77:   char symfunct[80],numfunct[80],types[80];
 78:   int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat);

 81:   PetscStrcpy(types,A->type_name);
 82:   PetscStrcat(types,B->type_name);
 83:   PetscStrcpy(symfunct,"MatMatMultSymbolic_");
 84:   PetscStrcat(symfunct,types);
 85:   PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);
 86:   if (!symbolic) SETERRQ2(PETSC_ERR_SUP,
 87:                          "C=A*B not implemented for A of type %s and B of type %s",
 88:                          A->type_name,B->type_name);
 89:   PetscStrcpy(numfunct,"MatMatMultNumeric_");
 90:   PetscStrcat(numfunct,types);
 91:   PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);
 92:   if (!numeric) SETERRQ2(PETSC_ERR_SUP,
 93:                          "C=A*B not implemented for A of type %s and B of type %s",
 94:                          A->type_name,B->type_name);
 95:   PetscLogEventBegin(logkey_matmatmult,A,B,0,0);
 96:   (*symbolic)(A,B,C);
 97:   (*numeric)(A,B,*C);
 98:   PetscLogEventEnd(logkey_matmatmult,A,B,0,0);
 99:   return(0);
100: }

104: /*@
105:    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
106:    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().

108:    Collective on Mat

110:    Input Parameters:
111: +  A - the left matrix
112: -  B - the right matrix

114:    Output Parameters:
115: .  C - the matrix containing the ij structure of product matrix

117:    Notes:
118:    C will be created and must be destroyed by the user with MatDestroy().

120:    This routine is currently only implemented for SeqAIJ type matrices.

122:    Level: intermediate

124: .seealso: MatMatMult(),MatMatMultNumeric()
125: @*/
126: int MatMatMultSymbolic(Mat A,Mat B,Mat *C) {
127:   /* Perhaps this "interface" routine should be moved into the interface directory.*/
128:   /* To facilitate implementations with varying types, QueryFunction is used.*/
129:   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
131:   char funct[80];
132:   int (*symbolic)(Mat,Mat,Mat *);


139:   MatPreallocated(A);
140:   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
141:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

145:   MatPreallocated(B);
146:   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
147:   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

149:   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);

151:   PetscStrcpy(funct,"MatMatMultSymbolic_");
152:   PetscStrcat(funct,A->type_name);
153:   PetscStrcat(funct,B->type_name);
154:   PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&symbolic);
155:   if (!symbolic) SETERRQ2(PETSC_ERR_SUP,
156:                          "C=A*B not implemented for A of type %s and B of type %s",
157:                          A->type_name,B->type_name);
158:   (*symbolic)(A,B,C);

160:   return(0);
161: }

165: int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
166: {
167:   int            ierr;
168:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
169:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
170:   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
171:   int            *ci,*cj,*lnk,idx0,idx,bcol;
172:   int            am=A->M,bn=B->N,bm=B->M;
173:   int            i,j,k,anzi,brow,bnzj,cnzi;
174:   MatScalar      *ca;

177:   /* Start timers */
178:   PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);

180:   /* Set up */
181:   /* Allocate ci array, arrays for fill computation and */
182:   /* free space for accumulating nonzero column info */
183:   PetscMalloc(((am+1)+1)*sizeof(int),&ci);
184:   ci[0] = 0;
185: 
186:   PetscMalloc((bn+1)*sizeof(int),&lnk);
187:   for (i=0; i<bn; i++) lnk[i] = -1;

189:   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
190:   GetMoreSpace(4*bi[bm],&free_space);
191:   current_space = free_space;

193:   /* Determine symbolic info for each row of the product: */
194:   for (i=0;i<am;i++) {
195:     anzi = ai[i+1] - ai[i];
196:     cnzi = 0;
197:     lnk[bn] = bn;
198:     for (j=0;j<anzi;j++) {
199:       brow = *aj++;
200:       bnzj = bi[brow+1] - bi[brow];
201:       bjj  = bj + bi[brow];
202:       idx  = bn;
203:       for (k=0;k<bnzj;k++) {
204:         bcol = bjj[k];
205:         if (lnk[bcol] == -1) { /* new col */
206:           if (k>0) idx = bjj[k-1];
207:           do {
208:             idx0 = idx;
209:             idx  = lnk[idx0];
210:           } while (bcol > idx);
211:           lnk[idx0] = bcol;
212:           lnk[bcol] = idx;
213:           cnzi++;
214:         }
215:       }
216:     }

218:     /* If free space is not available, make more free space */
219:     /* Double the amount of total space in the list */
220:     if (current_space->local_remaining<cnzi) {
221:       printf("...%d -th row, double space ...\n",i);
222:       GetMoreSpace(current_space->total_array_size,&current_space);
223:     }

225:     /* Copy data into free space, and zero out denserow and lnk */
226:     idx = bn;
227:     for (j=0; j<cnzi; j++){
228:       idx0 = idx;
229:       idx  = lnk[idx0];
230:       *current_space->array++ = idx;
231:       lnk[idx0] = -1;
232:     }
233:     lnk[idx] = -1;

235:     current_space->local_used      += cnzi;
236:     current_space->local_remaining -= cnzi;

238:     ci[i+1] = ci[i] + cnzi;
239:   }

241:   /* Column indices are in the list of free space */
242:   /* Allocate space for cj, initialize cj, and */
243:   /* destroy list of free space and other temporary array(s) */
244:   PetscMalloc((ci[am]+1)*sizeof(int),&cj);
245:   MakeSpaceContiguous(&free_space,cj);
246:   PetscFree(lnk);
247: 
248:   /* Allocate space for ca */
249:   PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
250:   PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));
251: 
252:   /* put together the new matrix */
253:   MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);

255:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
256:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
257:   c = (Mat_SeqAIJ *)((*C)->data);
258:   c->freedata = PETSC_TRUE;
259:   c->nonew    = 0;

261:   PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);
262:   return(0);
263: }

267: /*@
268:    MatMatMultNumeric - Performs the numeric matrix-matrix product.
269:    Call this routine after first calling MatMatMultSymbolic().

271:    Collective on Mat

273:    Input Parameters:
274: +  A - the left matrix
275: -  B - the right matrix

277:    Output Parameters:
278: .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().

280:    Notes:
281:    C must have been created with MatMatMultSymbolic.

283:    This routine is currently only implemented for SeqAIJ type matrices.

285:    Level: intermediate

287: .seealso: MatMatMult(),MatMatMultSymbolic()
288: @*/
289: int MatMatMultNumeric(Mat A,Mat B,Mat C){
290:   /* Perhaps this "interface" routine should be moved into the interface directory.*/
291:   /* To facilitate implementations with varying types, QueryFunction is used.*/
292:   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
294:   char funct[80];
295:   int (*numeric)(Mat,Mat,Mat);


301:   MatPreallocated(A);
302:   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
303:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

307:   MatPreallocated(B);
308:   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
309:   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

313:   MatPreallocated(C);
314:   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
315:   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

317:   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
318:   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
319:   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);

321:   /* Query A for ApplyPtAP implementation based on types of P */
322:   PetscStrcpy(funct,"MatMatMultNumeric_");
323:   PetscStrcat(funct,A->type_name);
324:   PetscStrcat(funct,B->type_name);
325:   PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&numeric);
326:   if (!numeric) SETERRQ2(PETSC_ERR_SUP,
327:                          "C=A*B not implemented for A of type %s and B of type %s",
328:                          A->type_name,B->type_name);
329:   (*numeric)(A,B,C);

331:   return(0);
332: }

336: int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
337: {
338:   int        ierr,flops=0;
339:   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
340:   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
341:   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
342:   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
343:   int        am=A->M,cn=C->N;
344:   int        i,j,k,anzi,bnzi,cnzi,brow;
345:   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;


349:   /* Start timers */
350:   PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);

352:   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
353:   PetscMalloc((cn+1)*sizeof(MatScalar),&temp);
354:   PetscMemzero(temp,cn*sizeof(MatScalar));
355:   /* Traverse A row-wise. */
356:   /* Build the ith row in C by summing over nonzero columns in A, */
357:   /* the rows of B corresponding to nonzeros of A. */
358:   for (i=0;i<am;i++) {
359:     anzi = ai[i+1] - ai[i];
360:     for (j=0;j<anzi;j++) {
361:       brow = *aj++;
362:       bnzi = bi[brow+1] - bi[brow];
363:       bjj  = bj + bi[brow];
364:       baj  = ba + bi[brow];
365:       for (k=0;k<bnzi;k++) {
366:         temp[bjj[k]] += (*aa)*baj[k];
367:       }
368:       flops += 2*bnzi;
369:       aa++;
370:     }
371:     /* Store row back into C, and re-zero temp */
372:     cnzi = ci[i+1] - ci[i];
373:     for (j=0;j<cnzi;j++) {
374:       ca[j] = temp[cj[j]];
375:       temp[cj[j]] = 0.0;
376:     }
377:     ca += cnzi;
378:     cj += cnzi;
379:   }
380:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
381:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
382: 
383:   /* Free temp */
384:   PetscFree(temp);
385:   PetscLogFlops(flops);
386:   PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);
387:   return(0);
388: }

392: int RegisterMatMatMultRoutines_Private(Mat A) {

396:   if (!logkey_matmatmult) {
397:     PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);
398:   }
399:   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMult_seqaijseqaij",
400:                                            "MatMatMult_SeqAIJ_SeqAIJ",
401:                                            MatMatMult_SeqAIJ_SeqAIJ);
402:   if (!logkey_matmatmult_symbolic) {
403:     PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);
404:   }
405:   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij",
406:                                            "MatMatMult_Symbolic_SeqAIJ_SeqAIJ",
407:                                            MatMatMult_Symbolic_SeqAIJ_SeqAIJ);
408:   if (!logkey_matmatmult_numeric) {
409:     PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);
410:   }
411:   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij",
412:                                            "MatMatMult_Numeric_SeqAIJ_SeqAIJ",
413:                                            MatMatMult_Numeric_SeqAIJ_SeqAIJ);
414:   return(0);
415: }