Actual source code: matmatmult.c

  1: /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/
  2: /*
  3:   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
  4:           C = A * B
  5:           C = P * A * P^T
  6: */

 8:  #include src/mat/impls/aij/seq/aij.h
 9:  #include src/mat/utils/freespace.h

 11: static int logkey_matmatmult            = 0;
 12: static int logkey_matmatmult_symbolic   = 0;
 13: static int logkey_matmatmult_numeric    = 0;

 15: static int logkey_matapplypapt          = 0;
 16: static int logkey_matapplypapt_symbolic = 0;
 17: static int logkey_matapplypapt_numeric  = 0;

 19: /*
 20:      MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
 21:            C = A * B;

 23:      Note: C is assumed to be uncreated.
 24:            If this is not the case, Destroy C before calling this routine.
 25: */
 26: #ifdef USE_INTSORT
 27: /* 
 28: This roution is modified by the one below for better performance.
 29: The changes are:
 30:    -- PetscSortInt() is replace by a linked list
 31:    -- malloc larger Initial FreeSpace 
 32: */
 33: #undef __FUNCT__  
 35: int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
 36: {
 37:   int            ierr;
 38:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
 39:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
 40:   int            aishift=a->indexshift,bishift=b->indexshift;
 41:   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
 42:   int            *ci,*cj,*denserow,*sparserow;
 43:   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
 44:   int            i,j,k,anzi,brow,bnzj,cnzi;
 45:   MatScalar      *ca;

 48:   /* some error checking which could be moved into interface layer */
 49:   if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
 50:   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
 51: 
 52:   /* Set up timers */
 53:   if (!logkey_matmatmult_symbolic) {
 54:     PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);
 55:   }
 56:   PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);

 58:   /* Set up */
 59:   /* Allocate ci array, arrays for fill computation and */
 60:   /* free space for accumulating nonzero column info */
 61:   PetscMalloc(((am+1)+1)*sizeof(int),&ci);
 62:   ci[0] = 0;

 64:   PetscMalloc((2*bn+1)*sizeof(int),&denserow);
 65:   PetscMemzero(denserow,(2*bn+1)*sizeof(int));
 66:   sparserow = denserow + bn;

 68:   /* Initial FreeSpace size is nnz(B)=bi[bm] */
 69:   ierr          = GetMoreSpace(bi[bm],&free_space);
 70:   current_space = free_space;

 72:   /* Determine symbolic info for each row of the product: */
 73:   for (i=0;i<am;i++) {
 74:     anzi = ai[i+1] - ai[i];
 75:     cnzi = 0;
 76:     for (j=0;j<anzi;j++) {
 77:       brow = *aj++;
 78:       bnzj = bi[brow+1] - bi[brow];
 79:       bjj  = bj + bi[brow];
 80:       for (k=0;k<bnzj;k++) {
 81:         /* If column is not marked, mark it in compressed and uncompressed locations. */
 82:         /* For simplicity, leave uncompressed row unsorted until finished with row, */
 83:         /* and increment nonzero count for this row. */
 84:         if (!denserow[bjj[k]]) {
 85:           denserow[bjj[k]]  = -1;
 86:           sparserow[cnzi++] = bjj[k];
 87:         }
 88:       }
 89:     }

 91:     /* sort sparserow */
 92:     PetscSortInt(cnzi,sparserow);

 94:     /* If free space is not available, make more free space */
 95:     /* Double the amount of total space in the list */
 96:     if (current_space->local_remaining<cnzi) {
 97:       GetMoreSpace(current_space->total_array_size,&current_space);
 98:     }

100:     /* Copy data into free space, and zero out denserow */
101:     PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
102:     current_space->array           += cnzi;
103:     current_space->local_used      += cnzi;
104:     current_space->local_remaining -= cnzi;
105:     for (j=0;j<cnzi;j++) {
106:       denserow[sparserow[j]] = 0;
107:     }
108:     ci[i+1] = ci[i] + cnzi;
109:   }

111:   /* Column indices are in the list of free space */
112:   /* Allocate space for cj, initialize cj, and */
113:   /* destroy list of free space and other temporary array(s) */
114:   PetscMalloc((ci[am]+1)*sizeof(int),&cj);
115:   MakeSpaceContiguous(&free_space,cj);
116:   PetscFree(denserow);
117: 
118:   /* Allocate space for ca */
119:   PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
120:   PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));
121: 
122:   /* put together the new matrix */
123:   MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);

125:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
126:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
127:   c = (Mat_SeqAIJ *)((*C)->data);
128:   c->freedata = PETSC_TRUE;
129:   c->nonew    = 0;

131:   PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);
132:   return(0);
133: }
134: #endif /*  USE_INTSORT */

136: #undef __FUNCT__  
138: int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
139: {
140:   int            ierr;
141:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
142:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
143:   int            aishift=a->indexshift,bishift=b->indexshift;
144:   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
145:   int            *ci,*cj,*lnk,idx0,idx,bcol;
146:   int            an=A->N,am=A->M,bn=B->N,bm=B->M;
147:   int            i,j,k,anzi,brow,bnzj,cnzi;
148:   MatScalar      *ca;

151:   /* some error checking which could be moved into interface layer */
152:   if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
153:   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
154: 
155:   /* Set up timers */
156:   if (!logkey_matmatmult_symbolic) {
157:     PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);
158:   }
159:   PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);

161:   /* Set up */
162:   /* Allocate ci array, arrays for fill computation and */
163:   /* free space for accumulating nonzero column info */
164:   PetscMalloc(((am+1)+1)*sizeof(int),&ci);
165:   ci[0] = 0;
166: 
167:   PetscMalloc((bn+1)*sizeof(int),&lnk);
168:   for (i=0; i<bn; i++) lnk[i] = -1;

170:   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
171:   GetMoreSpace(4*bi[bm],&free_space);
172:   current_space = free_space;

174:   /* Determine symbolic info for each row of the product: */
175:   for (i=0;i<am;i++) {
176:     anzi = ai[i+1] - ai[i];
177:     cnzi = 0;
178:     lnk[bn] = bn;
179:     for (j=0;j<anzi;j++) {
180:       brow = *aj++;
181:       bnzj = bi[brow+1] - bi[brow];
182:       bjj  = bj + bi[brow];
183:       idx  = bn;
184:       for (k=0;k<bnzj;k++) {
185:         bcol = bjj[k];
186:         if (lnk[bcol] == -1) { /* new col */
187:           if (k>0) idx = bjj[k-1];
188:           do {
189:             idx0 = idx;
190:             idx  = lnk[idx0];
191:           } while (bcol > idx);
192:           lnk[idx0] = bcol;
193:           lnk[bcol] = idx;
194:           cnzi++;
195:         }
196:       }
197:     }

199:     /* If free space is not available, make more free space */
200:     /* Double the amount of total space in the list */
201:     if (current_space->local_remaining<cnzi) {
202:       printf("...%d -th row, double space ...n",i);
203:       GetMoreSpace(current_space->total_array_size,&current_space);
204:     }

206:     /* Copy data into free space, and zero out denserow and lnk */
207:     idx = bn;
208:     for (j=0; j<cnzi; j++){
209:       idx0 = idx;
210:       idx  = lnk[idx0];
211:       *current_space->array++ = idx;
212:       lnk[idx0] = -1;
213:     }
214:     lnk[idx] = -1;

216:     current_space->local_used      += cnzi;
217:     current_space->local_remaining -= cnzi;

219:     ci[i+1] = ci[i] + cnzi;
220:   }

222:   /* Column indices are in the list of free space */
223:   /* Allocate space for cj, initialize cj, and */
224:   /* destroy list of free space and other temporary array(s) */
225:   PetscMalloc((ci[am]+1)*sizeof(int),&cj);
226:   MakeSpaceContiguous(&free_space,cj);
227:   PetscFree(lnk);
228: 
229:   /* Allocate space for ca */
230:   PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);
231:   PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));
232: 
233:   /* put together the new matrix */
234:   MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);

236:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
237:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
238:   c = (Mat_SeqAIJ *)((*C)->data);
239:   c->freedata = PETSC_TRUE;
240:   c->nonew    = 0;

242:   PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);
243:   return(0);
244: }

246: /*
247:      MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
248:            C=A*B;
249:      Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ.
250: */
251: #undef __FUNCT__  
253: int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
254: {
255:   int        ierr,flops=0;
256:   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
257:   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
258:   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
259:   int        aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift;
260:   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
261:   int        an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M;
262:   int        i,j,k,anzi,bnzi,cnzi,brow;
263:   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;


267:   /* This error checking should be unnecessary if the symbolic was performed */
268:   if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
269:   if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm);
270:   if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm);
271:   if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn);

273:   /* Set up timers */
274:   if (!logkey_matmatmult_numeric) {
275:     PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);
276:   }
277:   PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);

279:   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
280:   PetscMalloc((cn+1)*sizeof(MatScalar),&temp);
281:   PetscMemzero(temp,cn*sizeof(MatScalar));
282:   /* Traverse A row-wise. */
283:   /* Build the ith row in C by summing over nonzero columns in A, */
284:   /* the rows of B corresponding to nonzeros of A. */
285:   for (i=0;i<am;i++) {
286:     anzi = ai[i+1] - ai[i];
287:     for (j=0;j<anzi;j++) {
288:       brow = *aj++;
289:       bnzi = bi[brow+1] - bi[brow];
290:       bjj  = bj + bi[brow];
291:       baj  = ba + bi[brow];
292:       for (k=0;k<bnzi;k++) {
293:         temp[bjj[k]] += (*aa)*baj[k];
294:       }
295:       flops += 2*bnzi;
296:       aa++;
297:     }
298:     /* Store row back into C, and re-zero temp */
299:     cnzi = ci[i+1] - ci[i];
300:     for (j=0;j<cnzi;j++) {
301:       ca[j] = temp[cj[j]];
302:       temp[cj[j]] = 0.0;
303:     }
304:     ca += cnzi;
305:     cj += cnzi;
306:   }
307:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
308:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
309: 
310:   /* Free temp */
311:   PetscFree(temp);
312:   PetscLogFlops(flops);
313:   PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);
314:   return(0);
315: }

317: #undef __FUNCT__
319: int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) {

323:   if (!logkey_matmatmult) {
324:     PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);
325:   }
326:   PetscLogEventBegin(logkey_matmatmult,A,B,0,0);
327:   MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);
328:   MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);
329:   PetscLogEventEnd(logkey_matmatmult,A,B,0,0);
330:   return(0);
331: }


334: /*
335:      MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
336:            C = P * A * P^T;

338:      Note: C is assumed to be uncreated.
339:            If this is not the case, Destroy C before calling this routine.
340: */
341: #undef __FUNCT__
343: int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
344:   /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
345:   /*        and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
346:   int            ierr;
347:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
348:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
349:   int            aishift=a->indexshift,pishift=p->indexshift;
350:   int            *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
351:   int            *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
352:   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
353:   int            i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
354:   MatScalar      *ca;


358:   /* some error checking which could be moved into interface layer */
359:   if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
360:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
361:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);

363:   /* Set up timers */
364:   if (!logkey_matapplypapt_symbolic) {
365:     PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);
366:   }
367:   PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);

369:   /* Create ij structure of P^T */
370:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

372:   /* Allocate ci array, arrays for fill computation and */
373:   /* free space for accumulating nonzero column info */
374:   PetscMalloc(((pm+1)*1)*sizeof(int),&ci);
375:   ci[0] = 0;

377:   PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);
378:   PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));
379:   pasparserow  = padenserow  + an;
380:   denserow     = pasparserow + an;
381:   sparserow    = denserow    + pm;

383:   /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
384:   /* This should be reasonable if sparsity of PAPt is similar to that of A. */
385:   ierr          = GetMoreSpace((ai[am]/pn)*pm,&free_space);
386:   current_space = free_space;

388:   /* Determine fill for each row of C: */
389:   for (i=0;i<pm;i++) {
390:     pnzi  = pi[i+1] - pi[i];
391:     panzi = 0;
392:     /* Get symbolic sparse row of PA: */
393:     for (j=0;j<pnzi;j++) {
394:       arow = *pj++;
395:       anzj = ai[arow+1] - ai[arow];
396:       ajj  = aj + ai[arow];
397:       for (k=0;k<anzj;k++) {
398:         if (!padenserow[ajj[k]]) {
399:           padenserow[ajj[k]]   = -1;
400:           pasparserow[panzi++] = ajj[k];
401:         }
402:       }
403:     }
404:     /* Using symbolic row of PA, determine symbolic row of C: */
405:     paj    = pasparserow;
406:     cnzi   = 0;
407:     for (j=0;j<panzi;j++) {
408:       ptrow = *paj++;
409:       ptnzj = pti[ptrow+1] - pti[ptrow];
410:       ptjj  = ptj + pti[ptrow];
411:       for (k=0;k<ptnzj;k++) {
412:         if (!denserow[ptjj[k]]) {
413:           denserow[ptjj[k]] = -1;
414:           sparserow[cnzi++] = ptjj[k];
415:         }
416:       }
417:     }

419:     /* sort sparse representation */
420:     PetscSortInt(cnzi,sparserow);

422:     /* If free space is not available, make more free space */
423:     /* Double the amount of total space in the list */
424:     if (current_space->local_remaining<cnzi) {
425:       GetMoreSpace(current_space->total_array_size,&current_space);
426:     }

428:     /* Copy data into free space, and zero out dense row */
429:     PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
430:     current_space->array           += cnzi;
431:     current_space->local_used      += cnzi;
432:     current_space->local_remaining -= cnzi;

434:     for (j=0;j<panzi;j++) {
435:       padenserow[pasparserow[j]] = 0;
436:     }
437:     for (j=0;j<cnzi;j++) {
438:       denserow[sparserow[j]] = 0;
439:     }
440:     ci[i+1] = ci[i] + cnzi;
441:   }
442:   /* column indices are in the list of free space */
443:   /* Allocate space for cj, initialize cj, and */
444:   /* destroy list of free space and other temporary array(s) */
445:   PetscMalloc((ci[pm]+1)*sizeof(int),&cj);
446:   MakeSpaceContiguous(&free_space,cj);
447:   PetscFree(padenserow);
448: 
449:   /* Allocate space for ca */
450:   PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
451:   PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
452: 
453:   /* put together the new matrix */
454:   MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);

456:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
457:   /* Since these are PETSc arrays, change flags to free them as necessary. */
458:   c = (Mat_SeqAIJ *)((*C)->data);
459:   c->freedata = PETSC_TRUE;
460:   c->nonew    = 0;

462:   /* Clean up. */
463:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

465:   PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);
466:   return(0);
467: }

469: /*
470:      MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
471:            C = P * A * P^T;
472:      Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
473: */
474: #undef __FUNCT__
476: int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
477:   int        ierr,flops=0;
478:   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
479:   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
480:   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
481:   int        aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift;
482:   int        *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
483:   int        *ci=c->i,*cj=c->j;
484:   int        an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
485:   int        i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
486:   MatScalar  *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;


490:   /* This error checking should be unnecessary if the symbolic was performed */
491:   if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
492:   if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm);
493:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am);
494:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an);
495:   if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn);

497:   /* Set up timers */
498:   if (!logkey_matapplypapt_numeric) {
499:     PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);
500:   }
501:   PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);

503:   PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);
504:   PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));
505:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

507:   paj      = (int *)(paa + an);
508:   pajdense = paj + an;

510:   for (i=0;i<pm;i++) {
511:     /* Form sparse row of P*A */
512:     pnzi  = pi[i+1] - pi[i];
513:     panzj = 0;
514:     for (j=0;j<pnzi;j++) {
515:       arow = *pj++;
516:       anzj = ai[arow+1] - ai[arow];
517:       ajj  = aj + ai[arow];
518:       aaj  = aa + ai[arow];
519:       for (k=0;k<anzj;k++) {
520:         if (!pajdense[ajj[k]]) {
521:           pajdense[ajj[k]] = -1;
522:           paj[panzj++]     = ajj[k];
523:         }
524:         paa[ajj[k]] += (*pa)*aaj[k];
525:       }
526:       flops += 2*anzj;
527:       pa++;
528:     }

530:     /* Sort the j index array for quick sparse axpy. */
531:     PetscSortInt(panzj,paj);

533:     /* Compute P*A*P^T using sparse inner products. */
534:     /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
535:     cnzi = ci[i+1] - ci[i];
536:     for (j=0;j<cnzi;j++) {
537:       /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
538:       ptcol = *cj++;
539:       ptnzj = pi[ptcol+1] - pi[ptcol];
540:       ptj   = pjj + pi[ptcol];
541:       ptaj  = pta + pi[ptcol];
542:       sum   = 0.;
543:       k1    = 0;
544:       k2    = 0;
545:       while ((k1<panzj) && (k2<ptnzj)) {
546:         if (paj[k1]==ptj[k2]) {
547:           sum += paa[paj[k1++]]*ptaj[k2++];
548:         } else if (paj[k1] < ptj[k2]) {
549:           k1++;
550:         } else /* if (paj[k1] > ptj[k2]) */ {
551:           k2++;
552:         }
553:       }
554:       *ca++ = sum;
555:     }

557:     /* Zero the current row info for P*A */
558:     for (j=0;j<panzj;j++) {
559:       paa[paj[j]]      = 0.;
560:       pajdense[paj[j]] = 0;
561:     }
562:   }

564:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
565:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
566:   PetscLogFlops(flops);
567:   PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);
568:   return(0);
569: }
570: 
571: #undef __FUNCT__
573: int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {

577:   if (!logkey_matapplypapt) {
578:     PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);
579:   }
580:   PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);
581:   MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
582:   MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
583:   PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);
584:   return(0);
585: }