Actual source code: matpapt.c

  1: /*
  2:   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
  3:           C = P * A * P^T
  4: */

 6:  #include src/mat/impls/aij/seq/aij.h
 7:  #include src/mat/utils/freespace.h

  9: static PetscEvent logkey_matapplypapt          = 0;
 10: static PetscEvent logkey_matapplypapt_symbolic = 0;
 11: static PetscEvent logkey_matapplypapt_numeric  = 0;

 13: /*
 14:      MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
 15:            C = P * A * P^T;

 17:      Note: C is assumed to be uncreated.
 18:            If this is not the case, Destroy C before calling this routine.
 19: */
 22: PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
 23: {
 24:   /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
 25:   /*        and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
 27:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
 28:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
 29:   PetscInt       *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
 30:   PetscInt       *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
 31:   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
 32:   PetscInt       i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
 33:   MatScalar      *ca;

 36:   /* some error checking which could be moved into interface layer */
 37:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
 38:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);

 40:   /* Set up timers */
 41:   if (!logkey_matapplypapt_symbolic) {
 42:     PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);
 43:   }
 44:   PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);

 46:   /* Create ij structure of P^T */
 47:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

 49:   /* Allocate ci array, arrays for fill computation and */
 50:   /* free space for accumulating nonzero column info */
 51:   PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);
 52:   ci[0] = 0;

 54:   PetscMalloc((2*an+2*pm+1)*sizeof(PetscInt),&padenserow);
 55:   PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(PetscInt));
 56:   pasparserow  = padenserow  + an;
 57:   denserow     = pasparserow + an;
 58:   sparserow    = denserow    + pm;

 60:   /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
 61:   /* This should be reasonable if sparsity of PAPt is similar to that of A. */
 62:   GetMoreSpace((ai[am]/pn)*pm,&free_space);
 63:   current_space = free_space;

 65:   /* Determine fill for each row of C: */
 66:   for (i=0;i<pm;i++) {
 67:     pnzi  = pi[i+1] - pi[i];
 68:     panzi = 0;
 69:     /* Get symbolic sparse row of PA: */
 70:     for (j=0;j<pnzi;j++) {
 71:       arow = *pj++;
 72:       anzj = ai[arow+1] - ai[arow];
 73:       ajj  = aj + ai[arow];
 74:       for (k=0;k<anzj;k++) {
 75:         if (!padenserow[ajj[k]]) {
 76:           padenserow[ajj[k]]   = -1;
 77:           pasparserow[panzi++] = ajj[k];
 78:         }
 79:       }
 80:     }
 81:     /* Using symbolic row of PA, determine symbolic row of C: */
 82:     paj    = pasparserow;
 83:     cnzi   = 0;
 84:     for (j=0;j<panzi;j++) {
 85:       ptrow = *paj++;
 86:       ptnzj = pti[ptrow+1] - pti[ptrow];
 87:       ptjj  = ptj + pti[ptrow];
 88:       for (k=0;k<ptnzj;k++) {
 89:         if (!denserow[ptjj[k]]) {
 90:           denserow[ptjj[k]] = -1;
 91:           sparserow[cnzi++] = ptjj[k];
 92:         }
 93:       }
 94:     }

 96:     /* sort sparse representation */
 97:     PetscSortInt(cnzi,sparserow);

 99:     /* If free space is not available, make more free space */
100:     /* Double the amount of total space in the list */
101:     if (current_space->local_remaining<cnzi) {
102:       GetMoreSpace(current_space->total_array_size,&current_space);
103:     }

105:     /* Copy data into free space, and zero out dense row */
106:     PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
107:     current_space->array           += cnzi;
108:     current_space->local_used      += cnzi;
109:     current_space->local_remaining -= cnzi;

111:     for (j=0;j<panzi;j++) {
112:       padenserow[pasparserow[j]] = 0;
113:     }
114:     for (j=0;j<cnzi;j++) {
115:       denserow[sparserow[j]] = 0;
116:     }
117:     ci[i+1] = ci[i] + cnzi;
118:   }
119:   /* column indices are in the list of free space */
120:   /* Allocate space for cj, initialize cj, and */
121:   /* destroy list of free space and other temporary array(s) */
122:   PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);
123:   MakeSpaceContiguous(&free_space,cj);
124:   PetscFree(padenserow);
125: 
126:   /* Allocate space for ca */
127:   PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
128:   PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
129: 
130:   /* put together the new matrix */
131:   MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);

133:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
134:   /* Since these are PETSc arrays, change flags to free them as necessary. */
135:   c = (Mat_SeqAIJ *)((*C)->data);
136:   c->freedata = PETSC_TRUE;
137:   c->nonew    = 0;

139:   /* Clean up. */
140:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

142:   PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);
143:   return(0);
144: }

146: /*
147:      MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
148:            C = P * A * P^T;
149:      Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
150: */
153: PetscErrorCode MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
154: {
156:   PetscInt       flops=0;
157:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
158:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
159:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
160:   PetscInt       *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
161:   PetscInt       *ci=c->i,*cj=c->j;
162:   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M;
163:   PetscInt       i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
164:   MatScalar      *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;


168:   /* This error checking should be unnecessary if the symbolic was performed */
169:   if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm,cm);
170:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
171:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
172:   if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm, cn);

174:   /* Set up timers */
175:   if (!logkey_matapplypapt_numeric) {
176:     PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);
177:   }
178:   PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);

180:   PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(PetscInt)),&paa);
181:   PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(PetscInt)));
182:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

184:   paj      = (PetscInt*)(paa + an);
185:   pajdense = paj + an;

187:   for (i=0;i<pm;i++) {
188:     /* Form sparse row of P*A */
189:     pnzi  = pi[i+1] - pi[i];
190:     panzj = 0;
191:     for (j=0;j<pnzi;j++) {
192:       arow = *pj++;
193:       anzj = ai[arow+1] - ai[arow];
194:       ajj  = aj + ai[arow];
195:       aaj  = aa + ai[arow];
196:       for (k=0;k<anzj;k++) {
197:         if (!pajdense[ajj[k]]) {
198:           pajdense[ajj[k]] = -1;
199:           paj[panzj++]     = ajj[k];
200:         }
201:         paa[ajj[k]] += (*pa)*aaj[k];
202:       }
203:       flops += 2*anzj;
204:       pa++;
205:     }

207:     /* Sort the j index array for quick sparse axpy. */
208:     PetscSortInt(panzj,paj);

210:     /* Compute P*A*P^T using sparse inner products. */
211:     /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
212:     cnzi = ci[i+1] - ci[i];
213:     for (j=0;j<cnzi;j++) {
214:       /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
215:       ptcol = *cj++;
216:       ptnzj = pi[ptcol+1] - pi[ptcol];
217:       ptj   = pjj + pi[ptcol];
218:       ptaj  = pta + pi[ptcol];
219:       sum   = 0.;
220:       k1    = 0;
221:       k2    = 0;
222:       while ((k1<panzj) && (k2<ptnzj)) {
223:         if (paj[k1]==ptj[k2]) {
224:           sum += paa[paj[k1++]]*ptaj[k2++];
225:         } else if (paj[k1] < ptj[k2]) {
226:           k1++;
227:         } else /* if (paj[k1] > ptj[k2]) */ {
228:           k2++;
229:         }
230:       }
231:       *ca++ = sum;
232:     }

234:     /* Zero the current row info for P*A */
235:     for (j=0;j<panzj;j++) {
236:       paa[paj[j]]      = 0.;
237:       pajdense[paj[j]] = 0;
238:     }
239:   }

241:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
242:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
243:   PetscLogFlops(flops);
244:   PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);
245:   return(0);
246: }
247: 
250: PetscErrorCode MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
251: {

255:   if (!logkey_matapplypapt) {
256:     PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);
257:   }
258:   PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);
259:   MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
260:   MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
261:   PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);
262:   return(0);
263: }