Actual source code: matptap.c
1: /*
2: Defines projective product routines where A is a SeqAIJ matrix
3: C = P^T * A * P
4: */
6: #include src/mat/impls/aij/seq/aij.h
7: #include src/mat/utils/freespace.h
9: EXTERN int MatSeqAIJPtAP(Mat,Mat,Mat*);
10: EXTERN int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*);
11: EXTERN int MatSeqAIJPtAPNumeric(Mat,Mat,Mat);
12: EXTERN int RegisterMatMatMultRoutines_Private(Mat);
14: static int MATSeqAIJ_PtAP = 0;
15: static int MATSeqAIJ_PtAPSymbolic = 0;
16: static int MATSeqAIJ_PtAPNumeric = 0;
18: /*
19: MatSeqAIJPtAP - Creates the SeqAIJ matrix product, C,
20: of SeqAIJ matrix A and matrix P:
21: C = P^T * A * P;
23: Note: C is assumed to be uncreated.
24: If this is not the case, Destroy C before calling this routine.
25: */
28: int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) {
30: char funct[80];
34: PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);
36: MatSeqAIJPtAPSymbolic(A,P,C);
38: /* Avoid additional error checking included in */
39: /* MatSeqAIJApplyPtAPNumeric(A,P,*C); */
41: /* Query A for ApplyPtAPNumeric implementation based on types of P */
42: PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");
43: PetscStrcat(funct,P->type_name);
44: PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,*C));
46: PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);
48: return(0);
49: }
51: /*
52: MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the SeqAIJ matrix product, C,
53: of SeqAIJ matrix A and matrix P, according to:
54: C = P^T * A * P;
56: Note: C is assumed to be uncreated.
57: If this is not the case, Destroy C before calling this routine.
58: */
61: int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) {
63: char funct[80];
71: MatPreallocated(A);
72: if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
73: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
77: MatPreallocated(P);
78: if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
79: if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
81: if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
82: if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
84: /* Query A for ApplyPtAP implementation based on types of P */
85: PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_");
86: PetscStrcat(funct,P->type_name);
87: PetscTryMethod(A,funct,(Mat,Mat,Mat*),(A,P,C));
89: return(0);
90: }
92: EXTERN_C_BEGIN
95: int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
96: int ierr;
97: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
98: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
99: int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
100: int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
101: int an=A->N,am=A->M,pn=P->N,pm=P->M;
102: int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
103: MatScalar *ca;
107: /* Start timer */
108: PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);
110: /* Get ij structure of P^T */
111: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
112: ptJ=ptj;
114: /* Allocate ci array, arrays for fill computation and */
115: /* free space for accumulating nonzero column info */
116: PetscMalloc((pn+1)*sizeof(int),&ci);
117: ci[0] = 0;
119: PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);
120: PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));
121: ptasparserow = ptadenserow + an;
122: denserow = ptasparserow + an;
123: sparserow = denserow + pn;
125: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
126: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
127: GetMoreSpace((ai[am]/pm)*pn,&free_space);
128: current_space = free_space;
130: /* Determine symbolic info for each row of C: */
131: for (i=0;i<pn;i++) {
132: ptnzi = pti[i+1] - pti[i];
133: ptanzi = 0;
134: /* Determine symbolic row of PtA: */
135: for (j=0;j<ptnzi;j++) {
136: arow = *ptJ++;
137: anzj = ai[arow+1] - ai[arow];
138: ajj = aj + ai[arow];
139: for (k=0;k<anzj;k++) {
140: if (!ptadenserow[ajj[k]]) {
141: ptadenserow[ajj[k]] = -1;
142: ptasparserow[ptanzi++] = ajj[k];
143: }
144: }
145: }
146: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
147: ptaj = ptasparserow;
148: cnzi = 0;
149: for (j=0;j<ptanzi;j++) {
150: prow = *ptaj++;
151: pnzj = pi[prow+1] - pi[prow];
152: pjj = pj + pi[prow];
153: for (k=0;k<pnzj;k++) {
154: if (!denserow[pjj[k]]) {
155: denserow[pjj[k]] = -1;
156: sparserow[cnzi++] = pjj[k];
157: }
158: }
159: }
161: /* sort sparserow */
162: PetscSortInt(cnzi,sparserow);
163:
164: /* If free space is not available, make more free space */
165: /* Double the amount of total space in the list */
166: if (current_space->local_remaining<cnzi) {
167: GetMoreSpace(current_space->total_array_size,¤t_space);
168: }
170: /* Copy data into free space, and zero out denserows */
171: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
172: current_space->array += cnzi;
173: current_space->local_used += cnzi;
174: current_space->local_remaining -= cnzi;
175:
176: for (j=0;j<ptanzi;j++) {
177: ptadenserow[ptasparserow[j]] = 0;
178: }
179: for (j=0;j<cnzi;j++) {
180: denserow[sparserow[j]] = 0;
181: }
182: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
183: /* For now, we will recompute what is needed. */
184: ci[i+1] = ci[i] + cnzi;
185: }
186: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
187: /* Allocate space for cj, initialize cj, and */
188: /* destroy list of free space and other temporary array(s) */
189: PetscMalloc((ci[pn]+1)*sizeof(int),&cj);
190: MakeSpaceContiguous(&free_space,cj);
191: PetscFree(ptadenserow);
192:
193: /* Allocate space for ca */
194: PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);
195: PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));
196:
197: /* put together the new matrix */
198: MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);
200: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
201: /* Since these are PETSc arrays, change flags to free them as necessary. */
202: c = (Mat_SeqAIJ *)((*C)->data);
203: c->freedata = PETSC_TRUE;
204: c->nonew = 0;
206: /* Clean up. */
207: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
209: PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);
210: return(0);
211: }
212: EXTERN_C_END
214: #include src/mat/impls/maij/maij.h
215: EXTERN_C_BEGIN
218: int MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
219: /* This routine requires testing -- I don't think it works. */
220: int ierr;
221: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
222: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
223: Mat P=pp->AIJ;
224: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
225: int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
226: int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
227: int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
228: int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
229: MatScalar *ca;
232: /* Start timer */
233: PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);
235: /* Get ij structure of P^T */
236: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
238: /* Allocate ci array, arrays for fill computation and */
239: /* free space for accumulating nonzero column info */
240: PetscMalloc((pn+1)*sizeof(int),&ci);
241: ci[0] = 0;
243: PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);
244: PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));
245: ptasparserow = ptadenserow + an;
246: denserow = ptasparserow + an;
247: sparserow = denserow + pn;
249: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
250: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
251: GetMoreSpace((ai[am]/pm)*pn,&free_space);
252: current_space = free_space;
254: /* Determine symbolic info for each row of C: */
255: for (i=0;i<pn/ppdof;i++) {
256: ptnzi = pti[i+1] - pti[i];
257: ptanzi = 0;
258: ptJ = ptj + pti[i];
259: for (dof=0;dof<ppdof;dof++) {
260: /* Determine symbolic row of PtA: */
261: for (j=0;j<ptnzi;j++) {
262: arow = ptJ[j] + dof;
263: anzj = ai[arow+1] - ai[arow];
264: ajj = aj + ai[arow];
265: for (k=0;k<anzj;k++) {
266: if (!ptadenserow[ajj[k]]) {
267: ptadenserow[ajj[k]] = -1;
268: ptasparserow[ptanzi++] = ajj[k];
269: }
270: }
271: }
272: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
273: ptaj = ptasparserow;
274: cnzi = 0;
275: for (j=0;j<ptanzi;j++) {
276: pdof = *ptaj%dof;
277: prow = (*ptaj++)/dof;
278: pnzj = pi[prow+1] - pi[prow];
279: pjj = pj + pi[prow];
280: for (k=0;k<pnzj;k++) {
281: if (!denserow[pjj[k]+pdof]) {
282: denserow[pjj[k]+pdof] = -1;
283: sparserow[cnzi++] = pjj[k]+pdof;
284: }
285: }
286: }
288: /* sort sparserow */
289: PetscSortInt(cnzi,sparserow);
290:
291: /* If free space is not available, make more free space */
292: /* Double the amount of total space in the list */
293: if (current_space->local_remaining<cnzi) {
294: GetMoreSpace(current_space->total_array_size,¤t_space);
295: }
297: /* Copy data into free space, and zero out denserows */
298: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
299: current_space->array += cnzi;
300: current_space->local_used += cnzi;
301: current_space->local_remaining -= cnzi;
303: for (j=0;j<ptanzi;j++) {
304: ptadenserow[ptasparserow[j]] = 0;
305: }
306: for (j=0;j<cnzi;j++) {
307: denserow[sparserow[j]] = 0;
308: }
309: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
310: /* For now, we will recompute what is needed. */
311: ci[i+1+dof] = ci[i+dof] + cnzi;
312: }
313: }
314: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
315: /* Allocate space for cj, initialize cj, and */
316: /* destroy list of free space and other temporary array(s) */
317: PetscMalloc((ci[pn]+1)*sizeof(int),&cj);
318: MakeSpaceContiguous(&free_space,cj);
319: PetscFree(ptadenserow);
320:
321: /* Allocate space for ca */
322: PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);
323: PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));
324:
325: /* put together the new matrix */
326: MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);
328: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
329: /* Since these are PETSc arrays, change flags to free them as necessary. */
330: c = (Mat_SeqAIJ *)((*C)->data);
331: c->freedata = PETSC_TRUE;
332: c->nonew = 0;
334: /* Clean up. */
335: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
337: PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);
338: return(0);
339: }
340: EXTERN_C_END
342: /*
343: MatSeqAIJPtAPNumeric - Computes the SeqAIJ matrix product, C,
344: of SeqAIJ matrix A and matrix P, according to:
345: C = P^T * A * P
346: Note: C must have been created by calling MatSeqAIJApplyPtAPSymbolic.
347: */
350: int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) {
352: char funct[80];
358: MatPreallocated(A);
359: if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
360: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
364: MatPreallocated(P);
365: if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
366: if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
370: MatPreallocated(C);
371: if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
372: if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
374: if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
375: if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
376: if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
377: if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
379: /* Query A for ApplyPtAP implementation based on types of P */
380: PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");
381: PetscStrcat(funct,P->type_name);
382: PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,C));
384: return(0);
385: }
387: EXTERN_C_BEGIN
390: int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
391: int ierr,flops=0;
392: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
393: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
394: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
395: int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
396: int *ci=c->i,*cj=c->j,*cjj;
397: int am=A->M,cn=C->N,cm=C->M;
398: int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
399: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
402: PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);
404: /* Allocate temporary array for storage of one row of A*P */
405: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);
406: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));
408: apj = (int *)(apa + cn);
409: apjdense = apj + cn;
411: /* Clear old values in C */
412: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
414: for (i=0;i<am;i++) {
415: /* Form sparse row of A*P */
416: anzi = ai[i+1] - ai[i];
417: apnzj = 0;
418: for (j=0;j<anzi;j++) {
419: prow = *aj++;
420: pnzj = pi[prow+1] - pi[prow];
421: pjj = pj + pi[prow];
422: paj = pa + pi[prow];
423: for (k=0;k<pnzj;k++) {
424: if (!apjdense[pjj[k]]) {
425: apjdense[pjj[k]] = -1;
426: apj[apnzj++] = pjj[k];
427: }
428: apa[pjj[k]] += (*aa)*paj[k];
429: }
430: flops += 2*pnzj;
431: aa++;
432: }
434: /* Sort the j index array for quick sparse axpy. */
435: PetscSortInt(apnzj,apj);
437: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
438: pnzi = pi[i+1] - pi[i];
439: for (j=0;j<pnzi;j++) {
440: nextap = 0;
441: crow = *pJ++;
442: cjj = cj + ci[crow];
443: caj = ca + ci[crow];
444: /* Perform sparse axpy operation. Note cjj includes apj. */
445: for (k=0;nextap<apnzj;k++) {
446: if (cjj[k]==apj[nextap]) {
447: caj[k] += (*pA)*apa[apj[nextap++]];
448: }
449: }
450: flops += 2*apnzj;
451: pA++;
452: }
454: /* Zero the current row info for A*P */
455: for (j=0;j<apnzj;j++) {
456: apa[apj[j]] = 0.;
457: apjdense[apj[j]] = 0;
458: }
459: }
461: /* Assemble the final matrix and clean up */
462: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
463: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
464: PetscFree(apa);
465: PetscLogFlops(flops);
466: PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);
468: return(0);
469: }
470: EXTERN_C_END
474: int RegisterApplyPtAPRoutines_Private(Mat A) {
479: if (!MATSeqAIJ_PtAP) {
480: PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);
481: }
483: if (!MATSeqAIJ_PtAPSymbolic) {
484: PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);
485: }
486: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij",
487: "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ",
488: MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);
490: if (!MATSeqAIJ_PtAPNumeric) {
491: PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);
492: }
493: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij",
494: "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ",
495: MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);
496: RegisterMatMatMultRoutines_Private(A);
497: return(0);
498: }