Actual source code: matptap.c
1: /*
2: Defines projective product routines where A is a SeqAIJ matrix
3: C = P^T * A * P
4: */
6: #include src/mat/impls/aij/seq/aij.h
7: #include src/mat/utils/freespace.h
9: int MatSeqAIJPtAP(Mat,Mat,Mat*);
10: int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*);
11: int MatSeqAIJPtAPNumeric(Mat,Mat,Mat);
13: static int MATSeqAIJ_PtAP = 0;
14: static int MATSeqAIJ_PtAPSymbolic = 0;
15: static int MATSeqAIJ_PtAPNumeric = 0;
17: /*
18: MatSeqAIJPtAP - Creates the SeqAIJ matrix product, C,
19: of SeqAIJ matrix A and matrix P:
20: C = P^T * A * P;
22: Note: C is assumed to be uncreated.
23: If this is not the case, Destroy C before calling this routine.
24: */
25: #undef __FUNCT__
27: int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) {
29: char funct[80];
33: PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);
35: MatSeqAIJPtAPSymbolic(A,P,C);
37: /* Avoid additional error checking included in */
38: /* MatSeqAIJApplyPtAPNumeric(A,P,*C); */
40: /* Query A for ApplyPtAPNumeric implementation based on types of P */
41: PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");
42: PetscStrcat(funct,P->type_name);
43: PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,*C));
45: PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);
47: return(0);
48: }
50: /*
51: MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the SeqAIJ matrix product, C,
52: of SeqAIJ matrix A and matrix P, according to:
53: C = P^T * A * P;
55: Note: C is assumed to be uncreated.
56: If this is not the case, Destroy C before calling this routine.
57: */
58: #undef __FUNCT__
60: int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) {
62: char funct[80];
70: MatPreallocated(A);
71: if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
72: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
76: MatPreallocated(P);
77: if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
78: if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
80: if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
81: if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
83: /* Query A for ApplyPtAP implementation based on types of P */
84: PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_");
85: PetscStrcat(funct,P->type_name);
86: PetscTryMethod(A,funct,(Mat,Mat,Mat*),(A,P,C));
88: return(0);
89: }
91: EXTERN_C_BEGIN
92: #undef __FUNCT__
94: int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
95: int ierr;
96: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
97: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
98: int aishift=a->indexshift,pishift=p->indexshift;
99: int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
100: int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
101: int an=A->N,am=A->M,pn=P->N,pm=P->M;
102: int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
103: MatScalar *ca;
107: /* some error checking which could be moved into interface layer */
108: if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
109:
110: /* Start timer */
111: PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);
113: /* Get ij structure of P^T */
114: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
115: ptJ=ptj;
117: /* Allocate ci array, arrays for fill computation and */
118: /* free space for accumulating nonzero column info */
119: PetscMalloc((pn+1)*sizeof(int),&ci);
120: ci[0] = 0;
122: PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);
123: PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));
124: ptasparserow = ptadenserow + an;
125: denserow = ptasparserow + an;
126: sparserow = denserow + pn;
128: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
129: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
130: ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);
131: current_space = free_space;
133: /* Determine symbolic info for each row of C: */
134: for (i=0;i<pn;i++) {
135: ptnzi = pti[i+1] - pti[i];
136: ptanzi = 0;
137: /* Determine symbolic row of PtA: */
138: for (j=0;j<ptnzi;j++) {
139: arow = *ptJ++;
140: anzj = ai[arow+1] - ai[arow];
141: ajj = aj + ai[arow];
142: for (k=0;k<anzj;k++) {
143: if (!ptadenserow[ajj[k]]) {
144: ptadenserow[ajj[k]] = -1;
145: ptasparserow[ptanzi++] = ajj[k];
146: }
147: }
148: }
149: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
150: ptaj = ptasparserow;
151: cnzi = 0;
152: for (j=0;j<ptanzi;j++) {
153: prow = *ptaj++;
154: pnzj = pi[prow+1] - pi[prow];
155: pjj = pj + pi[prow];
156: for (k=0;k<pnzj;k++) {
157: if (!denserow[pjj[k]]) {
158: denserow[pjj[k]] = -1;
159: sparserow[cnzi++] = pjj[k];
160: }
161: }
162: }
164: /* sort sparserow */
165: PetscSortInt(cnzi,sparserow);
166:
167: /* If free space is not available, make more free space */
168: /* Double the amount of total space in the list */
169: if (current_space->local_remaining<cnzi) {
170: GetMoreSpace(current_space->total_array_size,¤t_space);
171: }
173: /* Copy data into free space, and zero out denserows */
174: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
175: current_space->array += cnzi;
176: current_space->local_used += cnzi;
177: current_space->local_remaining -= cnzi;
178:
179: for (j=0;j<ptanzi;j++) {
180: ptadenserow[ptasparserow[j]] = 0;
181: }
182: for (j=0;j<cnzi;j++) {
183: denserow[sparserow[j]] = 0;
184: }
185: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
186: /* For now, we will recompute what is needed. */
187: ci[i+1] = ci[i] + cnzi;
188: }
189: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
190: /* Allocate space for cj, initialize cj, and */
191: /* destroy list of free space and other temporary array(s) */
192: PetscMalloc((ci[pn]+1)*sizeof(int),&cj);
193: MakeSpaceContiguous(&free_space,cj);
194: PetscFree(ptadenserow);
195:
196: /* Allocate space for ca */
197: PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);
198: PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));
199:
200: /* put together the new matrix */
201: MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);
203: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
204: /* Since these are PETSc arrays, change flags to free them as necessary. */
205: c = (Mat_SeqAIJ *)((*C)->data);
206: c->freedata = PETSC_TRUE;
207: c->nonew = 0;
209: /* Clean up. */
210: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
212: PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);
213: return(0);
214: }
215: EXTERN_C_END
217: #include src/mat/impls/maij/maij.h
218: EXTERN_C_BEGIN
219: #undef __FUNCT__
221: int MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
222: int ierr;
223: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
224: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
225: Mat P=pp->AIJ;
226: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
227: int aishift=a->indexshift,pishift=p->indexshift;
228: int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
229: int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
230: int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
231: int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
232: MatScalar *ca;
236: /* some error checking which could be moved into interface layer */
237: if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
238:
239: /* Start timer */
240: PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);
242: /* Get ij structure of P^T */
243: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
245: /* Allocate ci array, arrays for fill computation and */
246: /* free space for accumulating nonzero column info */
247: PetscMalloc((pn+1)*sizeof(int),&ci);
248: ci[0] = 0;
250: PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);
251: PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));
252: ptasparserow = ptadenserow + an;
253: denserow = ptasparserow + an;
254: sparserow = denserow + pn;
256: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
257: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
258: ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space);
259: current_space = free_space;
261: /* Determine symbolic info for each row of C: */
262: for (i=0;i<pn/ppdof;i++) {
263: ptnzi = pti[i+1] - pti[i];
264: ptanzi = 0;
265: ptJ = ptj + pti[i];
266: for (dof=0;dof<ppdof;dof++) {
267: /* Determine symbolic row of PtA: */
268: for (j=0;j<ptnzi;j++) {
269: arow = ptJ[j] + dof;
270: anzj = ai[arow+1] - ai[arow];
271: ajj = aj + ai[arow];
272: for (k=0;k<anzj;k++) {
273: if (!ptadenserow[ajj[k]]) {
274: ptadenserow[ajj[k]] = -1;
275: ptasparserow[ptanzi++] = ajj[k];
276: }
277: }
278: }
279: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
280: ptaj = ptasparserow;
281: cnzi = 0;
282: for (j=0;j<ptanzi;j++) {
283: pdof = *ptaj%dof;
284: prow = (*ptaj++)/dof;
285: pnzj = pi[prow+1] - pi[prow];
286: pjj = pj + pi[prow];
287: for (k=0;k<pnzj;k++) {
288: if (!denserow[pjj[k]+pdof]) {
289: denserow[pjj[k]+pdof] = -1;
290: sparserow[cnzi++] = pjj[k]+pdof;
291: }
292: }
293: }
295: /* sort sparserow */
296: PetscSortInt(cnzi,sparserow);
297:
298: /* If free space is not available, make more free space */
299: /* Double the amount of total space in the list */
300: if (current_space->local_remaining<cnzi) {
301: GetMoreSpace(current_space->total_array_size,¤t_space);
302: }
304: /* Copy data into free space, and zero out denserows */
305: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));
306: current_space->array += cnzi;
307: current_space->local_used += cnzi;
308: current_space->local_remaining -= cnzi;
310: for (j=0;j<ptanzi;j++) {
311: ptadenserow[ptasparserow[j]] = 0;
312: }
313: for (j=0;j<cnzi;j++) {
314: denserow[sparserow[j]] = 0;
315: }
316: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
317: /* For now, we will recompute what is needed. */
318: ci[i+1+dof] = ci[i+dof] + cnzi;
319: }
320: }
321: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
322: /* Allocate space for cj, initialize cj, and */
323: /* destroy list of free space and other temporary array(s) */
324: PetscMalloc((ci[pn]+1)*sizeof(int),&cj);
325: MakeSpaceContiguous(&free_space,cj);
326: PetscFree(ptadenserow);
327:
328: /* Allocate space for ca */
329: PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);
330: PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));
331:
332: /* put together the new matrix */
333: MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);
335: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
336: /* Since these are PETSc arrays, change flags to free them as necessary. */
337: c = (Mat_SeqAIJ *)((*C)->data);
338: c->freedata = PETSC_TRUE;
339: c->nonew = 0;
341: /* Clean up. */
342: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
344: PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);
345: return(0);
346: }
347: EXTERN_C_END
349: /*
350: MatSeqAIJPtAPNumeric - Computes the SeqAIJ matrix product, C,
351: of SeqAIJ matrix A and matrix P, according to:
352: C = P^T * A * P
353: Note: C must have been created by calling MatSeqAIJApplyPtAPSymbolic.
354: */
355: #undef __FUNCT__
357: int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) {
359: char funct[80];
365: MatPreallocated(A);
366: if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
367: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
371: MatPreallocated(P);
372: if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
373: if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
377: MatPreallocated(C);
378: if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
379: if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
381: if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
382: if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
383: if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
384: if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
386: /* Query A for ApplyPtAP implementation based on types of P */
387: PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");
388: PetscStrcat(funct,P->type_name);
389: PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,C));
391: return(0);
392: }
394: EXTERN_C_BEGIN
395: #undef __FUNCT__
397: int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
398: int ierr,flops=0;
399: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
400: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
401: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
402: int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift;
403: int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
404: int *ci=c->i,*cj=c->j,*cjj;
405: int am=A->M,cn=C->N,cm=C->M;
406: int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
407: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
411: /* Currently not for shifted matrices! */
412: if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported.");
414: PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);
416: /* Allocate temporary array for storage of one row of A*P */
417: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);
418: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));
420: apj = (int *)(apa + cn);
421: apjdense = apj + cn;
423: /* Clear old values in C */
424: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
426: for (i=0;i<am;i++) {
427: /* Form sparse row of A*P */
428: anzi = ai[i+1] - ai[i];
429: apnzj = 0;
430: for (j=0;j<anzi;j++) {
431: prow = *aj++;
432: pnzj = pi[prow+1] - pi[prow];
433: pjj = pj + pi[prow];
434: paj = pa + pi[prow];
435: for (k=0;k<pnzj;k++) {
436: if (!apjdense[pjj[k]]) {
437: apjdense[pjj[k]] = -1;
438: apj[apnzj++] = pjj[k];
439: }
440: apa[pjj[k]] += (*aa)*paj[k];
441: }
442: flops += 2*pnzj;
443: aa++;
444: }
446: /* Sort the j index array for quick sparse axpy. */
447: PetscSortInt(apnzj,apj);
449: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
450: pnzi = pi[i+1] - pi[i];
451: for (j=0;j<pnzi;j++) {
452: nextap = 0;
453: crow = *pJ++;
454: cjj = cj + ci[crow];
455: caj = ca + ci[crow];
456: /* Perform sparse axpy operation. Note cjj includes apj. */
457: for (k=0;nextap<apnzj;k++) {
458: if (cjj[k]==apj[nextap]) {
459: caj[k] += (*pA)*apa[apj[nextap++]];
460: }
461: }
462: flops += 2*apnzj;
463: pA++;
464: }
466: /* Zero the current row info for A*P */
467: for (j=0;j<apnzj;j++) {
468: apa[apj[j]] = 0.;
469: apjdense[apj[j]] = 0;
470: }
471: }
473: /* Assemble the final matrix and clean up */
474: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
475: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
476: PetscFree(apa);
477: PetscLogFlops(flops);
478: PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);
480: return(0);
481: }
482: EXTERN_C_END
484: #undef __FUNCT__
486: int RegisterApplyPtAPRoutines_Private(Mat A) {
491: if (!MATSeqAIJ_PtAP) {
492: PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);
493: }
495: if (!MATSeqAIJ_PtAPSymbolic) {
496: PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);
497: }
498: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij",
499: "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ",
500: MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);
502: if (!MATSeqAIJ_PtAPNumeric) {
503: PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);
504: }
505: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij",
506: "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ",
507: MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);
508: return(0);
509: }