File: | mat/impls/aij/seq/matmatmult.c |
Warning: | line 688, column 33 Dereference of null pointer |
[?] Use j/k keys for keyboard navigation
1 | ||||
2 | /* | |||
3 | Defines matrix-matrix product routines for pairs of SeqAIJ matrices | |||
4 | C = A * B | |||
5 | */ | |||
6 | ||||
7 | #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ | |||
8 | #include <../src/mat/utils/freespace.h> | |||
9 | #include <petscbt.h> | |||
10 | #include <petsc/private/isimpl.h> | |||
11 | #include <../src/mat/impls/dense/seq/dense.h> | |||
12 | ||||
13 | ||||
14 | PETSC_INTERNextern __attribute__((visibility ("hidden"))) PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) | |||
15 | { | |||
16 | PetscErrorCode ierr; | |||
17 | ||||
18 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 18; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
19 | if (scall == MAT_INITIAL_MATRIX) { | |||
20 | ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultSymbolic].active) ? (*PetscLogPLB)((MAT_MatMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),20,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
21 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),21,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
22 | ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultSymbolic].active) ? (*PetscLogPLE)((MAT_MatMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),22,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
23 | } | |||
24 | ||||
25 | ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultNumeric].active) ? (*PetscLogPLB)((MAT_MatMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),25,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
26 | ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),26,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
27 | ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultNumeric].active) ? (*PetscLogPLE)((MAT_MatMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),27,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
28 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
29 | } | |||
30 | ||||
31 | PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) | |||
32 | { | |||
33 | PetscErrorCode ierr; | |||
34 | ||||
35 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 35; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
36 | if (C->ops->matmultnumeric) { | |||
37 | ierr = (*C->ops->matmultnumeric)(A,B,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),37,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
38 | } else { | |||
39 | ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),39,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
40 | } | |||
41 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
42 | } | |||
43 | ||||
44 | PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) | |||
45 | { | |||
46 | PetscErrorCode ierr; | |||
47 | #if !defined(PETSC_HAVE_HYPRE) | |||
48 | const char *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","combined","rowmerge"}; | |||
49 | PetscInt nalg = 8; | |||
50 | #else | |||
51 | const char *algTypes[9] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","combined","rowmerge","hypre"}; | |||
52 | PetscInt nalg = 9; | |||
53 | #endif | |||
54 | PetscInt alg = 0; /* set default algorithm */ | |||
55 | ||||
56 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 56; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
57 | ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatMult","Mat")0; do { PetscOptionItems PetscOptionsObjectBase; PetscOptionItems *PetscOptionsObject = &PetscOptionsObjectBase; PetscMemzero (PetscOptionsObject,sizeof(PetscOptionItems)); for (PetscOptionsObject ->count=(PetscOptionsPublish?-1:1); PetscOptionsObject-> count<2; PetscOptionsObject->count++) { PetscErrorCode _5_ierr = PetscOptionsBegin_Private(PetscOptionsObject,PetscObjectComm ((PetscObject)A),((PetscObject)A)->prefix,"MatMatMult","Mat" );do {if (__builtin_expect(!!(_5_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),57,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),57,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
58 | ierr = PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,NULL)PetscOptionsEList_Private(PetscOptionsObject,"-matmatmult_via" ,"Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0 ],&alg,((void*)0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),58,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
59 | ierr = PetscOptionsEnd()_5_ierr = PetscOptionsEnd_Private(PetscOptionsObject);do {if ( __builtin_expect(!!(_5_ierr),0)) return PetscError(((MPI_Comm )0x44000001),59,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);}} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),59,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
60 | switch (alg) { | |||
61 | case 1: | |||
62 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),62,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
63 | break; | |||
64 | case 2: | |||
65 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),65,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
66 | break; | |||
67 | case 3: | |||
68 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),68,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
69 | break; | |||
70 | case 4: | |||
71 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),71,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
72 | break; | |||
73 | case 5: | |||
74 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),74,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
75 | break; | |||
76 | case 6: | |||
77 | ierr = MatMatMult_SeqAIJ_SeqAIJ_Combined(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),77,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
78 | break; | |||
79 | case 7: | |||
80 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),80,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
81 | break; | |||
82 | #if defined(PETSC_HAVE_HYPRE) | |||
83 | case 8: | |||
84 | ierr = MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),84,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
85 | break; | |||
86 | #endif | |||
87 | default: | |||
88 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),88,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
89 | break; | |||
90 | } | |||
91 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
92 | } | |||
93 | ||||
94 | PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat *C) | |||
95 | { | |||
96 | PetscErrorCode ierr; | |||
97 | Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; | |||
98 | PetscInt *ai=a->i,*bi=b->i,*ci,*cj; | |||
99 | PetscInt am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; | |||
100 | PetscReal afill; | |||
101 | PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; | |||
102 | PetscTable ta; | |||
103 | PetscBT lnkbt; | |||
104 | PetscFreeSpaceList free_space=NULL((void*)0),current_space=NULL((void*)0); | |||
105 | ||||
106 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 106; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
107 | /* Get ci and cj */ | |||
108 | /*---------------*/ | |||
109 | /* Allocate ci array, arrays for fill computation and */ | |||
110 | /* free space for accumulating nonzero column info */ | |||
111 | ierr = PetscMalloc1(am+2,&ci)PetscMallocA(1,PETSC_FALSE,111,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(am+2)*sizeof(**(&ci)),(&ci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),111,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
112 | ci[0] = 0; | |||
113 | ||||
114 | /* create and initialize a linked list */ | |||
115 | ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),115,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
116 | MatRowMergeMax_SeqAIJ(b,bm,ta){ PetscInt _j,_row,_nz,*_col; if (b) { for (_row=0; _row<bm ; _row++) { _nz = b->i[_row+1] - b->i[_row]; for (_j=0; _j<_nz; _j++) { _col = _j + b->j + b->i[_row]; PetscTableAdd (ta,*_col+1,1,INSERT_VALUES); } } } }; | |||
117 | ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),117,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
118 | ierr = PetscTableDestroy(&ta);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),118,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
119 | ||||
120 | ierr = PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),120,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
121 | ||||
122 | /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ | |||
123 | ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),123,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
124 | ||||
125 | current_space = free_space; | |||
126 | ||||
127 | /* Determine ci and cj */ | |||
128 | for (i=0; i<am; i++) { | |||
129 | anzi = ai[i+1] - ai[i]; | |||
130 | aj = a->j + ai[i]; | |||
131 | for (j=0; j<anzi; j++) { | |||
132 | brow = aj[j]; | |||
133 | bnzj = bi[brow+1] - bi[brow]; | |||
134 | bj = b->j + bi[brow]; | |||
135 | /* add non-zero cols of B into the sorted linked list lnk */ | |||
136 | ierr = PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),136,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
137 | } | |||
138 | cnzi = lnk[0]; | |||
139 | ||||
140 | /* If free space is not available, make more free space */ | |||
141 | /* Double the amount of total space in the list */ | |||
142 | if (current_space->local_remaining<cnzi) { | |||
143 | ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),143,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
144 | ndouble++; | |||
145 | } | |||
146 | ||||
147 | /* Copy data into free space, then initialize lnk */ | |||
148 | ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),148,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
149 | ||||
150 | current_space->array += cnzi; | |||
151 | current_space->local_used += cnzi; | |||
152 | current_space->local_remaining -= cnzi; | |||
153 | ||||
154 | ci[i+1] = ci[i] + cnzi; | |||
155 | } | |||
156 | ||||
157 | /* Column indices are in the list of free space */ | |||
158 | /* Allocate space for cj, initialize cj, and */ | |||
159 | /* destroy list of free space and other temporary array(s) */ | |||
160 | ierr = PetscMalloc1(ci[am]+1,&cj)PetscMallocA(1,PETSC_FALSE,160,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[am]+1)*sizeof(**(&cj)),(&cj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),160,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
161 | ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),161,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
162 | ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),162,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
163 | ||||
164 | /* put together the new symbolic matrix */ | |||
165 | ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL((void*)0),C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),165,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
166 | ierr = MatSetBlockSizesFromMats(*C,A,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),166,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
167 | ierr = MatSetType(*C,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),167,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
168 | ||||
169 | /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ | |||
170 | /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ | |||
171 | c = (Mat_SeqAIJ*)((*C)->data); | |||
172 | c->free_a = PETSC_FALSE; | |||
173 | c->free_ij = PETSC_TRUE; | |||
174 | c->nonew = 0; | |||
175 | (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; /* fast, needs non-scalable O(bn) array 'abdense' */ | |||
176 | ||||
177 | /* set MatInfo */ | |||
178 | afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; | |||
179 | if (afill < 1.0) afill = 1.0; | |||
180 | c->maxnz = ci[am]; | |||
181 | c->nz = ci[am]; | |||
182 | (*C)->info.mallocs = ndouble; | |||
183 | (*C)->info.fill_ratio_given = fill; | |||
184 | (*C)->info.fill_ratio_needed = afill; | |||
185 | ||||
186 | #if defined(PETSC_USE_INFO1) | |||
187 | if (ci[am]) { | |||
188 | ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)PetscInfo_Private(__func__,(*C),"Reallocs %D; Fill ratio: given %g needed %g.\n" ,ndouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),188,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
189 | ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)PetscInfo_Private(__func__,(*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),189,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
190 | } else { | |||
191 | ierr = PetscInfo((*C),"Empty matrix product\n")PetscInfo_Private(__func__,(*C),"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),191,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
192 | } | |||
193 | #endif | |||
194 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
195 | } | |||
196 | ||||
197 | PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C) | |||
198 | { | |||
199 | PetscErrorCode ierr; | |||
200 | PetscLogDouble flops=0.0; | |||
201 | Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; | |||
202 | Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; | |||
203 | Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; | |||
204 | PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; | |||
205 | PetscInt am =A->rmap->n,cm=C->rmap->n; | |||
206 | PetscInt i,j,k,anzi,bnzi,cnzi,brow; | |||
207 | PetscScalar *aa=a->a,*ba=b->a,*baj,*ca,valtmp; | |||
208 | PetscScalar *ab_dense; | |||
209 | ||||
210 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 210; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
211 | if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ | |||
212 | ierr = PetscMalloc1(ci[cm]+1,&ca)PetscMallocA(1,PETSC_FALSE,212,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[cm]+1)*sizeof(**(&ca)),(&ca));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),212,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
213 | c->a = ca; | |||
214 | c->free_a = PETSC_TRUE; | |||
215 | } else { | |||
216 | ca = c->a; | |||
217 | } | |||
218 | if (!c->matmult_abdense) { | |||
219 | ierr = PetscCalloc1(B->cmap->N,&ab_dense)PetscMallocA(1,PETSC_TRUE,219,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(B->cmap->N)*sizeof(**(&ab_dense)),(&ab_dense ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),219,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
220 | c->matmult_abdense = ab_dense; | |||
221 | } else { | |||
222 | ab_dense = c->matmult_abdense; | |||
223 | } | |||
224 | ||||
225 | /* clean old values in C */ | |||
226 | ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),226,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
227 | /* Traverse A row-wise. */ | |||
228 | /* Build the ith row in C by summing over nonzero columns in A, */ | |||
229 | /* the rows of B corresponding to nonzeros of A. */ | |||
230 | for (i=0; i<am; i++) { | |||
231 | anzi = ai[i+1] - ai[i]; | |||
232 | for (j=0; j<anzi; j++) { | |||
233 | brow = aj[j]; | |||
234 | bnzi = bi[brow+1] - bi[brow]; | |||
235 | bjj = bj + bi[brow]; | |||
236 | baj = ba + bi[brow]; | |||
237 | /* perform dense axpy */ | |||
238 | valtmp = aa[j]; | |||
239 | for (k=0; k<bnzi; k++) { | |||
240 | ab_dense[bjj[k]] += valtmp*baj[k]; | |||
241 | } | |||
242 | flops += 2*bnzi; | |||
243 | } | |||
244 | aj += anzi; aa += anzi; | |||
245 | ||||
246 | cnzi = ci[i+1] - ci[i]; | |||
247 | for (k=0; k<cnzi; k++) { | |||
248 | ca[k] += ab_dense[cj[k]]; | |||
249 | ab_dense[cj[k]] = 0.0; /* zero ab_dense */ | |||
250 | } | |||
251 | flops += cnzi; | |||
252 | cj += cnzi; ca += cnzi; | |||
253 | } | |||
254 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),254,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
255 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),255,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
256 | ierr = PetscLogFlops(flops);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),256,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
257 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
258 | } | |||
259 | ||||
260 | PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) | |||
261 | { | |||
262 | PetscErrorCode ierr; | |||
263 | PetscLogDouble flops=0.0; | |||
264 | Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; | |||
265 | Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; | |||
266 | Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; | |||
267 | PetscInt *ai = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; | |||
268 | PetscInt am = A->rmap->N,cm=C->rmap->N; | |||
269 | PetscInt i,j,k,anzi,bnzi,cnzi,brow; | |||
270 | PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; | |||
271 | PetscInt nextb; | |||
272 | ||||
273 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 273; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
274 | if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ | |||
275 | ierr = PetscMalloc1(ci[cm]+1,&ca)PetscMallocA(1,PETSC_FALSE,275,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[cm]+1)*sizeof(**(&ca)),(&ca));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),275,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
276 | c->a = ca; | |||
277 | c->free_a = PETSC_TRUE; | |||
278 | } | |||
279 | ||||
280 | /* clean old values in C */ | |||
281 | ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),281,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
282 | /* Traverse A row-wise. */ | |||
283 | /* Build the ith row in C by summing over nonzero columns in A, */ | |||
284 | /* the rows of B corresponding to nonzeros of A. */ | |||
285 | for (i=0; i<am; i++) { | |||
286 | anzi = ai[i+1] - ai[i]; | |||
287 | cnzi = ci[i+1] - ci[i]; | |||
288 | for (j=0; j<anzi; j++) { | |||
289 | brow = aj[j]; | |||
290 | bnzi = bi[brow+1] - bi[brow]; | |||
291 | bjj = bj + bi[brow]; | |||
292 | baj = ba + bi[brow]; | |||
293 | /* perform sparse axpy */ | |||
294 | valtmp = aa[j]; | |||
295 | nextb = 0; | |||
296 | for (k=0; nextb<bnzi; k++) { | |||
297 | if (cj[k] == bjj[nextb]) { /* ccol == bcol */ | |||
298 | ca[k] += valtmp*baj[nextb++]; | |||
299 | } | |||
300 | } | |||
301 | flops += 2*bnzi; | |||
302 | } | |||
303 | aj += anzi; aa += anzi; | |||
304 | cj += cnzi; ca += cnzi; | |||
305 | } | |||
306 | ||||
307 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),307,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
308 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),308,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
309 | ierr = PetscLogFlops(flops);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),309,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
310 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
311 | } | |||
312 | ||||
313 | PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat *C) | |||
314 | { | |||
315 | PetscErrorCode ierr; | |||
316 | Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; | |||
317 | PetscInt *ai = a->i,*bi=b->i,*ci,*cj; | |||
318 | PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; | |||
319 | MatScalar *ca; | |||
320 | PetscReal afill; | |||
321 | PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; | |||
322 | PetscTable ta; | |||
323 | PetscFreeSpaceList free_space=NULL((void*)0),current_space=NULL((void*)0); | |||
324 | ||||
325 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 325; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
326 | /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ | |||
327 | /*-----------------------------------------------------------------------------------------*/ | |||
328 | /* Allocate arrays for fill computation and free space for accumulating nonzero column */ | |||
329 | ierr = PetscMalloc1(am+2,&ci)PetscMallocA(1,PETSC_FALSE,329,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(am+2)*sizeof(**(&ci)),(&ci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),329,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
330 | ci[0] = 0; | |||
331 | ||||
332 | /* create and initialize a linked list */ | |||
333 | ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),333,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
334 | MatRowMergeMax_SeqAIJ(b,bm,ta){ PetscInt _j,_row,_nz,*_col; if (b) { for (_row=0; _row<bm ; _row++) { _nz = b->i[_row+1] - b->i[_row]; for (_j=0; _j<_nz; _j++) { _col = _j + b->j + b->i[_row]; PetscTableAdd (ta,*_col+1,1,INSERT_VALUES); } } } }; | |||
335 | ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),335,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
336 | ierr = PetscTableDestroy(&ta);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),336,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
337 | ||||
338 | ierr = PetscLLCondensedCreate_fast(Crmax,&lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),338,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
339 | ||||
340 | /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ | |||
341 | ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),341,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
342 | current_space = free_space; | |||
343 | ||||
344 | /* Determine ci and cj */ | |||
345 | for (i=0; i<am; i++) { | |||
346 | anzi = ai[i+1] - ai[i]; | |||
347 | aj = a->j + ai[i]; | |||
348 | for (j=0; j<anzi; j++) { | |||
349 | brow = aj[j]; | |||
350 | bnzj = bi[brow+1] - bi[brow]; | |||
351 | bj = b->j + bi[brow]; | |||
352 | /* add non-zero cols of B into the sorted linked list lnk */ | |||
353 | ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),353,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
354 | } | |||
355 | cnzi = lnk[1]; | |||
356 | ||||
357 | /* If free space is not available, make more free space */ | |||
358 | /* Double the amount of total space in the list */ | |||
359 | if (current_space->local_remaining<cnzi) { | |||
360 | ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),360,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
361 | ndouble++; | |||
362 | } | |||
363 | ||||
364 | /* Copy data into free space, then initialize lnk */ | |||
365 | ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),365,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
366 | ||||
367 | current_space->array += cnzi; | |||
368 | current_space->local_used += cnzi; | |||
369 | current_space->local_remaining -= cnzi; | |||
370 | ||||
371 | ci[i+1] = ci[i] + cnzi; | |||
372 | } | |||
373 | ||||
374 | /* Column indices are in the list of free space */ | |||
375 | /* Allocate space for cj, initialize cj, and */ | |||
376 | /* destroy list of free space and other temporary array(s) */ | |||
377 | ierr = PetscMalloc1(ci[am]+1,&cj)PetscMallocA(1,PETSC_FALSE,377,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[am]+1)*sizeof(**(&cj)),(&cj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),377,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
378 | ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),378,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
379 | ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),379,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
380 | ||||
381 | /* Allocate space for ca */ | |||
382 | ierr = PetscMalloc1(ci[am]+1,&ca)PetscMallocA(1,PETSC_FALSE,382,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[am]+1)*sizeof(**(&ca)),(&ca));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),382,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
383 | ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),383,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
384 | ||||
385 | /* put together the new symbolic matrix */ | |||
386 | ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),386,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
387 | ierr = MatSetBlockSizesFromMats(*C,A,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),387,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
388 | ierr = MatSetType(*C,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),388,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
389 | ||||
390 | /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ | |||
391 | /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ | |||
392 | c = (Mat_SeqAIJ*)((*C)->data); | |||
393 | c->free_a = PETSC_TRUE; | |||
394 | c->free_ij = PETSC_TRUE; | |||
395 | c->nonew = 0; | |||
396 | ||||
397 | (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ | |||
398 | ||||
399 | /* set MatInfo */ | |||
400 | afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; | |||
401 | if (afill < 1.0) afill = 1.0; | |||
402 | c->maxnz = ci[am]; | |||
403 | c->nz = ci[am]; | |||
404 | (*C)->info.mallocs = ndouble; | |||
405 | (*C)->info.fill_ratio_given = fill; | |||
406 | (*C)->info.fill_ratio_needed = afill; | |||
407 | ||||
408 | #if defined(PETSC_USE_INFO1) | |||
409 | if (ci[am]) { | |||
410 | ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)PetscInfo_Private(__func__,(*C),"Reallocs %D; Fill ratio: given %g needed %g.\n" ,ndouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),410,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
411 | ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)PetscInfo_Private(__func__,(*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),411,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
412 | } else { | |||
413 | ierr = PetscInfo((*C),"Empty matrix product\n")PetscInfo_Private(__func__,(*C),"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),413,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
414 | } | |||
415 | #endif | |||
416 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
417 | } | |||
418 | ||||
419 | ||||
420 | PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C) | |||
421 | { | |||
422 | PetscErrorCode ierr; | |||
423 | Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; | |||
424 | PetscInt *ai = a->i,*bi=b->i,*ci,*cj; | |||
425 | PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; | |||
426 | MatScalar *ca; | |||
427 | PetscReal afill; | |||
428 | PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; | |||
429 | PetscTable ta; | |||
430 | PetscFreeSpaceList free_space=NULL((void*)0),current_space=NULL((void*)0); | |||
431 | ||||
432 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 432; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
433 | /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ | |||
434 | /*---------------------------------------------------------------------------------------------*/ | |||
435 | /* Allocate arrays for fill computation and free space for accumulating nonzero column */ | |||
436 | ierr = PetscMalloc1(am+2,&ci)PetscMallocA(1,PETSC_FALSE,436,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(am+2)*sizeof(**(&ci)),(&ci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),436,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
437 | ci[0] = 0; | |||
438 | ||||
439 | /* create and initialize a linked list */ | |||
440 | ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),440,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
441 | MatRowMergeMax_SeqAIJ(b,bm,ta){ PetscInt _j,_row,_nz,*_col; if (b) { for (_row=0; _row<bm ; _row++) { _nz = b->i[_row+1] - b->i[_row]; for (_j=0; _j<_nz; _j++) { _col = _j + b->j + b->i[_row]; PetscTableAdd (ta,*_col+1,1,INSERT_VALUES); } } } }; | |||
442 | ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),442,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
443 | ierr = PetscTableDestroy(&ta);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),443,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
444 | ierr = PetscLLCondensedCreate_Scalable(Crmax,&lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),444,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
445 | ||||
446 | /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ | |||
447 | ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),447,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
448 | current_space = free_space; | |||
449 | ||||
450 | /* Determine ci and cj */ | |||
451 | for (i=0; i<am; i++) { | |||
452 | anzi = ai[i+1] - ai[i]; | |||
453 | aj = a->j + ai[i]; | |||
454 | for (j=0; j<anzi; j++) { | |||
455 | brow = aj[j]; | |||
456 | bnzj = bi[brow+1] - bi[brow]; | |||
457 | bj = b->j + bi[brow]; | |||
458 | /* add non-zero cols of B into the sorted linked list lnk */ | |||
459 | ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),459,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
460 | } | |||
461 | cnzi = lnk[0]; | |||
462 | ||||
463 | /* If free space is not available, make more free space */ | |||
464 | /* Double the amount of total space in the list */ | |||
465 | if (current_space->local_remaining<cnzi) { | |||
466 | ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),466,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
467 | ndouble++; | |||
468 | } | |||
469 | ||||
470 | /* Copy data into free space, then initialize lnk */ | |||
471 | ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),471,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
472 | ||||
473 | current_space->array += cnzi; | |||
474 | current_space->local_used += cnzi; | |||
475 | current_space->local_remaining -= cnzi; | |||
476 | ||||
477 | ci[i+1] = ci[i] + cnzi; | |||
478 | } | |||
479 | ||||
480 | /* Column indices are in the list of free space */ | |||
481 | /* Allocate space for cj, initialize cj, and */ | |||
482 | /* destroy list of free space and other temporary array(s) */ | |||
483 | ierr = PetscMalloc1(ci[am]+1,&cj)PetscMallocA(1,PETSC_FALSE,483,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[am]+1)*sizeof(**(&cj)),(&cj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),483,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
484 | ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),484,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
485 | ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),485,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
486 | ||||
487 | /* Allocate space for ca */ | |||
488 | /*-----------------------*/ | |||
489 | ierr = PetscMalloc1(ci[am]+1,&ca)PetscMallocA(1,PETSC_FALSE,489,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[am]+1)*sizeof(**(&ca)),(&ca));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),489,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
490 | ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),490,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
491 | ||||
492 | /* put together the new symbolic matrix */ | |||
493 | ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),493,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
494 | ierr = MatSetBlockSizesFromMats(*C,A,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),494,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
495 | ierr = MatSetType(*C,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),495,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
496 | ||||
497 | /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ | |||
498 | /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ | |||
499 | c = (Mat_SeqAIJ*)((*C)->data); | |||
500 | c->free_a = PETSC_TRUE; | |||
501 | c->free_ij = PETSC_TRUE; | |||
502 | c->nonew = 0; | |||
503 | ||||
504 | (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ | |||
505 | ||||
506 | /* set MatInfo */ | |||
507 | afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; | |||
508 | if (afill < 1.0) afill = 1.0; | |||
509 | c->maxnz = ci[am]; | |||
510 | c->nz = ci[am]; | |||
511 | (*C)->info.mallocs = ndouble; | |||
512 | (*C)->info.fill_ratio_given = fill; | |||
513 | (*C)->info.fill_ratio_needed = afill; | |||
514 | ||||
515 | #if defined(PETSC_USE_INFO1) | |||
516 | if (ci[am]) { | |||
517 | ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)PetscInfo_Private(__func__,(*C),"Reallocs %D; Fill ratio: given %g needed %g.\n" ,ndouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),517,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
518 | ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)PetscInfo_Private(__func__,(*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),518,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
519 | } else { | |||
520 | ierr = PetscInfo((*C),"Empty matrix product\n")PetscInfo_Private(__func__,(*C),"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),520,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
521 | } | |||
522 | #endif | |||
523 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
524 | } | |||
525 | ||||
526 | PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat *C) | |||
527 | { | |||
528 | PetscErrorCode ierr; | |||
529 | Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; | |||
530 | const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; | |||
531 | PetscInt *ci,*cj,*bb; | |||
532 | PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; | |||
533 | PetscReal afill; | |||
534 | PetscInt i,j,col,ndouble = 0; | |||
535 | PetscFreeSpaceList free_space=NULL((void*)0),current_space=NULL((void*)0); | |||
536 | PetscHeap h; | |||
537 | ||||
538 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 538; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
539 | /* Get ci and cj - by merging sorted rows using a heap */ | |||
540 | /*---------------------------------------------------------------------------------------------*/ | |||
541 | /* Allocate arrays for fill computation and free space for accumulating nonzero column */ | |||
542 | ierr = PetscMalloc1(am+2,&ci)PetscMallocA(1,PETSC_FALSE,542,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(am+2)*sizeof(**(&ci)),(&ci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),542,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
543 | ci[0] = 0; | |||
544 | ||||
545 | /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ | |||
546 | ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),546,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
547 | current_space = free_space; | |||
548 | ||||
549 | ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),549,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
550 | ierr = PetscMalloc1(a->rmax,&bb)PetscMallocA(1,PETSC_FALSE,550,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(a->rmax)*sizeof(**(&bb)),(&bb));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),550,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
551 | ||||
552 | /* Determine ci and cj */ | |||
553 | for (i=0; i<am; i++) { | |||
554 | const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ | |||
555 | const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ | |||
556 | ci[i+1] = ci[i]; | |||
557 | /* Populate the min heap */ | |||
558 | for (j=0; j<anzi; j++) { | |||
559 | bb[j] = bi[acol[j]]; /* bb points at the start of the row */ | |||
560 | if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */ | |||
561 | ierr = PetscHeapAdd(h,j,bj[bb[j]++]);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),561,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
562 | } | |||
563 | } | |||
564 | /* Pick off the min element, adding it to free space */ | |||
565 | ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),565,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
566 | while (j >= 0) { | |||
567 | if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ | |||
568 | ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20)(((PetscIntMultTruncate(2,current_space->total_array_size) )<(16 << 20)) ? (PetscIntMultTruncate(2,current_space ->total_array_size)) : (16 << 20)),¤t_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),568,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
569 | ndouble++; | |||
570 | } | |||
571 | *(current_space->array++) = col; | |||
572 | current_space->local_used++; | |||
573 | current_space->local_remaining--; | |||
574 | ci[i+1]++; | |||
575 | ||||
576 | /* stash if anything else remains in this row of B */ | |||
577 | if (bb[j] < bi[acol[j]+1]) {ierr = PetscHeapStash(h,j,bj[bb[j]++]);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),577,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
578 | while (1) { /* pop and stash any other rows of B that also had an entry in this column */ | |||
579 | PetscInt j2,col2; | |||
580 | ierr = PetscHeapPeek(h,&j2,&col2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),580,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
581 | if (col2 != col) break; | |||
582 | ierr = PetscHeapPop(h,&j2,&col2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),582,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
583 | if (bb[j2] < bi[acol[j2]+1]) {ierr = PetscHeapStash(h,j2,bj[bb[j2]++]);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),583,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
584 | } | |||
585 | /* Put any stashed elements back into the min heap */ | |||
586 | ierr = PetscHeapUnstash(h);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),586,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
587 | ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),587,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
588 | } | |||
589 | } | |||
590 | ierr = PetscFree(bb)((*PetscTrFree)((void*)(bb),590,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((bb) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),590,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
591 | ierr = PetscHeapDestroy(&h);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),591,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
592 | ||||
593 | /* Column indices are in the list of free space */ | |||
594 | /* Allocate space for cj, initialize cj, and */ | |||
595 | /* destroy list of free space and other temporary array(s) */ | |||
596 | ierr = PetscMalloc1(ci[am],&cj)PetscMallocA(1,PETSC_FALSE,596,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[am])*sizeof(**(&cj)),(&cj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),596,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
597 | ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),597,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
598 | ||||
599 | /* put together the new symbolic matrix */ | |||
600 | ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL((void*)0),C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),600,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
601 | ierr = MatSetBlockSizesFromMats(*C,A,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),601,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
602 | ierr = MatSetType(*C,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),602,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
603 | ||||
604 | /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ | |||
605 | /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ | |||
606 | c = (Mat_SeqAIJ*)((*C)->data); | |||
607 | c->free_a = PETSC_TRUE; | |||
608 | c->free_ij = PETSC_TRUE; | |||
609 | c->nonew = 0; | |||
610 | ||||
611 | (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; | |||
612 | ||||
613 | /* set MatInfo */ | |||
614 | afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; | |||
615 | if (afill < 1.0) afill = 1.0; | |||
616 | c->maxnz = ci[am]; | |||
617 | c->nz = ci[am]; | |||
618 | (*C)->info.mallocs = ndouble; | |||
619 | (*C)->info.fill_ratio_given = fill; | |||
620 | (*C)->info.fill_ratio_needed = afill; | |||
621 | ||||
622 | #if defined(PETSC_USE_INFO1) | |||
623 | if (ci[am]) { | |||
624 | ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)PetscInfo_Private(__func__,(*C),"Reallocs %D; Fill ratio: given %g needed %g.\n" ,ndouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),624,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
625 | ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)PetscInfo_Private(__func__,(*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),625,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
626 | } else { | |||
627 | ierr = PetscInfo((*C),"Empty matrix product\n")PetscInfo_Private(__func__,(*C),"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),627,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
628 | } | |||
629 | #endif | |||
630 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
631 | } | |||
632 | ||||
633 | PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat *C) | |||
634 | { | |||
635 | PetscErrorCode ierr; | |||
636 | Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; | |||
637 | const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; | |||
638 | PetscInt *ci,*cj,*bb; | |||
639 | PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; | |||
640 | PetscReal afill; | |||
641 | PetscInt i,j,col,ndouble = 0; | |||
642 | PetscFreeSpaceList free_space=NULL((void*)0),current_space=NULL((void*)0); | |||
643 | PetscHeap h; | |||
644 | PetscBT bt; | |||
645 | ||||
646 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 646; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
647 | /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */ | |||
648 | /*---------------------------------------------------------------------------------------------*/ | |||
649 | /* Allocate arrays for fill computation and free space for accumulating nonzero column */ | |||
650 | ierr = PetscMalloc1(am+2,&ci)PetscMallocA(1,PETSC_FALSE,650,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(am+2)*sizeof(**(&ci)),(&ci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),650,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
651 | ci[0] = 0; | |||
652 | ||||
653 | /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ | |||
654 | ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),654,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
655 | ||||
656 | current_space = free_space; | |||
| ||||
657 | ||||
658 | ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),658,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
659 | ierr = PetscMalloc1(a->rmax,&bb)PetscMallocA(1,PETSC_FALSE,659,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(a->rmax)*sizeof(**(&bb)),(&bb));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),659,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
660 | ierr = PetscBTCreate(bn,&bt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),660,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
661 | ||||
662 | /* Determine ci and cj */ | |||
663 | for (i=0; i<am; i++) { | |||
664 | const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ | |||
665 | const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ | |||
666 | const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ | |||
667 | ci[i+1] = ci[i]; | |||
668 | /* Populate the min heap */ | |||
669 | for (j=0; j<anzi; j++) { | |||
670 | PetscInt brow = acol[j]; | |||
671 | for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) { | |||
672 | PetscInt bcol = bj[bb[j]]; | |||
673 | if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ | |||
674 | ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),674,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
675 | bb[j]++; | |||
676 | break; | |||
677 | } | |||
678 | } | |||
679 | } | |||
680 | /* Pick off the min element, adding it to free space */ | |||
681 | ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),681,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
682 | while (j >= 0) { | |||
683 | if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ | |||
684 | fptr = NULL((void*)0); /* need PetscBTMemzero */ | |||
685 | ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20)(((PetscIntMultTruncate(2,current_space->total_array_size) )<(16 << 20)) ? (PetscIntMultTruncate(2,current_space ->total_array_size)) : (16 << 20)),¤t_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),685,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
686 | ndouble++; | |||
687 | } | |||
688 | *(current_space->array++) = col; | |||
| ||||
689 | current_space->local_used++; | |||
690 | current_space->local_remaining--; | |||
691 | ci[i+1]++; | |||
692 | ||||
693 | /* stash if anything else remains in this row of B */ | |||
694 | for (; bb[j] < bi[acol[j]+1]; bb[j]++) { | |||
695 | PetscInt bcol = bj[bb[j]]; | |||
696 | if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ | |||
697 | ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),697,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
698 | bb[j]++; | |||
699 | break; | |||
700 | } | |||
701 | } | |||
702 | ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),702,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
703 | } | |||
704 | if (fptr) { /* Clear the bits for this row */ | |||
705 | for (; fptr<current_space->array; fptr++) {ierr = PetscBTClear(bt,*fptr);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),705,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
706 | } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ | |||
707 | ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),707,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
708 | } | |||
709 | } | |||
710 | ierr = PetscFree(bb)((*PetscTrFree)((void*)(bb),710,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((bb) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),710,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
711 | ierr = PetscHeapDestroy(&h);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),711,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
712 | ierr = PetscBTDestroy(&bt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),712,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
713 | ||||
714 | /* Column indices are in the list of free space */ | |||
715 | /* Allocate space for cj, initialize cj, and */ | |||
716 | /* destroy list of free space and other temporary array(s) */ | |||
717 | ierr = PetscMalloc1(ci[am],&cj)PetscMallocA(1,PETSC_FALSE,717,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[am])*sizeof(**(&cj)),(&cj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),717,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
718 | ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),718,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
719 | ||||
720 | /* put together the new symbolic matrix */ | |||
721 | ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL((void*)0),C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),721,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
722 | ierr = MatSetBlockSizesFromMats(*C,A,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),722,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
723 | ierr = MatSetType(*C,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),723,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
724 | ||||
725 | /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ | |||
726 | /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ | |||
727 | c = (Mat_SeqAIJ*)((*C)->data); | |||
728 | c->free_a = PETSC_TRUE; | |||
729 | c->free_ij = PETSC_TRUE; | |||
730 | c->nonew = 0; | |||
731 | ||||
732 | (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; | |||
733 | ||||
734 | /* set MatInfo */ | |||
735 | afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; | |||
736 | if (afill < 1.0) afill = 1.0; | |||
737 | c->maxnz = ci[am]; | |||
738 | c->nz = ci[am]; | |||
739 | (*C)->info.mallocs = ndouble; | |||
740 | (*C)->info.fill_ratio_given = fill; | |||
741 | (*C)->info.fill_ratio_needed = afill; | |||
742 | ||||
743 | #if defined(PETSC_USE_INFO1) | |||
744 | if (ci[am]) { | |||
745 | ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)PetscInfo_Private(__func__,(*C),"Reallocs %D; Fill ratio: given %g needed %g.\n" ,ndouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),745,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
746 | ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)PetscInfo_Private(__func__,(*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),746,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
747 | } else { | |||
748 | ierr = PetscInfo((*C),"Empty matrix product\n")PetscInfo_Private(__func__,(*C),"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),748,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
749 | } | |||
750 | #endif | |||
751 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
752 | } | |||
753 | ||||
754 | ||||
755 | PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat *C) | |||
756 | { | |||
757 | PetscErrorCode ierr; | |||
758 | Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; | |||
759 | const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1; | |||
760 | PetscInt *ci,*cj,*outputj,worki_L1[9],worki_L2[9]; | |||
761 | PetscInt c_maxmem,a_maxrownnz=0,a_rownnz; | |||
762 | const PetscInt workcol[8]={0,1,2,3,4,5,6,7}; | |||
763 | const PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; | |||
764 | const PetscInt *brow_ptr[8],*brow_end[8]; | |||
765 | PetscInt window[8]; | |||
766 | PetscInt window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows; | |||
767 | PetscInt i,k,ndouble=0,L1_rowsleft,rowsleft; | |||
768 | PetscReal afill; | |||
769 | PetscInt *workj_L1,*workj_L2,*workj_L3; | |||
770 | PetscInt L1_nnz,L2_nnz; | |||
771 | ||||
772 | /* Step 1: Get upper bound on memory required for allocation. | |||
773 | Because of the way virtual memory works, | |||
774 | only the memory pages that are actually needed will be physically allocated. */ | |||
775 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 775; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
776 | ierr = PetscMalloc1(am+1,&ci)PetscMallocA(1,PETSC_FALSE,776,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(am+1)*sizeof(**(&ci)),(&ci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),776,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
777 | for (i=0; i<am; i++) { | |||
778 | const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ | |||
779 | const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ | |||
780 | a_rownnz = 0; | |||
781 | for (k=0; k<anzi; ++k) { | |||
782 | a_rownnz += bi[acol[k]+1] - bi[acol[k]]; | |||
783 | if (a_rownnz > bn) { | |||
784 | a_rownnz = bn; | |||
785 | break; | |||
786 | } | |||
787 | } | |||
788 | a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz)(((a_maxrownnz)<(a_rownnz)) ? (a_rownnz) : (a_maxrownnz)); | |||
789 | } | |||
790 | /* temporary work areas for merging rows */ | |||
791 | ierr = PetscMalloc1(a_maxrownnz*8,&workj_L1)PetscMallocA(1,PETSC_FALSE,791,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(a_maxrownnz*8)*sizeof(**(&workj_L1)),(&workj_L1 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),791,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
792 | ierr = PetscMalloc1(a_maxrownnz*8,&workj_L2)PetscMallocA(1,PETSC_FALSE,792,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(a_maxrownnz*8)*sizeof(**(&workj_L2)),(&workj_L2 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),792,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
793 | ierr = PetscMalloc1(a_maxrownnz,&workj_L3)PetscMallocA(1,PETSC_FALSE,793,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(a_maxrownnz)*sizeof(**(&workj_L3)),(&workj_L3 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),793,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
794 | ||||
795 | /* This should be enough for almost all matrices. If not, memory is reallocated later. */ | |||
796 | c_maxmem = 8*(ai[am]+bi[bm]); | |||
797 | /* Step 2: Populate pattern for C */ | |||
798 | ierr = PetscMalloc1(c_maxmem,&cj)PetscMallocA(1,PETSC_FALSE,798,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(c_maxmem)*sizeof(**(&cj)),(&cj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),798,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
799 | ||||
800 | ci_nnz = 0; | |||
801 | ci[0] = 0; | |||
802 | worki_L1[0] = 0; | |||
803 | worki_L2[0] = 0; | |||
804 | for (i=0; i<am; i++) { | |||
805 | const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ | |||
806 | const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ | |||
807 | rowsleft = anzi; | |||
808 | inputcol_L1 = acol; | |||
809 | L2_nnz = 0; | |||
810 | L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */ | |||
811 | worki_L2[1] = 0; | |||
812 | outputi_nnz = 0; | |||
813 | ||||
814 | /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */ | |||
815 | while (ci_nnz+a_maxrownnz > c_maxmem) { | |||
816 | c_maxmem *= 2; | |||
817 | ndouble++; | |||
818 | ierr = PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj)((*PetscTrRealloc)((sizeof(PetscInt)*c_maxmem),818,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(void**)(&cj)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),818,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
819 | } | |||
820 | ||||
821 | while (rowsleft) { | |||
822 | L1_rowsleft = PetscMin(64, rowsleft)(((64)<(rowsleft)) ? (64) : (rowsleft)); /* In the inner loop max 64 rows of B can be merged */ | |||
823 | L1_nrows = 0; | |||
824 | L1_nnz = 0; | |||
825 | inputcol = inputcol_L1; | |||
826 | inputi = bi; | |||
827 | inputj = bj; | |||
828 | ||||
829 | /* The following macro is used to specialize for small rows in A. | |||
830 | This helps with compiler unrolling, improving performance substantially. | |||
831 | Input: inputj inputi inputcol bn | |||
832 | Output: outputj outputi_nnz */ | |||
833 | #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \ | |||
834 | window_min = bn; \ | |||
835 | outputi_nnz = 0; \ | |||
836 | for (k=0; k<ANNZ; ++k) { \ | |||
837 | brow_ptr[k] = inputj + inputi[inputcol[k]]; \ | |||
838 | brow_end[k] = inputj + inputi[inputcol[k]+1]; \ | |||
839 | window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ | |||
840 | window_min = PetscMin(window[k], window_min)(((window[k])<(window_min)) ? (window[k]) : (window_min)); \ | |||
841 | } \ | |||
842 | while (window_min < bn) { \ | |||
843 | outputj[outputi_nnz++] = window_min; \ | |||
844 | /* advance front and compute new minimum */ \ | |||
845 | old_window_min = window_min; \ | |||
846 | window_min = bn; \ | |||
847 | for (k=0; k<ANNZ; ++k) { \ | |||
848 | if (window[k] == old_window_min) { \ | |||
849 | brow_ptr[k]++; \ | |||
850 | window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ | |||
851 | } \ | |||
852 | window_min = PetscMin(window[k], window_min)(((window[k])<(window_min)) ? (window[k]) : (window_min)); \ | |||
853 | } \ | |||
854 | } | |||
855 | ||||
856 | /************** L E V E L 1 ***************/ | |||
857 | /* Merge up to 8 rows of B to L1 work array*/ | |||
858 | while (L1_rowsleft) { | |||
859 | outputi_nnz = 0; | |||
860 | if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/ | |||
861 | else outputj = cj + ci_nnz; /* Merge directly to C */ | |||
862 | ||||
863 | switch (L1_rowsleft) { | |||
864 | case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; | |||
865 | brow_end[0] = inputj + inputi[inputcol[0]+1]; | |||
866 | for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ | |||
867 | inputcol += L1_rowsleft; | |||
868 | rowsleft -= L1_rowsleft; | |||
869 | L1_rowsleft = 0; | |||
870 | break; | |||
871 | case 2: MatMatMultSymbolic_RowMergeMacro(2); | |||
872 | inputcol += L1_rowsleft; | |||
873 | rowsleft -= L1_rowsleft; | |||
874 | L1_rowsleft = 0; | |||
875 | break; | |||
876 | case 3: MatMatMultSymbolic_RowMergeMacro(3); | |||
877 | inputcol += L1_rowsleft; | |||
878 | rowsleft -= L1_rowsleft; | |||
879 | L1_rowsleft = 0; | |||
880 | break; | |||
881 | case 4: MatMatMultSymbolic_RowMergeMacro(4); | |||
882 | inputcol += L1_rowsleft; | |||
883 | rowsleft -= L1_rowsleft; | |||
884 | L1_rowsleft = 0; | |||
885 | break; | |||
886 | case 5: MatMatMultSymbolic_RowMergeMacro(5); | |||
887 | inputcol += L1_rowsleft; | |||
888 | rowsleft -= L1_rowsleft; | |||
889 | L1_rowsleft = 0; | |||
890 | break; | |||
891 | case 6: MatMatMultSymbolic_RowMergeMacro(6); | |||
892 | inputcol += L1_rowsleft; | |||
893 | rowsleft -= L1_rowsleft; | |||
894 | L1_rowsleft = 0; | |||
895 | break; | |||
896 | case 7: MatMatMultSymbolic_RowMergeMacro(7); | |||
897 | inputcol += L1_rowsleft; | |||
898 | rowsleft -= L1_rowsleft; | |||
899 | L1_rowsleft = 0; | |||
900 | break; | |||
901 | default: MatMatMultSymbolic_RowMergeMacro(8); | |||
902 | inputcol += 8; | |||
903 | rowsleft -= 8; | |||
904 | L1_rowsleft -= 8; | |||
905 | break; | |||
906 | } | |||
907 | inputcol_L1 = inputcol; | |||
908 | L1_nnz += outputi_nnz; | |||
909 | worki_L1[++L1_nrows] = L1_nnz; | |||
910 | } | |||
911 | ||||
912 | /********************** L E V E L 2 ************************/ | |||
913 | /* Merge from L1 work array to either C or to L2 work array */ | |||
914 | if (anzi > 8) { | |||
915 | inputi = worki_L1; | |||
916 | inputj = workj_L1; | |||
917 | inputcol = workcol; | |||
918 | outputi_nnz = 0; | |||
919 | ||||
920 | if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */ | |||
921 | else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */ | |||
922 | ||||
923 | switch (L1_nrows) { | |||
924 | case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; | |||
925 | brow_end[0] = inputj + inputi[inputcol[0]+1]; | |||
926 | for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ | |||
927 | break; | |||
928 | case 2: MatMatMultSymbolic_RowMergeMacro(2); break; | |||
929 | case 3: MatMatMultSymbolic_RowMergeMacro(3); break; | |||
930 | case 4: MatMatMultSymbolic_RowMergeMacro(4); break; | |||
931 | case 5: MatMatMultSymbolic_RowMergeMacro(5); break; | |||
932 | case 6: MatMatMultSymbolic_RowMergeMacro(6); break; | |||
933 | case 7: MatMatMultSymbolic_RowMergeMacro(7); break; | |||
934 | case 8: MatMatMultSymbolic_RowMergeMacro(8); break; | |||
935 | default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!")return PetscError(((MPI_Comm)0x44000001),935,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,56,PETSC_ERROR_INITIAL,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!" ); | |||
936 | } | |||
937 | L2_nnz += outputi_nnz; | |||
938 | worki_L2[++L2_nrows] = L2_nnz; | |||
939 | ||||
940 | /************************ L E V E L 3 **********************/ | |||
941 | /* Merge from L2 work array to either C or to L2 work array */ | |||
942 | if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) { | |||
943 | inputi = worki_L2; | |||
944 | inputj = workj_L2; | |||
945 | inputcol = workcol; | |||
946 | outputi_nnz = 0; | |||
947 | if (rowsleft) outputj = workj_L3; | |||
948 | else outputj = cj + ci_nnz; | |||
949 | switch (L2_nrows) { | |||
950 | case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; | |||
951 | brow_end[0] = inputj + inputi[inputcol[0]+1]; | |||
952 | for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ | |||
953 | break; | |||
954 | case 2: MatMatMultSymbolic_RowMergeMacro(2); break; | |||
955 | case 3: MatMatMultSymbolic_RowMergeMacro(3); break; | |||
956 | case 4: MatMatMultSymbolic_RowMergeMacro(4); break; | |||
957 | case 5: MatMatMultSymbolic_RowMergeMacro(5); break; | |||
958 | case 6: MatMatMultSymbolic_RowMergeMacro(6); break; | |||
959 | case 7: MatMatMultSymbolic_RowMergeMacro(7); break; | |||
960 | case 8: MatMatMultSymbolic_RowMergeMacro(8); break; | |||
961 | default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!")return PetscError(((MPI_Comm)0x44000001),961,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,56,PETSC_ERROR_INITIAL,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!" ); | |||
962 | } | |||
963 | L2_nrows = 1; | |||
964 | L2_nnz = outputi_nnz; | |||
965 | worki_L2[1] = outputi_nnz; | |||
966 | /* Copy to workj_L2 */ | |||
967 | if (rowsleft) { | |||
968 | for (k=0; k<outputi_nnz; ++k) workj_L2[k] = outputj[k]; | |||
969 | } | |||
970 | } | |||
971 | } | |||
972 | } /* while (rowsleft) */ | |||
973 | #undef MatMatMultSymbolic_RowMergeMacro | |||
974 | ||||
975 | /* terminate current row */ | |||
976 | ci_nnz += outputi_nnz; | |||
977 | ci[i+1] = ci_nnz; | |||
978 | } | |||
979 | ||||
980 | /* Step 3: Create the new symbolic matrix */ | |||
981 | ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL((void*)0),C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),981,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
982 | ierr = MatSetBlockSizesFromMats(*C,A,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),982,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
983 | ierr = MatSetType(*C,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),983,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
984 | ||||
985 | /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ | |||
986 | /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ | |||
987 | c = (Mat_SeqAIJ*)((*C)->data); | |||
988 | c->free_a = PETSC_TRUE; | |||
989 | c->free_ij = PETSC_TRUE; | |||
990 | c->nonew = 0; | |||
991 | ||||
992 | (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; | |||
993 | ||||
994 | /* set MatInfo */ | |||
995 | afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; | |||
996 | if (afill < 1.0) afill = 1.0; | |||
997 | c->maxnz = ci[am]; | |||
998 | c->nz = ci[am]; | |||
999 | (*C)->info.mallocs = ndouble; | |||
1000 | (*C)->info.fill_ratio_given = fill; | |||
1001 | (*C)->info.fill_ratio_needed = afill; | |||
1002 | ||||
1003 | #if defined(PETSC_USE_INFO1) | |||
1004 | if (ci[am]) { | |||
1005 | ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)PetscInfo_Private(__func__,(*C),"Reallocs %D; Fill ratio: given %g needed %g.\n" ,ndouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1005,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1006 | ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)PetscInfo_Private(__func__,(*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1006,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1007 | } else { | |||
1008 | ierr = PetscInfo((*C),"Empty matrix product\n")PetscInfo_Private(__func__,(*C),"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1008,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1009 | } | |||
1010 | #endif | |||
1011 | ||||
1012 | /* Step 4: Free temporary work areas */ | |||
1013 | ierr = PetscFree(workj_L1)((*PetscTrFree)((void*)(workj_L1),1013,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((workj_L1) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1013,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1014 | ierr = PetscFree(workj_L2)((*PetscTrFree)((void*)(workj_L2),1014,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((workj_L2) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1014,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1015 | ierr = PetscFree(workj_L3)((*PetscTrFree)((void*)(workj_L3),1015,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((workj_L3) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1015,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1016 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1017 | } | |||
1018 | ||||
1019 | /* concatenate unique entries and then sort */ | |||
1020 | PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat *C) | |||
1021 | { | |||
1022 | PetscErrorCode ierr; | |||
1023 | Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; | |||
1024 | const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; | |||
1025 | PetscInt *ci,*cj; | |||
1026 | PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; | |||
1027 | PetscReal afill; | |||
1028 | PetscInt i,j,ndouble = 0; | |||
1029 | PetscSegBuffer seg,segrow; | |||
1030 | char *seen; | |||
1031 | ||||
1032 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1032; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1033 | ierr = PetscMalloc1(am+1,&ci)PetscMallocA(1,PETSC_FALSE,1033,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(am+1)*sizeof(**(&ci)),(&ci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1033,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1034 | ci[0] = 0; | |||
1035 | ||||
1036 | /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ | |||
1037 | ierr = PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1037,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1038 | ierr = PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1038,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1039 | ierr = PetscMalloc1(bn,&seen)PetscMallocA(1,PETSC_FALSE,1039,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(bn)*sizeof(**(&seen)),(&seen));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1039,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1040 | ierr = PetscMemzero(seen,bn*sizeof(char));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1040,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1041 | ||||
1042 | /* Determine ci and cj */ | |||
1043 | for (i=0; i<am; i++) { | |||
1044 | const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ | |||
1045 | const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ | |||
1046 | PetscInt packlen = 0,*PETSC_RESTRICT__restrict crow; | |||
1047 | /* Pack segrow */ | |||
1048 | for (j=0; j<anzi; j++) { | |||
1049 | PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k; | |||
1050 | for (k=bjstart; k<bjend; k++) { | |||
1051 | PetscInt bcol = bj[k]; | |||
1052 | if (!seen[bcol]) { /* new entry */ | |||
1053 | PetscInt *PETSC_RESTRICT__restrict slot; | |||
1054 | ierr = PetscSegBufferGetInts(segrow,1,&slot);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1054,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1055 | *slot = bcol; | |||
1056 | seen[bcol] = 1; | |||
1057 | packlen++; | |||
1058 | } | |||
1059 | } | |||
1060 | } | |||
1061 | ierr = PetscSegBufferGetInts(seg,packlen,&crow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1061,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1062 | ierr = PetscSegBufferExtractTo(segrow,crow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1062,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1063 | ierr = PetscSortInt(packlen,crow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1063,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1064 | ci[i+1] = ci[i] + packlen; | |||
1065 | for (j=0; j<packlen; j++) seen[crow[j]] = 0; | |||
1066 | } | |||
1067 | ierr = PetscSegBufferDestroy(&segrow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1067,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1068 | ierr = PetscFree(seen)((*PetscTrFree)((void*)(seen),1068,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((seen) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1068,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1069 | ||||
1070 | /* Column indices are in the segmented buffer */ | |||
1071 | ierr = PetscSegBufferExtractAlloc(seg,&cj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1071,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1072 | ierr = PetscSegBufferDestroy(&seg);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1072,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1073 | ||||
1074 | /* put together the new symbolic matrix */ | |||
1075 | ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL((void*)0),C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1075,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1076 | ierr = MatSetBlockSizesFromMats(*C,A,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1076,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1077 | ierr = MatSetType(*C,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1077,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1078 | ||||
1079 | /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ | |||
1080 | /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ | |||
1081 | c = (Mat_SeqAIJ*)((*C)->data); | |||
1082 | c->free_a = PETSC_TRUE; | |||
1083 | c->free_ij = PETSC_TRUE; | |||
1084 | c->nonew = 0; | |||
1085 | ||||
1086 | (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; | |||
1087 | ||||
1088 | /* set MatInfo */ | |||
1089 | afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; | |||
1090 | if (afill < 1.0) afill = 1.0; | |||
1091 | c->maxnz = ci[am]; | |||
1092 | c->nz = ci[am]; | |||
1093 | (*C)->info.mallocs = ndouble; | |||
1094 | (*C)->info.fill_ratio_given = fill; | |||
1095 | (*C)->info.fill_ratio_needed = afill; | |||
1096 | ||||
1097 | #if defined(PETSC_USE_INFO1) | |||
1098 | if (ci[am]) { | |||
1099 | ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)PetscInfo_Private(__func__,(*C),"Reallocs %D; Fill ratio: given %g needed %g.\n" ,ndouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1099,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1100 | ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)PetscInfo_Private(__func__,(*C),"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1100,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1101 | } else { | |||
1102 | ierr = PetscInfo((*C),"Empty matrix product\n")PetscInfo_Private(__func__,(*C),"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1102,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1103 | } | |||
1104 | #endif | |||
1105 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1106 | } | |||
1107 | ||||
1108 | /* This routine is not used. Should be removed! */ | |||
1109 | PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) | |||
1110 | { | |||
1111 | PetscErrorCode ierr; | |||
1112 | ||||
1113 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1113; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1114 | if (scall == MAT_INITIAL_MATRIX) { | |||
1115 | ierr = PetscLogEventBegin(MAT_MatTransposeMultSymbolic,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatTransposeMultSymbolic].active) ? (*PetscLogPLB)((MAT_MatTransposeMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1115,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1116 | ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1116,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1117 | ierr = PetscLogEventEnd(MAT_MatTransposeMultSymbolic,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatTransposeMultSymbolic].active) ? (*PetscLogPLE)((MAT_MatTransposeMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1117,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1118 | } | |||
1119 | ierr = PetscLogEventBegin(MAT_MatTransposeMultNumeric,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatTransposeMultNumeric].active) ? (*PetscLogPLB)((MAT_MatTransposeMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1119,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1120 | ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1120,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1121 | ierr = PetscLogEventEnd(MAT_MatTransposeMultNumeric,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatTransposeMultNumeric].active) ? (*PetscLogPLE)((MAT_MatTransposeMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1121,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1122 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1123 | } | |||
1124 | ||||
1125 | PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) | |||
1126 | { | |||
1127 | PetscErrorCode ierr; | |||
1128 | Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; | |||
1129 | Mat_MatMatTransMult *abt=a->abt; | |||
1130 | ||||
1131 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1131; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1132 | ierr = (abt->destroy)(A);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1132,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1133 | ierr = MatTransposeColoringDestroy(&abt->matcoloring);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1133,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1134 | ierr = MatDestroy(&abt->Bt_den);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1134,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1135 | ierr = MatDestroy(&abt->ABt_den);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1135,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1136 | ierr = PetscFree(abt)((*PetscTrFree)((void*)(abt),1136,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((abt) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1136,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1137 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1138 | } | |||
1139 | ||||
1140 | PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) | |||
1141 | { | |||
1142 | PetscErrorCode ierr; | |||
1143 | Mat Bt; | |||
1144 | PetscInt *bti,*btj; | |||
1145 | Mat_MatMatTransMult *abt; | |||
1146 | Mat_SeqAIJ *c; | |||
1147 | ||||
1148 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1148; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1149 | /* create symbolic Bt */ | |||
1150 | ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1150,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1151 | ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF((MPI_Comm)0x44000001),B->cmap->n,B->rmap->n,bti,btj,NULL((void*)0),&Bt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1151,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1152 | ierr = MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs)(((A->cmap->bs) >= 0) ? (A->cmap->bs) : (-(A-> cmap->bs))),PetscAbs(B->cmap->bs)(((B->cmap->bs) >= 0) ? (B->cmap->bs) : (-(B-> cmap->bs))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1152,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1153 | ierr = MatSetType(Bt,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1153,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1154 | ||||
1155 | /* get symbolic C=A*Bt */ | |||
1156 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1156,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1157 | ||||
1158 | /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ | |||
1159 | ierr = PetscNew(&abt)PetscMallocA(1,PETSC_TRUE,1159,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(1)*sizeof(**((&abt))),((&abt)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1159,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1160 | c = (Mat_SeqAIJ*)(*C)->data; | |||
1161 | c->abt = abt; | |||
1162 | ||||
1163 | abt->usecoloring = PETSC_FALSE; | |||
1164 | abt->destroy = (*C)->ops->destroy; | |||
1165 | (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; | |||
1166 | ||||
1167 | ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL((void*)0),"-matmattransmult_color",&abt->usecoloring,NULL((void*)0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1167,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1168 | if (abt->usecoloring) { | |||
1169 | /* Create MatTransposeColoring from symbolic C=A*B^T */ | |||
1170 | MatTransposeColoring matcoloring; | |||
1171 | MatColoring coloring; | |||
1172 | ISColoring iscoloring; | |||
1173 | Mat Bt_dense,C_dense; | |||
1174 | Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; | |||
1175 | /* inode causes memory problem, don't know why */ | |||
1176 | if (c->inode.use) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MAT_USE_INODES is not supported. Use '-mat_no_inode'")return PetscError(((MPI_Comm)0x44000001),1176,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,56,PETSC_ERROR_INITIAL,"MAT_USE_INODES is not supported. Use '-mat_no_inode'" ); | |||
1177 | ||||
1178 | ierr = MatColoringCreate(*C,&coloring);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1178,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1179 | ierr = MatColoringSetDistance(coloring,2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1179,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1180 | ierr = MatColoringSetType(coloring,MATCOLORINGSL"sl");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1180,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1181 | ierr = MatColoringSetFromOptions(coloring);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1181,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1182 | ierr = MatColoringApply(coloring,&iscoloring);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1182,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1183 | ierr = MatColoringDestroy(&coloring);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1183,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1184 | ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1184,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1185 | ||||
1186 | abt->matcoloring = matcoloring; | |||
1187 | ||||
1188 | ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1188,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1189 | ||||
1190 | /* Create Bt_dense and C_dense = A*Bt_dense */ | |||
1191 | ierr = MatCreate(PETSC_COMM_SELF((MPI_Comm)0x44000001),&Bt_dense);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1191,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1192 | ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1192,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1193 | ierr = MatSetType(Bt_dense,MATSEQDENSE"seqdense");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1193,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1194 | ierr = MatSeqDenseSetPreallocation(Bt_dense,NULL((void*)0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1194,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1195 | ||||
1196 | Bt_dense->assembled = PETSC_TRUE; | |||
1197 | abt->Bt_den = Bt_dense; | |||
1198 | ||||
1199 | ierr = MatCreate(PETSC_COMM_SELF((MPI_Comm)0x44000001),&C_dense);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1199,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1200 | ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1200,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1201 | ierr = MatSetType(C_dense,MATSEQDENSE"seqdense");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1201,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1202 | ierr = MatSeqDenseSetPreallocation(C_dense,NULL((void*)0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1202,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1203 | ||||
1204 | Bt_dense->assembled = PETSC_TRUE; | |||
1205 | abt->ABt_den = C_dense; | |||
1206 | ||||
1207 | #if defined(PETSC_USE_INFO1) | |||
1208 | { | |||
1209 | Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*C)->data; | |||
1210 | ierr = PetscInfo7(*C,"Use coloring of C=A*B^T; B^T: %D %D, Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors))PetscInfo_Private(__func__,*C,"Use coloring of C=A*B^T; B^T: %D %D, Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n" ,B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense ->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors ,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1210,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1211 | } | |||
1212 | #endif | |||
1213 | } | |||
1214 | /* clean up */ | |||
1215 | ierr = MatDestroy(&Bt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1215,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1216 | ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1216,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1217 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1218 | } | |||
1219 | ||||
1220 | PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) | |||
1221 | { | |||
1222 | PetscErrorCode ierr; | |||
1223 | Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; | |||
1224 | PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; | |||
1225 | PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; | |||
1226 | PetscLogDouble flops=0.0; | |||
1227 | MatScalar *aa =a->a,*aval,*ba=b->a,*bval,*ca,*cval; | |||
1228 | Mat_MatMatTransMult *abt = c->abt; | |||
1229 | ||||
1230 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1230; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1231 | /* clear old values in C */ | |||
1232 | if (!c->a) { | |||
1233 | ierr = PetscMalloc1(ci[cm]+1,&ca)PetscMallocA(1,PETSC_FALSE,1233,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[cm]+1)*sizeof(**(&ca)),(&ca));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1233,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1234 | c->a = ca; | |||
1235 | c->free_a = PETSC_TRUE; | |||
1236 | } else { | |||
1237 | ca = c->a; | |||
1238 | } | |||
1239 | ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1239,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1240 | ||||
1241 | if (abt->usecoloring) { | |||
1242 | MatTransposeColoring matcoloring = abt->matcoloring; | |||
1243 | Mat Bt_dense,C_dense = abt->ABt_den; | |||
1244 | ||||
1245 | /* Get Bt_dense by Apply MatTransposeColoring to B */ | |||
1246 | Bt_dense = abt->Bt_den; | |||
1247 | ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1247,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1248 | ||||
1249 | /* C_dense = A*Bt_dense */ | |||
1250 | ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1250,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1251 | ||||
1252 | /* Recover C from C_dense */ | |||
1253 | ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1253,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1254 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1255 | } | |||
1256 | ||||
1257 | for (i=0; i<cm; i++) { | |||
1258 | anzi = ai[i+1] - ai[i]; | |||
1259 | acol = aj + ai[i]; | |||
1260 | aval = aa + ai[i]; | |||
1261 | cnzi = ci[i+1] - ci[i]; | |||
1262 | ccol = cj + ci[i]; | |||
1263 | cval = ca + ci[i]; | |||
1264 | for (j=0; j<cnzi; j++) { | |||
1265 | brow = ccol[j]; | |||
1266 | bnzj = bi[brow+1] - bi[brow]; | |||
1267 | bcol = bj + bi[brow]; | |||
1268 | bval = ba + bi[brow]; | |||
1269 | ||||
1270 | /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ | |||
1271 | nexta = 0; nextb = 0; | |||
1272 | while (nexta<anzi && nextb<bnzj) { | |||
1273 | while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++; | |||
1274 | if (nexta == anzi) break; | |||
1275 | while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++; | |||
1276 | if (nextb == bnzj) break; | |||
1277 | if (acol[nexta] == bcol[nextb]) { | |||
1278 | cval[j] += aval[nexta]*bval[nextb]; | |||
1279 | nexta++; nextb++; | |||
1280 | flops += 2; | |||
1281 | } | |||
1282 | } | |||
1283 | } | |||
1284 | } | |||
1285 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1285,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1286 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1286,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1287 | ierr = PetscLogFlops(flops);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1287,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1288 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1289 | } | |||
1290 | ||||
1291 | PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(Mat A) | |||
1292 | { | |||
1293 | PetscErrorCode ierr; | |||
1294 | Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; | |||
1295 | Mat_MatTransMatMult *atb = a->atb; | |||
1296 | ||||
1297 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1297; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1298 | if (atb) { | |||
1299 | ierr = MatDestroy(&atb->At);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1299,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1300 | ierr = (*atb->destroy)(A);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1300,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1301 | } | |||
1302 | ierr = PetscFree(atb)((*PetscTrFree)((void*)(atb),1302,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((atb) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1302,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1303 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1304 | } | |||
1305 | ||||
1306 | PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) | |||
1307 | { | |||
1308 | PetscErrorCode ierr; | |||
1309 | const char *algTypes[2] = {"matmatmult","outerproduct"}; | |||
1310 | PetscInt alg=0; /* set default algorithm */ | |||
1311 | Mat At; | |||
1312 | Mat_MatTransMatMult *atb; | |||
1313 | Mat_SeqAIJ *c; | |||
1314 | ||||
1315 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1315; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1316 | if (scall == MAT_INITIAL_MATRIX) { | |||
1317 | ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatTransposeMatMult","Mat")0; do { PetscOptionItems PetscOptionsObjectBase; PetscOptionItems *PetscOptionsObject = &PetscOptionsObjectBase; PetscMemzero (PetscOptionsObject,sizeof(PetscOptionItems)); for (PetscOptionsObject ->count=(PetscOptionsPublish?-1:1); PetscOptionsObject-> count<2; PetscOptionsObject->count++) { PetscErrorCode _5_ierr = PetscOptionsBegin_Private(PetscOptionsObject,PetscObjectComm ((PetscObject)A),((PetscObject)A)->prefix,"MatTransposeMatMult" ,"Mat");do {if (__builtin_expect(!!(_5_ierr),0)) return PetscError (((MPI_Comm)0x44000001),1317,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1317,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1318 | ierr = PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,2,algTypes[0],&alg,NULL)PetscOptionsEList_Private(PetscOptionsObject,"-mattransposematmult_via" ,"Algorithmic approach","MatTransposeMatMult",algTypes,2,algTypes [0],&alg,((void*)0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1318,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1319 | ierr = PetscOptionsEnd()_5_ierr = PetscOptionsEnd_Private(PetscOptionsObject);do {if ( __builtin_expect(!!(_5_ierr),0)) return PetscError(((MPI_Comm )0x44000001),1319,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);}} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1319,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1320 | ||||
1321 | switch (alg) { | |||
1322 | case 1: | |||
1323 | ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1323,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1324 | break; | |||
1325 | default: | |||
1326 | ierr = PetscNew(&atb)PetscMallocA(1,PETSC_TRUE,1326,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(1)*sizeof(**((&atb))),((&atb)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1326,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1327 | ierr = MatTranspose_SeqAIJ(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1327,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1328 | ierr = MatMatMult_SeqAIJ_SeqAIJ(At,B,MAT_INITIAL_MATRIX,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1328,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1329 | ||||
1330 | c = (Mat_SeqAIJ*)(*C)->data; | |||
1331 | c->atb = atb; | |||
1332 | atb->At = At; | |||
1333 | atb->destroy = (*C)->ops->destroy; | |||
1334 | (*C)->ops->destroy = MatDestroy_SeqAIJ_MatTransMatMult; | |||
1335 | ||||
1336 | break; | |||
1337 | } | |||
1338 | } | |||
1339 | if (alg) { | |||
1340 | ierr = (*(*C)->ops->mattransposemultnumeric)(A,B,*C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1340,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1341 | } else if (!alg && scall == MAT_REUSE_MATRIX) { | |||
1342 | c = (Mat_SeqAIJ*)(*C)->data; | |||
1343 | atb = c->atb; | |||
1344 | At = atb->At; | |||
1345 | ierr = MatTranspose_SeqAIJ(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1345,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1346 | ierr = MatMatMult_SeqAIJ_SeqAIJ(At,B,MAT_REUSE_MATRIX,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1346,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1347 | } | |||
1348 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1349 | } | |||
1350 | ||||
1351 | PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) | |||
1352 | { | |||
1353 | PetscErrorCode ierr; | |||
1354 | Mat At; | |||
1355 | PetscInt *ati,*atj; | |||
1356 | ||||
1357 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1357; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1358 | /* create symbolic At */ | |||
1359 | ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1359,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1360 | ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF((MPI_Comm)0x44000001),A->cmap->n,A->rmap->n,ati,atj,NULL((void*)0),&At);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1360,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1361 | ierr = MatSetBlockSizes(At,PetscAbs(A->cmap->bs)(((A->cmap->bs) >= 0) ? (A->cmap->bs) : (-(A-> cmap->bs))),PetscAbs(B->cmap->bs)(((B->cmap->bs) >= 0) ? (B->cmap->bs) : (-(B-> cmap->bs))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1361,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1362 | ierr = MatSetType(At,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1362,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1363 | ||||
1364 | /* get symbolic C=At*B */ | |||
1365 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1365,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1366 | ||||
1367 | /* clean up */ | |||
1368 | ierr = MatDestroy(&At);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1368,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1369 | ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1369,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1370 | ||||
1371 | (*C)->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; | |||
1372 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1373 | } | |||
1374 | ||||
1375 | PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) | |||
1376 | { | |||
1377 | PetscErrorCode ierr; | |||
1378 | Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; | |||
1379 | PetscInt am =A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; | |||
1380 | PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; | |||
1381 | PetscLogDouble flops=0.0; | |||
1382 | MatScalar *aa =a->a,*ba,*ca,*caj; | |||
1383 | ||||
1384 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1384; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1385 | if (!c->a) { | |||
1386 | ierr = PetscMalloc1(ci[cm]+1,&ca)PetscMallocA(1,PETSC_FALSE,1386,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(ci[cm]+1)*sizeof(**(&ca)),(&ca));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1386,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1387 | ||||
1388 | c->a = ca; | |||
1389 | c->free_a = PETSC_TRUE; | |||
1390 | } else { | |||
1391 | ca = c->a; | |||
1392 | } | |||
1393 | /* clear old values in C */ | |||
1394 | ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1394,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1395 | ||||
1396 | /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ | |||
1397 | for (i=0; i<am; i++) { | |||
1398 | bj = b->j + bi[i]; | |||
1399 | ba = b->a + bi[i]; | |||
1400 | bnzi = bi[i+1] - bi[i]; | |||
1401 | anzi = ai[i+1] - ai[i]; | |||
1402 | for (j=0; j<anzi; j++) { | |||
1403 | nextb = 0; | |||
1404 | crow = *aj++; | |||
1405 | cjj = cj + ci[crow]; | |||
1406 | caj = ca + ci[crow]; | |||
1407 | /* perform sparse axpy operation. Note cjj includes bj. */ | |||
1408 | for (k=0; nextb<bnzi; k++) { | |||
1409 | if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ | |||
1410 | caj[k] += (*aa)*(*(ba+nextb)); | |||
1411 | nextb++; | |||
1412 | } | |||
1413 | } | |||
1414 | flops += 2*bnzi; | |||
1415 | aa++; | |||
1416 | } | |||
1417 | } | |||
1418 | ||||
1419 | /* Assemble the final matrix and clean up */ | |||
1420 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1420,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1421 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1421,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1422 | ierr = PetscLogFlops(flops);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1422,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1423 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1424 | } | |||
1425 | ||||
1426 | PETSC_INTERNextern __attribute__((visibility ("hidden"))) PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) | |||
1427 | { | |||
1428 | PetscErrorCode ierr; | |||
1429 | ||||
1430 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1430; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1431 | if (scall == MAT_INITIAL_MATRIX) { | |||
1432 | ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultSymbolic].active) ? (*PetscLogPLB)((MAT_MatMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1432,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1433 | ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1433,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1434 | ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultSymbolic].active) ? (*PetscLogPLE)((MAT_MatMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1434,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1435 | } | |||
1436 | ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultNumeric].active) ? (*PetscLogPLB)((MAT_MatMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1436,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1437 | ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1437,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1438 | ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultNumeric].active) ? (*PetscLogPLE)((MAT_MatMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1438,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1439 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1440 | } | |||
1441 | ||||
1442 | PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) | |||
1443 | { | |||
1444 | PetscErrorCode ierr; | |||
1445 | ||||
1446 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1446; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1447 | ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1447,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1448 | ||||
1449 | (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense; | |||
1450 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1451 | } | |||
1452 | ||||
1453 | PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) | |||
1454 | { | |||
1455 | Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; | |||
1456 | Mat_SeqDense *bd = (Mat_SeqDense*)B->data; | |||
1457 | PetscErrorCode ierr; | |||
1458 | PetscScalar *c,*b,r1,r2,r3,r4,*c1,*c2,*c3,*c4,aatmp; | |||
1459 | const PetscScalar *aa,*b1,*b2,*b3,*b4; | |||
1460 | const PetscInt *aj; | |||
1461 | PetscInt cm=C->rmap->n,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n; | |||
1462 | PetscInt am4=4*am,bm4=4*bm,col,i,j,n,ajtmp; | |||
1463 | ||||
1464 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1464; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1465 | if (!cm || !cn) PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1466 | if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n)return PetscError(((MPI_Comm)0x44000001),1466,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,60,PETSC_ERROR_INITIAL,"Number columns in A %D not equal rows in B %D\n" ,A->cmap->n,B->rmap->n); | |||
1467 | if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n)return PetscError(((MPI_Comm)0x44000001),1467,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,60,PETSC_ERROR_INITIAL,"Number rows in C %D not equal rows in A %D\n" ,C->rmap->n,A->rmap->n); | |||
1468 | if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n)return PetscError(((MPI_Comm)0x44000001),1468,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,60,PETSC_ERROR_INITIAL,"Number columns in B %D not equal columns in C %D\n" ,B->cmap->n,C->cmap->n); | |||
1469 | b = bd->v; | |||
1470 | ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1470,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1471 | b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; | |||
1472 | c1 = c; c2 = c1 + am; c3 = c2 + am; c4 = c3 + am; | |||
1473 | for (col=0; col<cn-4; col += 4) { /* over columns of C */ | |||
1474 | for (i=0; i<am; i++) { /* over rows of C in those columns */ | |||
1475 | r1 = r2 = r3 = r4 = 0.0; | |||
1476 | n = a->i[i+1] - a->i[i]; | |||
1477 | aj = a->j + a->i[i]; | |||
1478 | aa = a->a + a->i[i]; | |||
1479 | for (j=0; j<n; j++) { | |||
1480 | aatmp = aa[j]; ajtmp = aj[j]; | |||
1481 | r1 += aatmp*b1[ajtmp]; | |||
1482 | r2 += aatmp*b2[ajtmp]; | |||
1483 | r3 += aatmp*b3[ajtmp]; | |||
1484 | r4 += aatmp*b4[ajtmp]; | |||
1485 | } | |||
1486 | c1[i] = r1; | |||
1487 | c2[i] = r2; | |||
1488 | c3[i] = r3; | |||
1489 | c4[i] = r4; | |||
1490 | } | |||
1491 | b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4; | |||
1492 | c1 += am4; c2 += am4; c3 += am4; c4 += am4; | |||
1493 | } | |||
1494 | for (; col<cn; col++) { /* over extra columns of C */ | |||
1495 | for (i=0; i<am; i++) { /* over rows of C in those columns */ | |||
1496 | r1 = 0.0; | |||
1497 | n = a->i[i+1] - a->i[i]; | |||
1498 | aj = a->j + a->i[i]; | |||
1499 | aa = a->a + a->i[i]; | |||
1500 | for (j=0; j<n; j++) { | |||
1501 | r1 += aa[j]*b1[aj[j]]; | |||
1502 | } | |||
1503 | c1[i] = r1; | |||
1504 | } | |||
1505 | b1 += bm; | |||
1506 | c1 += am; | |||
1507 | } | |||
1508 | ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1508,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1509 | ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1509,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1510 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1510,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1511 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1511,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1512 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1513 | } | |||
1514 | ||||
1515 | /* | |||
1516 | Note very similar to MatMult_SeqAIJ(), should generate both codes from same base | |||
1517 | */ | |||
1518 | PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) | |||
1519 | { | |||
1520 | Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; | |||
1521 | Mat_SeqDense *bd = (Mat_SeqDense*)B->data; | |||
1522 | PetscErrorCode ierr; | |||
1523 | PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; | |||
1524 | MatScalar *aa; | |||
1525 | PetscInt cm = C->rmap->n, cn=B->cmap->n, bm=bd->lda, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; | |||
1526 | PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; | |||
1527 | ||||
1528 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1528; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1529 | if (!cm || !cn) PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1530 | b = bd->v; | |||
1531 | ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1531,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1532 | b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; | |||
1533 | ||||
1534 | if (a->compressedrow.use) { /* use compressed row format */ | |||
1535 | for (col=0; col<cn-4; col += 4) { /* over columns of C */ | |||
1536 | colam = col*am; | |||
1537 | arm = a->compressedrow.nrows; | |||
1538 | ii = a->compressedrow.i; | |||
1539 | ridx = a->compressedrow.rindex; | |||
1540 | for (i=0; i<arm; i++) { /* over rows of C in those columns */ | |||
1541 | r1 = r2 = r3 = r4 = 0.0; | |||
1542 | n = ii[i+1] - ii[i]; | |||
1543 | aj = a->j + ii[i]; | |||
1544 | aa = a->a + ii[i]; | |||
1545 | for (j=0; j<n; j++) { | |||
1546 | r1 += (*aa)*b1[*aj]; | |||
1547 | r2 += (*aa)*b2[*aj]; | |||
1548 | r3 += (*aa)*b3[*aj]; | |||
1549 | r4 += (*aa++)*b4[*aj++]; | |||
1550 | } | |||
1551 | c[colam + ridx[i]] += r1; | |||
1552 | c[colam + am + ridx[i]] += r2; | |||
1553 | c[colam + am2 + ridx[i]] += r3; | |||
1554 | c[colam + am3 + ridx[i]] += r4; | |||
1555 | } | |||
1556 | b1 += bm4; | |||
1557 | b2 += bm4; | |||
1558 | b3 += bm4; | |||
1559 | b4 += bm4; | |||
1560 | } | |||
1561 | for (; col<cn; col++) { /* over extra columns of C */ | |||
1562 | colam = col*am; | |||
1563 | arm = a->compressedrow.nrows; | |||
1564 | ii = a->compressedrow.i; | |||
1565 | ridx = a->compressedrow.rindex; | |||
1566 | for (i=0; i<arm; i++) { /* over rows of C in those columns */ | |||
1567 | r1 = 0.0; | |||
1568 | n = ii[i+1] - ii[i]; | |||
1569 | aj = a->j + ii[i]; | |||
1570 | aa = a->a + ii[i]; | |||
1571 | ||||
1572 | for (j=0; j<n; j++) { | |||
1573 | r1 += (*aa++)*b1[*aj++]; | |||
1574 | } | |||
1575 | c[colam + ridx[i]] += r1; | |||
1576 | } | |||
1577 | b1 += bm; | |||
1578 | } | |||
1579 | } else { | |||
1580 | for (col=0; col<cn-4; col += 4) { /* over columns of C */ | |||
1581 | colam = col*am; | |||
1582 | for (i=0; i<am; i++) { /* over rows of C in those columns */ | |||
1583 | r1 = r2 = r3 = r4 = 0.0; | |||
1584 | n = a->i[i+1] - a->i[i]; | |||
1585 | aj = a->j + a->i[i]; | |||
1586 | aa = a->a + a->i[i]; | |||
1587 | for (j=0; j<n; j++) { | |||
1588 | r1 += (*aa)*b1[*aj]; | |||
1589 | r2 += (*aa)*b2[*aj]; | |||
1590 | r3 += (*aa)*b3[*aj]; | |||
1591 | r4 += (*aa++)*b4[*aj++]; | |||
1592 | } | |||
1593 | c[colam + i] += r1; | |||
1594 | c[colam + am + i] += r2; | |||
1595 | c[colam + am2 + i] += r3; | |||
1596 | c[colam + am3 + i] += r4; | |||
1597 | } | |||
1598 | b1 += bm4; | |||
1599 | b2 += bm4; | |||
1600 | b3 += bm4; | |||
1601 | b4 += bm4; | |||
1602 | } | |||
1603 | for (; col<cn; col++) { /* over extra columns of C */ | |||
1604 | colam = col*am; | |||
1605 | for (i=0; i<am; i++) { /* over rows of C in those columns */ | |||
1606 | r1 = 0.0; | |||
1607 | n = a->i[i+1] - a->i[i]; | |||
1608 | aj = a->j + a->i[i]; | |||
1609 | aa = a->a + a->i[i]; | |||
1610 | ||||
1611 | for (j=0; j<n; j++) { | |||
1612 | r1 += (*aa++)*b1[*aj++]; | |||
1613 | } | |||
1614 | c[colam + i] += r1; | |||
1615 | } | |||
1616 | b1 += bm; | |||
1617 | } | |||
1618 | } | |||
1619 | ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1619,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1620 | ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1620,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1621 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1622 | } | |||
1623 | ||||
1624 | PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) | |||
1625 | { | |||
1626 | PetscErrorCode ierr; | |||
1627 | Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; | |||
1628 | Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; | |||
1629 | PetscInt *bi = b->i,*bj=b->j; | |||
1630 | PetscInt m = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; | |||
1631 | MatScalar *btval,*btval_den,*ba=b->a; | |||
1632 | PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; | |||
1633 | ||||
1634 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1634; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1635 | btval_den=btdense->v; | |||
1636 | ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1636,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1637 | for (k=0; k<ncolors; k++) { | |||
1638 | ncolumns = coloring->ncolumns[k]; | |||
1639 | for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ | |||
1640 | col = *(columns + colorforcol[k] + l); | |||
1641 | btcol = bj + bi[col]; | |||
1642 | btval = ba + bi[col]; | |||
1643 | anz = bi[col+1] - bi[col]; | |||
1644 | for (j=0; j<anz; j++) { | |||
1645 | brow = btcol[j]; | |||
1646 | btval_den[brow] = btval[j]; | |||
1647 | } | |||
1648 | } | |||
1649 | btval_den += m; | |||
1650 | } | |||
1651 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1652 | } | |||
1653 | ||||
1654 | PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) | |||
1655 | { | |||
1656 | PetscErrorCode ierr; | |||
1657 | Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; | |||
1658 | PetscScalar *ca_den,*ca_den_ptr,*ca=csp->a; | |||
1659 | PetscInt k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors; | |||
1660 | PetscInt brows=matcoloring->brows,*den2sp=matcoloring->den2sp; | |||
1661 | PetscInt nrows,*row,*idx; | |||
1662 | PetscInt *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow; | |||
1663 | ||||
1664 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1664; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1665 | ierr = MatDenseGetArray(Cden,&ca_den);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1665,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1666 | ||||
1667 | if (brows > 0) { | |||
1668 | PetscInt *lstart,row_end,row_start; | |||
1669 | lstart = matcoloring->lstart; | |||
1670 | ierr = PetscMemzero(lstart,ncolors*sizeof(PetscInt));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1670,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1671 | ||||
1672 | row_end = brows; | |||
1673 | if (row_end > m) row_end = m; | |||
1674 | for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */ | |||
1675 | ca_den_ptr = ca_den; | |||
1676 | for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */ | |||
1677 | nrows = matcoloring->nrows[k]; | |||
1678 | row = rows + colorforrow[k]; | |||
1679 | idx = den2sp + colorforrow[k]; | |||
1680 | for (l=lstart[k]; l<nrows; l++) { | |||
1681 | if (row[l] >= row_end) { | |||
1682 | lstart[k] = l; | |||
1683 | break; | |||
1684 | } else { | |||
1685 | ca[idx[l]] = ca_den_ptr[row[l]]; | |||
1686 | } | |||
1687 | } | |||
1688 | ca_den_ptr += m; | |||
1689 | } | |||
1690 | row_end += brows; | |||
1691 | if (row_end > m) row_end = m; | |||
1692 | } | |||
1693 | } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */ | |||
1694 | ca_den_ptr = ca_den; | |||
1695 | for (k=0; k<ncolors; k++) { | |||
1696 | nrows = matcoloring->nrows[k]; | |||
1697 | row = rows + colorforrow[k]; | |||
1698 | idx = den2sp + colorforrow[k]; | |||
1699 | for (l=0; l<nrows; l++) { | |||
1700 | ca[idx[l]] = ca_den_ptr[row[l]]; | |||
1701 | } | |||
1702 | ca_den_ptr += m; | |||
1703 | } | |||
1704 | } | |||
1705 | ||||
1706 | ierr = MatDenseRestoreArray(Cden,&ca_den);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1706,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1707 | #if defined(PETSC_USE_INFO1) | |||
1708 | if (matcoloring->brows > 0) { | |||
1709 | ierr = PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows)PetscInfo_Private(__func__,Csp,"Loop over %D row blocks for den2sp\n" ,brows);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1709,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1710 | } else { | |||
1711 | ierr = PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n")PetscInfo_Private(__func__,Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n" );CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1711,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1712 | } | |||
1713 | #endif | |||
1714 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1715 | } | |||
1716 | ||||
1717 | PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) | |||
1718 | { | |||
1719 | PetscErrorCode ierr; | |||
1720 | PetscInt i,n,nrows,Nbs,j,k,m,ncols,col,cm; | |||
1721 | const PetscInt *is,*ci,*cj,*row_idx; | |||
1722 | PetscInt nis = iscoloring->n,*rowhit,bs = 1; | |||
1723 | IS *isa; | |||
1724 | Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; | |||
1725 | PetscInt *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i; | |||
1726 | PetscInt *colorforcol,*columns,*columns_i,brows; | |||
1727 | PetscBool flg; | |||
1728 | ||||
1729 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1729; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1730 | ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE((void*)0),&isa);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1730,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1731 | ||||
1732 | /* bs >1 is not being tested yet! */ | |||
1733 | Nbs = mat->cmap->N/bs; | |||
1734 | c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ | |||
1735 | c->N = Nbs; | |||
1736 | c->m = c->M; | |||
1737 | c->rstart = 0; | |||
1738 | c->brows = 100; | |||
1739 | ||||
1740 | c->ncolors = nis; | |||
1741 | ierr = PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow)PetscMallocA(3,PETSC_FALSE,1741,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(nis)*sizeof(**(&c->ncolumns)),(&c->ncolumns ),(size_t)(nis)*sizeof(**(&c->nrows)),(&c->nrows ),(size_t)(nis+1)*sizeof(**(&colorforrow)),(&colorforrow ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1741,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1742 | ierr = PetscMalloc1(csp->nz+1,&rows)PetscMallocA(1,PETSC_FALSE,1742,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(csp->nz+1)*sizeof(**(&rows)),(&rows));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1742,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1743 | ierr = PetscMalloc1(csp->nz+1,&den2sp)PetscMallocA(1,PETSC_FALSE,1743,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(csp->nz+1)*sizeof(**(&den2sp)),(&den2sp) );CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1743,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1744 | ||||
1745 | brows = c->brows; | |||
1746 | ierr = PetscOptionsGetInt(NULL((void*)0),NULL((void*)0),"-matden2sp_brows",&brows,&flg);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1746,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1747 | if (flg) c->brows = brows; | |||
1748 | if (brows > 0) { | |||
1749 | ierr = PetscMalloc1(nis+1,&c->lstart)PetscMallocA(1,PETSC_FALSE,1749,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(nis+1)*sizeof(**(&c->lstart)),(&c->lstart ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1749,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1750 | } | |||
1751 | ||||
1752 | colorforrow[0] = 0; | |||
1753 | rows_i = rows; | |||
1754 | den2sp_i = den2sp; | |||
1755 | ||||
1756 | ierr = PetscMalloc1(nis+1,&colorforcol)PetscMallocA(1,PETSC_FALSE,1756,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(nis+1)*sizeof(**(&colorforcol)),(&colorforcol ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1756,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1757 | ierr = PetscMalloc1(Nbs+1,&columns)PetscMallocA(1,PETSC_FALSE,1757,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(Nbs+1)*sizeof(**(&columns)),(&columns));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1757,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1758 | ||||
1759 | colorforcol[0] = 0; | |||
1760 | columns_i = columns; | |||
1761 | ||||
1762 | /* get column-wise storage of mat */ | |||
1763 | ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL((void*)0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1763,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1764 | ||||
1765 | cm = c->m; | |||
1766 | ierr = PetscMalloc1(cm+1,&rowhit)PetscMallocA(1,PETSC_FALSE,1766,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(cm+1)*sizeof(**(&rowhit)),(&rowhit));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1766,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1767 | ierr = PetscMalloc1(cm+1,&idxhit)PetscMallocA(1,PETSC_FALSE,1767,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(cm+1)*sizeof(**(&idxhit)),(&idxhit));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1767,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1768 | for (i=0; i<nis; i++) { /* loop over color */ | |||
1769 | ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1769,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1770 | ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1770,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1771 | ||||
1772 | c->ncolumns[i] = n; | |||
1773 | if (n) { | |||
1774 | ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1774,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1775 | } | |||
1776 | colorforcol[i+1] = colorforcol[i] + n; | |||
1777 | columns_i += n; | |||
1778 | ||||
1779 | /* fast, crude version requires O(N*N) work */ | |||
1780 | ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1780,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1781 | ||||
1782 | for (j=0; j<n; j++) { /* loop over columns*/ | |||
1783 | col = is[j]; | |||
1784 | row_idx = cj + ci[col]; | |||
1785 | m = ci[col+1] - ci[col]; | |||
1786 | for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ | |||
1787 | idxhit[*row_idx] = spidx[ci[col] + k]; | |||
1788 | rowhit[*row_idx++] = col + 1; | |||
1789 | } | |||
1790 | } | |||
1791 | /* count the number of hits */ | |||
1792 | nrows = 0; | |||
1793 | for (j=0; j<cm; j++) { | |||
1794 | if (rowhit[j]) nrows++; | |||
1795 | } | |||
1796 | c->nrows[i] = nrows; | |||
1797 | colorforrow[i+1] = colorforrow[i] + nrows; | |||
1798 | ||||
1799 | nrows = 0; | |||
1800 | for (j=0; j<cm; j++) { /* loop over rows */ | |||
1801 | if (rowhit[j]) { | |||
1802 | rows_i[nrows] = j; | |||
1803 | den2sp_i[nrows] = idxhit[j]; | |||
1804 | nrows++; | |||
1805 | } | |||
1806 | } | |||
1807 | den2sp_i += nrows; | |||
1808 | ||||
1809 | ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1809,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1810 | rows_i += nrows; | |||
1811 | } | |||
1812 | ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL((void*)0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1812,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1813 | ierr = PetscFree(rowhit)((*PetscTrFree)((void*)(rowhit),1813,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((rowhit) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1813,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1814 | ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1814,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1815 | if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis])return PetscError(((MPI_Comm)0x44000001),1815,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,77,PETSC_ERROR_INITIAL,"csp->nz %d != colorforrow[nis] %d" ,csp->nz,colorforrow[nis]); | |||
1816 | ||||
1817 | c->colorforrow = colorforrow; | |||
1818 | c->rows = rows; | |||
1819 | c->den2sp = den2sp; | |||
1820 | c->colorforcol = colorforcol; | |||
1821 | c->columns = columns; | |||
1822 | ||||
1823 | ierr = PetscFree(idxhit)((*PetscTrFree)((void*)(idxhit),1823,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((idxhit) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1823,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1824 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1825 | } | |||
1826 | ||||
1827 | /* The combine method has done the symbolic and numeric in the first phase, and so we just return here */ | |||
1828 | PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Combined(Mat A,Mat B,Mat C) | |||
1829 | { | |||
1830 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1830; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1831 | /* Empty function */ | |||
1832 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1833 | } | |||
1834 | ||||
1835 | /* This algorithm combines the symbolic and numeric phase of matrix-matrix multiplication. */ | |||
1836 | PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ_Combined(Mat A,Mat B,PetscReal fill,Mat *C) | |||
1837 | { | |||
1838 | PetscErrorCode ierr; | |||
1839 | PetscLogDouble flops=0.0; | |||
1840 | Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; | |||
1841 | const PetscInt *ai=a->i,*bi=b->i; | |||
1842 | PetscInt *ci,*cj,*cj_i; | |||
1843 | PetscScalar *ca,*ca_i; | |||
1844 | PetscInt b_maxmemrow,c_maxmem,a_col; | |||
1845 | PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; | |||
1846 | PetscInt i,k,ndouble=0; | |||
1847 | PetscReal afill; | |||
1848 | PetscScalar *c_row_val_dense; | |||
1849 | PetscBool *c_row_idx_flags; | |||
1850 | PetscInt *aj_i=a->j; | |||
1851 | PetscScalar *aa_i=a->a; | |||
1852 | ||||
1853 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ; petscstack->line[petscstack->currentsize] = 1853; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1854 | ||||
1855 | /* Step 1: Determine upper bounds on memory for C and allocate memory */ | |||
1856 | /* This should be enough for almost all matrices. If still more memory is needed, it is reallocated later. */ | |||
1857 | c_maxmem = 8*(ai[am]+bi[bm]); | |||
1858 | b_maxmemrow = PetscMin(bi[bm],bn)(((bi[bm])<(bn)) ? (bi[bm]) : (bn)); | |||
1859 | ierr = PetscMalloc1(am+1,&ci)PetscMallocA(1,PETSC_FALSE,1859,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(am+1)*sizeof(**(&ci)),(&ci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1859,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1860 | ierr = PetscMalloc1(bn,&c_row_val_dense)PetscMallocA(1,PETSC_FALSE,1860,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(bn)*sizeof(**(&c_row_val_dense)),(&c_row_val_dense ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1860,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1861 | ierr = PetscMalloc1(bn,&c_row_idx_flags)PetscMallocA(1,PETSC_FALSE,1861,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(bn)*sizeof(**(&c_row_idx_flags)),(&c_row_idx_flags ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1861,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1862 | ierr = PetscMalloc1(c_maxmem,&cj)PetscMallocA(1,PETSC_FALSE,1862,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(c_maxmem)*sizeof(**(&cj)),(&cj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1862,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1863 | ierr = PetscMalloc1(c_maxmem,&ca)PetscMallocA(1,PETSC_FALSE,1863,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(size_t)(c_maxmem)*sizeof(**(&ca)),(&ca));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1863,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1864 | ca_i = ca; | |||
1865 | cj_i = cj; | |||
1866 | ci[0] = 0; | |||
1867 | ierr = PetscMemzero(c_row_val_dense,bn*sizeof(PetscScalar));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1867,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1868 | ierr = PetscMemzero(c_row_idx_flags,bn*sizeof(PetscBool));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1868,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1869 | for (i=0; i<am; i++) { | |||
1870 | /* Step 2: Initialize the dense row vector for C */ | |||
1871 | const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ | |||
1872 | PetscInt cnzi = 0; | |||
1873 | PetscInt *bj_i; | |||
1874 | PetscScalar *ba_i; | |||
1875 | /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory | |||
1876 | Usually, there is enough memory in the first place, so this is not executed. */ | |||
1877 | while (ci[i] + b_maxmemrow > c_maxmem) { | |||
1878 | c_maxmem *= 2; | |||
1879 | ndouble++; | |||
1880 | ierr = PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj)((*PetscTrRealloc)((sizeof(PetscInt)*c_maxmem),1880,__func__, "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(void**)(&cj)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1880,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1881 | ierr = PetscRealloc(sizeof(PetscScalar)*c_maxmem,&ca)((*PetscTrRealloc)((sizeof(PetscScalar)*c_maxmem),1881,__func__ ,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,(void**)(&ca)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1881,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1882 | } | |||
1883 | ||||
1884 | /* Step 3: Do the numerical calculations */ | |||
1885 | for (a_col=0; a_col<anzi; a_col++) { /* iterate over all non zero values in a row of A */ | |||
1886 | PetscInt a_col_index = aj_i[a_col]; | |||
1887 | const PetscInt bnzi = bi[a_col_index+1] - bi[a_col_index]; | |||
1888 | flops += 2*bnzi; | |||
1889 | bj_i = b->j + bi[a_col_index]; /* points to the current row in bj */ | |||
1890 | ba_i = b->a + bi[a_col_index]; /* points to the current row in ba */ | |||
1891 | for (k=0; k<bnzi; ++k) { /* iterate over all non zeros of this row in B */ | |||
1892 | if (c_row_idx_flags[bj_i[k]] == PETSC_FALSE) { | |||
1893 | cj_i[cnzi++] = bj_i[k]; | |||
1894 | c_row_idx_flags[bj_i[k]] = PETSC_TRUE; | |||
1895 | } | |||
1896 | c_row_val_dense[bj_i[k]] += aa_i[a_col] * ba_i[k]; | |||
1897 | } | |||
1898 | } | |||
1899 | ||||
1900 | /* Sort array */ | |||
1901 | ierr = PetscSortInt(cnzi,cj_i);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1901,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1902 | /* Step 4 */ | |||
1903 | for (k=0; k<cnzi; k++) { | |||
1904 | ca_i[k] = c_row_val_dense[cj_i[k]]; | |||
1905 | c_row_val_dense[cj_i[k]] = 0.; | |||
1906 | c_row_idx_flags[cj_i[k]] = PETSC_FALSE; | |||
1907 | } | |||
1908 | /* terminate current row */ | |||
1909 | aa_i += anzi; | |||
1910 | aj_i += anzi; | |||
1911 | ca_i += cnzi; | |||
1912 | cj_i += cnzi; | |||
1913 | ci[i+1] = ci[i] + cnzi; | |||
1914 | flops += cnzi; | |||
1915 | } | |||
1916 | ||||
1917 | /* Step 5 */ | |||
1918 | /* Create the new matrix */ | |||
1919 | ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL((void*)0),C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1919,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1920 | ierr = MatSetBlockSizesFromMats(*C,A,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1920,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1921 | ierr = MatSetType(*C,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1921,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1922 | ||||
1923 | /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ | |||
1924 | /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ | |||
1925 | c = (Mat_SeqAIJ*)((*C)->data); | |||
1926 | c->a = ca; | |||
1927 | c->free_a = PETSC_TRUE; | |||
1928 | c->free_ij = PETSC_TRUE; | |||
1929 | c->nonew = 0; | |||
1930 | ||||
1931 | /* set MatInfo */ | |||
1932 | afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; | |||
1933 | if (afill < 1.0) afill = 1.0; | |||
1934 | c->maxnz = ci[am]; | |||
1935 | c->nz = ci[am]; | |||
1936 | (*C)->info.mallocs = ndouble; | |||
1937 | (*C)->info.fill_ratio_given = fill; | |||
1938 | (*C)->info.fill_ratio_needed = afill; | |||
1939 | (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Combined; | |||
1940 | ||||
1941 | ierr = PetscFree(c_row_val_dense)((*PetscTrFree)((void*)(c_row_val_dense),1941,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((c_row_val_dense) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1941,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1942 | ierr = PetscFree(c_row_idx_flags)((*PetscTrFree)((void*)(c_row_idx_flags),1942,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ) || ((c_row_idx_flags) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1942,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1943 | ||||
1944 | ierr = MatAssemblyBegin(*C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1944,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1945 | ierr = MatAssemblyEnd(*C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1945,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1946 | ierr = PetscLogFlops(flops);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1946,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/seq/matmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1947 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1948 | } |