File: | mat/impls/baij/seq/baij2.c |
Warning: | line 2581, column 5 Value stored to 'ierr' is never read |
[?] Use j/k keys for keyboard navigation
1 | |
2 | #include <../src/mat/impls/baij/seq/baij.h> |
3 | #include <../src/mat/impls/dense/seq/dense.h> |
4 | #include <petsc/private/kernels/blockinvert.h> |
5 | #include <petscbt.h> |
6 | #include <petscblaslapack.h> |
7 | |
8 | #if defined(PETSC_HAVE_IMMINTRIN_H1) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE1) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) |
9 | #include <immintrin.h> |
10 | #endif |
11 | |
12 | PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov) |
13 | { |
14 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
15 | PetscErrorCode ierr; |
16 | PetscInt row,i,j,k,l,m,n,*nidx,isz,val,ival; |
17 | const PetscInt *idx; |
18 | PetscInt start,end,*ai,*aj,bs,*nidx2; |
19 | PetscBT table; |
20 | |
21 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 21; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
22 | m = a->mbs; |
23 | ai = a->i; |
24 | aj = a->j; |
25 | bs = A->rmap->bs; |
26 | |
27 | if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified")return PetscError(((MPI_Comm)0x44000001),27,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,63,PETSC_ERROR_INITIAL,"Negative overlap specified"); |
28 | |
29 | ierr = PetscBTCreate(m,&table);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),29,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
30 | ierr = PetscMalloc1(m+1,&nidx)PetscMallocA(1,PETSC_FALSE,30,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(m+1)*sizeof(**(&nidx)),(&nidx));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),30,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
31 | ierr = PetscMalloc1(A->rmap->N+1,&nidx2)PetscMallocA(1,PETSC_FALSE,31,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(A->rmap->N+1)*sizeof(**(&nidx2)),(&nidx2 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),31,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
32 | |
33 | for (i=0; i<is_max; i++) { |
34 | /* Initialise the two local arrays */ |
35 | isz = 0; |
36 | ierr = PetscBTMemzero(m,table);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),36,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
37 | |
38 | /* Extract the indices, assume there can be duplicate entries */ |
39 | ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),39,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
40 | ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),40,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
41 | |
42 | /* Enter these into the temp arrays i.e mark table[row], enter row into new index */ |
43 | for (j=0; j<n; ++j) { |
44 | ival = idx[j]/bs; /* convert the indices into block indices */ |
45 | if (ival>=m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim")return PetscError(((MPI_Comm)0x44000001),45,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,63,PETSC_ERROR_INITIAL,"index greater than mat-dim"); |
46 | if (!PetscBTLookupSet(table,ival)) nidx[isz++] = ival; |
47 | } |
48 | ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),48,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
49 | ierr = ISDestroy(&is[i]);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),49,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
50 | |
51 | k = 0; |
52 | for (j=0; j<ov; j++) { /* for each overlap*/ |
53 | n = isz; |
54 | for (; k<n; k++) { /* do only those rows in nidx[k], which are not done yet */ |
55 | row = nidx[k]; |
56 | start = ai[row]; |
57 | end = ai[row+1]; |
58 | for (l = start; l<end; l++) { |
59 | val = aj[l]; |
60 | if (!PetscBTLookupSet(table,val)) nidx[isz++] = val; |
61 | } |
62 | } |
63 | } |
64 | /* expand the Index Set */ |
65 | for (j=0; j<isz; j++) { |
66 | for (k=0; k<bs; k++) nidx2[j*bs+k] = nidx[j]*bs+k; |
67 | } |
68 | ierr = ISCreateGeneral(PETSC_COMM_SELF((MPI_Comm)0x44000001),isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),68,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
69 | } |
70 | ierr = PetscBTDestroy(&table);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),70,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
71 | ierr = PetscFree(nidx)((*PetscTrFree)((void*)(nidx),71,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ) || ((nidx) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),71,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
72 | ierr = PetscFree(nidx2)((*PetscTrFree)((void*)(nidx2),72,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ) || ((nidx2) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),72,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
73 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
74 | } |
75 | |
76 | PetscErrorCode MatCreateSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) |
77 | { |
78 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c; |
79 | PetscErrorCode ierr; |
80 | PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens; |
81 | PetscInt row,mat_i,*mat_j,tcol,*mat_ilen; |
82 | const PetscInt *irow,*icol; |
83 | PetscInt nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2; |
84 | PetscInt *aj = a->j,*ai = a->i; |
85 | MatScalar *mat_a; |
86 | Mat C; |
87 | PetscBool flag; |
88 | |
89 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 89; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
90 | ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),90,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
91 | ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),91,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
92 | ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),92,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
93 | ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),93,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
94 | |
95 | ierr = PetscCalloc1(1+oldcols,&smap)PetscMallocA(1,PETSC_TRUE,95,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(1+oldcols)*sizeof(**(&smap)),(&smap));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),95,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
96 | ssmap = smap; |
97 | ierr = PetscMalloc1(1+nrows,&lens)PetscMallocA(1,PETSC_FALSE,97,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(1+nrows)*sizeof(**(&lens)),(&lens));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),97,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
98 | for (i=0; i<ncols; i++) smap[icol[i]] = i+1; |
99 | /* determine lens of each row */ |
100 | for (i=0; i<nrows; i++) { |
101 | kstart = ai[irow[i]]; |
102 | kend = kstart + a->ilen[irow[i]]; |
103 | lens[i] = 0; |
104 | for (k=kstart; k<kend; k++) { |
105 | if (ssmap[aj[k]]) lens[i]++; |
106 | } |
107 | } |
108 | /* Create and fill new matrix */ |
109 | if (scall == MAT_REUSE_MATRIX) { |
110 | c = (Mat_SeqBAIJ*)((*B)->data); |
111 | |
112 | if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size")return PetscError(((MPI_Comm)0x44000001),112,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,60,PETSC_ERROR_INITIAL,"Submatrix wrong size"); |
113 | ierr = PetscArraycmp(c->ilen,lens,c->mbs,&flag)((sizeof(*(c->ilen)) != sizeof(*(lens))) || PetscMemcmp(c-> ilen,lens,(c->mbs)*sizeof(*(c->ilen)),&flag));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),113,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
114 | if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros")return PetscError(((MPI_Comm)0x44000001),114,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,60,PETSC_ERROR_INITIAL,"Cannot reuse matrix. wrong no of nonzeros" ); |
115 | ierr = PetscArrayzero(c->ilen,c->mbs)PetscMemzero(c->ilen,(c->mbs)*sizeof(*(c->ilen)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),115,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
116 | C = *B; |
117 | } else { |
118 | ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),118,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
119 | ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE-1,PETSC_DETERMINE-1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),119,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
120 | ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),120,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
121 | ierr = MatSeqBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),121,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
122 | } |
123 | c = (Mat_SeqBAIJ*)(C->data); |
124 | for (i=0; i<nrows; i++) { |
125 | row = irow[i]; |
126 | kstart = ai[row]; |
127 | kend = kstart + a->ilen[row]; |
128 | mat_i = c->i[i]; |
129 | mat_j = c->j + mat_i; |
130 | mat_a = c->a + mat_i*bs2; |
131 | mat_ilen = c->ilen + i; |
132 | for (k=kstart; k<kend; k++) { |
133 | if ((tcol=ssmap[a->j[k]])) { |
134 | *mat_j++ = tcol - 1; |
135 | ierr = PetscArraycpy(mat_a,a->a+k*bs2,bs2)((sizeof(*(mat_a)) != sizeof(*(a->a+k*bs2))) || PetscMemcpy (mat_a,a->a+k*bs2,(bs2)*sizeof(*(mat_a))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),135,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
136 | mat_a += bs2; |
137 | (*mat_ilen)++; |
138 | } |
139 | } |
140 | } |
141 | /* sort */ |
142 | { |
143 | MatScalar *work; |
144 | ierr = PetscMalloc1(bs2,&work)PetscMallocA(1,PETSC_FALSE,144,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(bs2)*sizeof(**(&work)),(&work));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),144,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
145 | for (i=0; i<nrows; i++) { |
146 | PetscInt ilen; |
147 | mat_i = c->i[i]; |
148 | mat_j = c->j + mat_i; |
149 | mat_a = c->a + mat_i*bs2; |
150 | ilen = c->ilen[i]; |
151 | ierr = PetscSortIntWithDataArray(ilen,mat_j,mat_a,bs2*sizeof(MatScalar),work);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),151,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
152 | } |
153 | ierr = PetscFree(work)((*PetscTrFree)((void*)(work),153,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ) || ((work) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),153,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
154 | } |
155 | |
156 | /* Free work space */ |
157 | ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),157,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
158 | ierr = PetscFree(smap)((*PetscTrFree)((void*)(smap),158,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ) || ((smap) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),158,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
159 | ierr = PetscFree(lens)((*PetscTrFree)((void*)(lens),159,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ) || ((lens) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),159,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
160 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),160,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
161 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),161,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
162 | |
163 | ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),163,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
164 | *B = C; |
165 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
166 | } |
167 | |
168 | PetscErrorCode MatCreateSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) |
169 | { |
170 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
171 | IS is1,is2; |
172 | PetscErrorCode ierr; |
173 | PetscInt *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count,maxmnbs,j; |
174 | const PetscInt *irow,*icol; |
175 | |
176 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 176; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
177 | ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),177,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
178 | ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),178,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
179 | ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),179,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
180 | ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),180,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
181 | |
182 | /* Verify if the indices corespond to each element in a block |
183 | and form the IS with compressed IS */ |
184 | maxmnbs = PetscMax(a->mbs,a->nbs)(((a->mbs)<(a->nbs)) ? (a->nbs) : (a->mbs)); |
185 | ierr = PetscMalloc2(maxmnbs,&vary,maxmnbs,&iary)PetscMallocA(2,PETSC_FALSE,185,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(maxmnbs)*sizeof(**(&vary)),(&vary),(size_t) (maxmnbs)*sizeof(**(&iary)),(&iary));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),185,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
186 | ierr = PetscArrayzero(vary,a->mbs)PetscMemzero(vary,(a->mbs)*sizeof(*(vary)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),186,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
187 | for (i=0; i<nrows; i++) vary[irow[i]/bs]++; |
188 | for (i=0; i<a->mbs; i++) { |
189 | if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Index set does not match blocks")return PetscError(((MPI_Comm)0x44000001),189,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,60,PETSC_ERROR_INITIAL,"Index set does not match blocks"); |
190 | } |
191 | count = 0; |
192 | for (i=0; i<nrows; i++) { |
193 | j = irow[i] / bs; |
194 | if ((vary[j]--)==bs) iary[count++] = j; |
195 | } |
196 | ierr = ISCreateGeneral(PETSC_COMM_SELF((MPI_Comm)0x44000001),count,iary,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),196,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
197 | |
198 | ierr = PetscArrayzero(vary,a->nbs)PetscMemzero(vary,(a->nbs)*sizeof(*(vary)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),198,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
199 | for (i=0; i<ncols; i++) vary[icol[i]/bs]++; |
200 | for (i=0; i<a->nbs; i++) { |
201 | if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal error in PETSc")return PetscError(((MPI_Comm)0x44000001),201,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,77,PETSC_ERROR_INITIAL,"Internal error in PETSc"); |
202 | } |
203 | count = 0; |
204 | for (i=0; i<ncols; i++) { |
205 | j = icol[i] / bs; |
206 | if ((vary[j]--)==bs) iary[count++] = j; |
207 | } |
208 | ierr = ISCreateGeneral(PETSC_COMM_SELF((MPI_Comm)0x44000001),count,iary,PETSC_COPY_VALUES,&is2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),208,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
209 | ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),209,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
210 | ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),210,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
211 | ierr = PetscFree2(vary,iary)PetscFreeA(2,211,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,&(vary),&(iary));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),211,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
212 | |
213 | ierr = MatCreateSubMatrix_SeqBAIJ_Private(A,is1,is2,scall,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),213,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
214 | ierr = ISDestroy(&is1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),214,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
215 | ierr = ISDestroy(&is2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),215,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
216 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
217 | } |
218 | |
219 | PetscErrorCode MatDestroySubMatrix_SeqBAIJ(Mat C) |
220 | { |
221 | PetscErrorCode ierr; |
222 | Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data; |
223 | Mat_SubSppt *submatj = c->submatis1; |
224 | |
225 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 225; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
226 | ierr = (*submatj->destroy)(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),226,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
227 | ierr = MatDestroySubMatrix_Private(submatj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),227,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
228 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
229 | } |
230 | |
231 | PetscErrorCode MatDestroySubMatrices_SeqBAIJ(PetscInt n,Mat *mat[]) |
232 | { |
233 | PetscErrorCode ierr; |
234 | PetscInt i; |
235 | Mat C; |
236 | Mat_SeqBAIJ *c; |
237 | Mat_SubSppt *submatj; |
238 | |
239 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 239; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
240 | for (i=0; i<n; i++) { |
241 | C = (*mat)[i]; |
242 | c = (Mat_SeqBAIJ*)C->data; |
243 | submatj = c->submatis1; |
244 | if (submatj) { |
245 | ierr = (*submatj->destroy)(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),245,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
246 | ierr = MatDestroySubMatrix_Private(submatj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),246,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
247 | ierr = PetscFree(C->defaultvectype)((*PetscTrFree)((void*)(C->defaultvectype),247,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ) || ((C->defaultvectype) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),247,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
248 | ierr = PetscLayoutDestroy(&C->rmap);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),248,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
249 | ierr = PetscLayoutDestroy(&C->cmap);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),249,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
250 | ierr = PetscHeaderDestroy(&C)(PetscHeaderDestroy_Private((PetscObject)(*&C)) || ((*PetscTrFree )((void*)(*&C),250,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ) || ((*&C) = 0,0)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),250,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
251 | } else { |
252 | ierr = MatDestroy(&C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),252,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
253 | } |
254 | } |
255 | ierr = PetscFree(*mat)((*PetscTrFree)((void*)(*mat),255,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ) || ((*mat) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),255,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
256 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
257 | } |
258 | |
259 | PetscErrorCode MatCreateSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) |
260 | { |
261 | PetscErrorCode ierr; |
262 | PetscInt i; |
263 | |
264 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 264; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
265 | if (scall == MAT_INITIAL_MATRIX) { |
266 | ierr = PetscCalloc1(n+1,B)PetscMallocA(1,PETSC_TRUE,266,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(n+1)*sizeof(**(B)),(B));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),266,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
267 | } |
268 | |
269 | for (i=0; i<n; i++) { |
270 | ierr = MatCreateSubMatrix_SeqBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),270,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
271 | } |
272 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
273 | } |
274 | |
275 | /* -------------------------------------------------------*/ |
276 | /* Should check that shapes of vectors and matrices match */ |
277 | /* -------------------------------------------------------*/ |
278 | |
279 | PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) |
280 | { |
281 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
282 | PetscScalar *z,sum; |
283 | const PetscScalar *x; |
284 | const MatScalar *v; |
285 | PetscErrorCode ierr; |
286 | PetscInt mbs,i,n; |
287 | const PetscInt *idx,*ii,*ridx=NULL((void*)0); |
288 | PetscBool usecprow=a->compressedrow.use; |
289 | |
290 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 290; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
291 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),291,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
292 | ierr = VecGetArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),292,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
293 | |
294 | if (usecprow) { |
295 | mbs = a->compressedrow.nrows; |
296 | ii = a->compressedrow.i; |
297 | ridx = a->compressedrow.rindex; |
298 | ierr = PetscArrayzero(z,a->mbs)PetscMemzero(z,(a->mbs)*sizeof(*(z)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),298,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
299 | } else { |
300 | mbs = a->mbs; |
301 | ii = a->i; |
302 | } |
303 | |
304 | for (i=0; i<mbs; i++) { |
305 | n = ii[1] - ii[0]; |
306 | v = a->a + ii[0]; |
307 | idx = a->j + ii[0]; |
308 | ii++; |
309 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
310 | PetscPrefetchBlock(v+1*n,1*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+1*n),*_end = (const char *)((v+1*n)+(1*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
311 | sum = 0.0; |
312 | PetscSparseDensePlusDot(sum,x,v,idx,n){ PetscInt __i; for (__i=0; __i<n; __i++) sum += v[__i] * x [idx[__i]];}; |
313 | if (usecprow) { |
314 | z[ridx[i]] = sum; |
315 | } else { |
316 | z[i] = sum; |
317 | } |
318 | } |
319 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),319,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
320 | ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),320,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
321 | ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),321,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
322 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
323 | } |
324 | |
325 | PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) |
326 | { |
327 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
328 | PetscScalar *z = 0,sum1,sum2,*zarray; |
329 | const PetscScalar *x,*xb; |
330 | PetscScalar x1,x2; |
331 | const MatScalar *v; |
332 | PetscErrorCode ierr; |
333 | PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL((void*)0); |
334 | PetscBool usecprow=a->compressedrow.use; |
335 | |
336 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 336; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
337 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),337,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
338 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),338,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
339 | |
340 | idx = a->j; |
341 | v = a->a; |
342 | if (usecprow) { |
343 | mbs = a->compressedrow.nrows; |
344 | ii = a->compressedrow.i; |
345 | ridx = a->compressedrow.rindex; |
346 | ierr = PetscArrayzero(zarray,2*a->mbs)PetscMemzero(zarray,(2*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),346,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
347 | } else { |
348 | mbs = a->mbs; |
349 | ii = a->i; |
350 | z = zarray; |
351 | } |
352 | |
353 | for (i=0; i<mbs; i++) { |
354 | n = ii[1] - ii[0]; ii++; |
355 | sum1 = 0.0; sum2 = 0.0; |
356 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
357 | PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+4*n),*_end = (const char *)((v+4*n)+(4*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
358 | for (j=0; j<n; j++) { |
359 | xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; |
360 | sum1 += v[0]*x1 + v[2]*x2; |
361 | sum2 += v[1]*x1 + v[3]*x2; |
362 | v += 4; |
363 | } |
364 | if (usecprow) z = zarray + 2*ridx[i]; |
365 | z[0] = sum1; z[1] = sum2; |
366 | if (!usecprow) z += 2; |
367 | } |
368 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),368,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
369 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),369,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
370 | ierr = PetscLogFlops(8.0*a->nz - 2.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),370,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
371 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
372 | } |
373 | |
374 | PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) |
375 | { |
376 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
377 | PetscScalar *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray; |
378 | const PetscScalar *x,*xb; |
379 | const MatScalar *v; |
380 | PetscErrorCode ierr; |
381 | PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL((void*)0); |
382 | PetscBool usecprow=a->compressedrow.use; |
383 | |
384 | #if defined(PETSC_HAVE_PRAGMA_DISJOINT) |
385 | #pragma disjoint(*v,*z,*xb) |
386 | #endif |
387 | |
388 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 388; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
389 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),389,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
390 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),390,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
391 | |
392 | idx = a->j; |
393 | v = a->a; |
394 | if (usecprow) { |
395 | mbs = a->compressedrow.nrows; |
396 | ii = a->compressedrow.i; |
397 | ridx = a->compressedrow.rindex; |
398 | ierr = PetscArrayzero(zarray,3*a->mbs)PetscMemzero(zarray,(3*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),398,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
399 | } else { |
400 | mbs = a->mbs; |
401 | ii = a->i; |
402 | z = zarray; |
403 | } |
404 | |
405 | for (i=0; i<mbs; i++) { |
406 | n = ii[1] - ii[0]; ii++; |
407 | sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; |
408 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
409 | PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+9*n),*_end = (const char *)((v+9*n)+(9*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
410 | for (j=0; j<n; j++) { |
411 | xb = x + 3*(*idx++); |
412 | x1 = xb[0]; |
413 | x2 = xb[1]; |
414 | x3 = xb[2]; |
415 | |
416 | sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; |
417 | sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; |
418 | sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; |
419 | v += 9; |
420 | } |
421 | if (usecprow) z = zarray + 3*ridx[i]; |
422 | z[0] = sum1; z[1] = sum2; z[2] = sum3; |
423 | if (!usecprow) z += 3; |
424 | } |
425 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),425,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
426 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),426,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
427 | ierr = PetscLogFlops(18.0*a->nz - 3.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),427,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
428 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
429 | } |
430 | |
431 | PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) |
432 | { |
433 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
434 | PetscScalar *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray; |
435 | const PetscScalar *x,*xb; |
436 | const MatScalar *v; |
437 | PetscErrorCode ierr; |
438 | PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL((void*)0); |
439 | PetscBool usecprow=a->compressedrow.use; |
440 | |
441 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 441; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
442 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),442,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
443 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),443,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
444 | |
445 | idx = a->j; |
446 | v = a->a; |
447 | if (usecprow) { |
448 | mbs = a->compressedrow.nrows; |
449 | ii = a->compressedrow.i; |
450 | ridx = a->compressedrow.rindex; |
451 | ierr = PetscArrayzero(zarray,4*a->mbs)PetscMemzero(zarray,(4*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),451,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
452 | } else { |
453 | mbs = a->mbs; |
454 | ii = a->i; |
455 | z = zarray; |
456 | } |
457 | |
458 | for (i=0; i<mbs; i++) { |
459 | n = ii[1] - ii[0]; |
460 | ii++; |
461 | sum1 = 0.0; |
462 | sum2 = 0.0; |
463 | sum3 = 0.0; |
464 | sum4 = 0.0; |
465 | |
466 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
467 | PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+16*n),*_end = (const char *)((v+16*n)+(16*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
468 | for (j=0; j<n; j++) { |
469 | xb = x + 4*(*idx++); |
470 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; |
471 | sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; |
472 | sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; |
473 | sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; |
474 | sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; |
475 | v += 16; |
476 | } |
477 | if (usecprow) z = zarray + 4*ridx[i]; |
478 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; |
479 | if (!usecprow) z += 4; |
480 | } |
481 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),481,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
482 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),482,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
483 | ierr = PetscLogFlops(32.0*a->nz - 4.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),483,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
484 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
485 | } |
486 | |
487 | PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) |
488 | { |
489 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
490 | PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray; |
491 | const PetscScalar *xb,*x; |
492 | const MatScalar *v; |
493 | PetscErrorCode ierr; |
494 | const PetscInt *idx,*ii,*ridx=NULL((void*)0); |
495 | PetscInt mbs,i,j,n; |
496 | PetscBool usecprow=a->compressedrow.use; |
497 | |
498 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 498; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
499 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),499,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
500 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),500,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
501 | |
502 | idx = a->j; |
503 | v = a->a; |
504 | if (usecprow) { |
505 | mbs = a->compressedrow.nrows; |
506 | ii = a->compressedrow.i; |
507 | ridx = a->compressedrow.rindex; |
508 | ierr = PetscArrayzero(zarray,5*a->mbs)PetscMemzero(zarray,(5*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),508,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
509 | } else { |
510 | mbs = a->mbs; |
511 | ii = a->i; |
512 | z = zarray; |
513 | } |
514 | |
515 | for (i=0; i<mbs; i++) { |
516 | n = ii[1] - ii[0]; ii++; |
517 | sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; |
518 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
519 | PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+25*n),*_end = (const char *)((v+25*n)+(25*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
520 | for (j=0; j<n; j++) { |
521 | xb = x + 5*(*idx++); |
522 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; |
523 | sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; |
524 | sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; |
525 | sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; |
526 | sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; |
527 | sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; |
528 | v += 25; |
529 | } |
530 | if (usecprow) z = zarray + 5*ridx[i]; |
531 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; |
532 | if (!usecprow) z += 5; |
533 | } |
534 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),534,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
535 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),535,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
536 | ierr = PetscLogFlops(50.0*a->nz - 5.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),536,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
537 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
538 | } |
539 | |
540 | |
541 | |
542 | PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz) |
543 | { |
544 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
545 | PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6; |
546 | const PetscScalar *x,*xb; |
547 | PetscScalar x1,x2,x3,x4,x5,x6,*zarray; |
548 | const MatScalar *v; |
549 | PetscErrorCode ierr; |
550 | PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL((void*)0); |
551 | PetscBool usecprow=a->compressedrow.use; |
552 | |
553 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 553; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
554 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),554,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
555 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),555,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
556 | |
557 | idx = a->j; |
558 | v = a->a; |
559 | if (usecprow) { |
560 | mbs = a->compressedrow.nrows; |
561 | ii = a->compressedrow.i; |
562 | ridx = a->compressedrow.rindex; |
563 | ierr = PetscArrayzero(zarray,6*a->mbs)PetscMemzero(zarray,(6*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),563,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
564 | } else { |
565 | mbs = a->mbs; |
566 | ii = a->i; |
567 | z = zarray; |
568 | } |
569 | |
570 | for (i=0; i<mbs; i++) { |
571 | n = ii[1] - ii[0]; |
572 | ii++; |
573 | sum1 = 0.0; |
574 | sum2 = 0.0; |
575 | sum3 = 0.0; |
576 | sum4 = 0.0; |
577 | sum5 = 0.0; |
578 | sum6 = 0.0; |
579 | |
580 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
581 | PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+36*n),*_end = (const char *)((v+36*n)+(36*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
582 | for (j=0; j<n; j++) { |
583 | xb = x + 6*(*idx++); |
584 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; |
585 | sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; |
586 | sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; |
587 | sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; |
588 | sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; |
589 | sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; |
590 | sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; |
591 | v += 36; |
592 | } |
593 | if (usecprow) z = zarray + 6*ridx[i]; |
594 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; |
595 | if (!usecprow) z += 6; |
596 | } |
597 | |
598 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),598,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
599 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),599,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
600 | ierr = PetscLogFlops(72.0*a->nz - 6.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),600,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
601 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
602 | } |
603 | |
604 | PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) |
605 | { |
606 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
607 | PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7; |
608 | const PetscScalar *x,*xb; |
609 | PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray; |
610 | const MatScalar *v; |
611 | PetscErrorCode ierr; |
612 | PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL((void*)0); |
613 | PetscBool usecprow=a->compressedrow.use; |
614 | |
615 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 615; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
616 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),616,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
617 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),617,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
618 | |
619 | idx = a->j; |
620 | v = a->a; |
621 | if (usecprow) { |
622 | mbs = a->compressedrow.nrows; |
623 | ii = a->compressedrow.i; |
624 | ridx = a->compressedrow.rindex; |
625 | ierr = PetscArrayzero(zarray,7*a->mbs)PetscMemzero(zarray,(7*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),625,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
626 | } else { |
627 | mbs = a->mbs; |
628 | ii = a->i; |
629 | z = zarray; |
630 | } |
631 | |
632 | for (i=0; i<mbs; i++) { |
633 | n = ii[1] - ii[0]; |
634 | ii++; |
635 | sum1 = 0.0; |
636 | sum2 = 0.0; |
637 | sum3 = 0.0; |
638 | sum4 = 0.0; |
639 | sum5 = 0.0; |
640 | sum6 = 0.0; |
641 | sum7 = 0.0; |
642 | |
643 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
644 | PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+49*n),*_end = (const char *)((v+49*n)+(49*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
645 | for (j=0; j<n; j++) { |
646 | xb = x + 7*(*idx++); |
647 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; |
648 | sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; |
649 | sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; |
650 | sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; |
651 | sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; |
652 | sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; |
653 | sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; |
654 | sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; |
655 | v += 49; |
656 | } |
657 | if (usecprow) z = zarray + 7*ridx[i]; |
658 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; |
659 | if (!usecprow) z += 7; |
660 | } |
661 | |
662 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),662,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
663 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),663,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
664 | ierr = PetscLogFlops(98.0*a->nz - 7.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),664,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
665 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
666 | } |
667 | |
668 | #if defined(PETSC_HAVE_IMMINTRIN_H1) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE1) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) |
669 | PetscErrorCode MatMult_SeqBAIJ_9_AVX2(Mat A,Vec xx,Vec zz) |
670 | { |
671 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
672 | PetscScalar *z = 0,*work,*workt,*zarray; |
673 | const PetscScalar *x,*xb; |
674 | const MatScalar *v; |
675 | PetscErrorCode ierr; |
676 | PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; |
677 | const PetscInt *idx,*ii,*ridx=NULL((void*)0); |
678 | PetscInt k; |
679 | PetscBool usecprow=a->compressedrow.use; |
680 | |
681 | __m256d a0,a1,a2,a3,a4,a5; |
682 | __m256d w0,w1,w2,w3; |
683 | __m256d z0,z1,z2; |
684 | __m256i mask1 = _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL<<63); |
685 | |
686 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 686; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
687 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),687,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
688 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),688,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
689 | |
690 | idx = a->j; |
691 | v = a->a; |
692 | if (usecprow) { |
693 | mbs = a->compressedrow.nrows; |
694 | ii = a->compressedrow.i; |
695 | ridx = a->compressedrow.rindex; |
696 | ierr = PetscArrayzero(zarray,bs*a->mbs)PetscMemzero(zarray,(bs*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),696,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
697 | } else { |
698 | mbs = a->mbs; |
699 | ii = a->i; |
700 | z = zarray; |
701 | } |
702 | |
703 | if (!a->mult_work) { |
704 | k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap-> n) : (A->rmap->n)); |
705 | ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,705,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),705,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
706 | } |
707 | |
708 | work = a->mult_work; |
709 | for (i=0; i<mbs; i++) { |
710 | n = ii[1] - ii[0]; ii++; |
711 | workt = work; |
712 | for (j=0; j<n; j++) { |
713 | xb = x + bs*(*idx++); |
714 | for (k=0; k<bs; k++) workt[k] = xb[k]; |
715 | workt += bs; |
716 | } |
717 | if (usecprow) z = zarray + bs*ridx[i]; |
718 | |
719 | z0 = _mm256_setzero_pd(); z1 = _mm256_setzero_pd(); z2 = _mm256_setzero_pd(); |
720 | |
721 | for (j=0; j<n; j++) { |
722 | // first column of a |
723 | w0 = _mm256_set1_pd(work[j*9 ]); |
724 | a0 = _mm256_loadu_pd(&v[j*81 ]); z0 = _mm256_fmadd_pd(a0,w0,z0); |
725 | a1 = _mm256_loadu_pd(&v[j*81+4]); z1 = _mm256_fmadd_pd(a1,w0,z1); |
726 | a2 = _mm256_loadu_pd(&v[j*81+8]); z2 = _mm256_fmadd_pd(a2,w0,z2); |
727 | |
728 | // second column of a |
729 | w1 = _mm256_set1_pd(work[j*9+ 1]); |
730 | a0 = _mm256_loadu_pd(&v[j*81+ 9]); z0 = _mm256_fmadd_pd(a0,w1,z0); |
731 | a1 = _mm256_loadu_pd(&v[j*81+13]); z1 = _mm256_fmadd_pd(a1,w1,z1); |
732 | a2 = _mm256_loadu_pd(&v[j*81+17]); z2 = _mm256_fmadd_pd(a2,w1,z2); |
733 | |
734 | // third column of a |
735 | w2 = _mm256_set1_pd(work[j*9 +2]); |
736 | a3 = _mm256_loadu_pd(&v[j*81+18]); z0 = _mm256_fmadd_pd(a3,w2,z0); |
737 | a4 = _mm256_loadu_pd(&v[j*81+22]); z1 = _mm256_fmadd_pd(a4,w2,z1); |
738 | a5 = _mm256_loadu_pd(&v[j*81+26]); z2 = _mm256_fmadd_pd(a5,w2,z2); |
739 | |
740 | // fourth column of a |
741 | w3 = _mm256_set1_pd(work[j*9+ 3]); |
742 | a0 = _mm256_loadu_pd(&v[j*81+27]); z0 = _mm256_fmadd_pd(a0,w3,z0); |
743 | a1 = _mm256_loadu_pd(&v[j*81+31]); z1 = _mm256_fmadd_pd(a1,w3,z1); |
744 | a2 = _mm256_loadu_pd(&v[j*81+35]); z2 = _mm256_fmadd_pd(a2,w3,z2); |
745 | |
746 | // fifth column of a |
747 | w0 = _mm256_set1_pd(work[j*9+ 4]); |
748 | a3 = _mm256_loadu_pd(&v[j*81+36]); z0 = _mm256_fmadd_pd(a3,w0,z0); |
749 | a4 = _mm256_loadu_pd(&v[j*81+40]); z1 = _mm256_fmadd_pd(a4,w0,z1); |
750 | a5 = _mm256_loadu_pd(&v[j*81+44]); z2 = _mm256_fmadd_pd(a5,w0,z2); |
751 | |
752 | // sixth column of a |
753 | w1 = _mm256_set1_pd(work[j*9+ 5]); |
754 | a0 = _mm256_loadu_pd(&v[j*81+45]); z0 = _mm256_fmadd_pd(a0,w1,z0); |
755 | a1 = _mm256_loadu_pd(&v[j*81+49]); z1 = _mm256_fmadd_pd(a1,w1,z1); |
756 | a2 = _mm256_loadu_pd(&v[j*81+53]); z2 = _mm256_fmadd_pd(a2,w1,z2); |
757 | |
758 | // seventh column of a |
759 | w2 = _mm256_set1_pd(work[j*9+ 6]); |
760 | a0 = _mm256_loadu_pd(&v[j*81+54]); z0 = _mm256_fmadd_pd(a0,w2,z0); |
761 | a1 = _mm256_loadu_pd(&v[j*81+58]); z1 = _mm256_fmadd_pd(a1,w2,z1); |
762 | a2 = _mm256_loadu_pd(&v[j*81+62]); z2 = _mm256_fmadd_pd(a2,w2,z2); |
763 | |
764 | // eigth column of a |
765 | w3 = _mm256_set1_pd(work[j*9+ 7]); |
766 | a3 = _mm256_loadu_pd(&v[j*81+63]); z0 = _mm256_fmadd_pd(a3,w3,z0); |
767 | a4 = _mm256_loadu_pd(&v[j*81+67]); z1 = _mm256_fmadd_pd(a4,w3,z1); |
768 | a5 = _mm256_loadu_pd(&v[j*81+71]); z2 = _mm256_fmadd_pd(a5,w3,z2); |
769 | |
770 | // ninth column of a |
771 | w0 = _mm256_set1_pd(work[j*9+ 8]); |
772 | a0 = _mm256_loadu_pd(&v[j*81+72]); z0 = _mm256_fmadd_pd(a0,w0,z0); |
773 | a1 = _mm256_loadu_pd(&v[j*81+76]); z1 = _mm256_fmadd_pd(a1,w0,z1); |
774 | a2 = _mm256_maskload_pd(&v[j*81+80],mask1); z2 = _mm256_fmadd_pd(a2,w0,z2); |
775 | } |
776 | |
777 | _mm256_storeu_pd(&z[ 0], z0); _mm256_storeu_pd(&z[ 4], z1); _mm256_maskstore_pd(&z[8], mask1, z2); |
778 | |
779 | v += n*bs2; |
780 | if (!usecprow) z += bs; |
781 | } |
782 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),782,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
783 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),783,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
784 | ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),784,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
785 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
786 | } |
787 | #endif |
788 | |
789 | PetscErrorCode MatMult_SeqBAIJ_11(Mat A,Vec xx,Vec zz) |
790 | { |
791 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
792 | PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11; |
793 | const PetscScalar *x,*xb; |
794 | PetscScalar *zarray,xv; |
795 | const MatScalar *v; |
796 | PetscErrorCode ierr; |
797 | const PetscInt *ii,*ij=a->j,*idx; |
798 | PetscInt mbs,i,j,k,n,*ridx=NULL((void*)0); |
799 | PetscBool usecprow=a->compressedrow.use; |
800 | |
801 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 801; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
802 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),802,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
803 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),803,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
804 | |
805 | v = a->a; |
806 | if (usecprow) { |
807 | mbs = a->compressedrow.nrows; |
808 | ii = a->compressedrow.i; |
809 | ridx = a->compressedrow.rindex; |
810 | ierr = PetscArrayzero(zarray,11*a->mbs)PetscMemzero(zarray,(11*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),810,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
811 | } else { |
812 | mbs = a->mbs; |
813 | ii = a->i; |
814 | z = zarray; |
815 | } |
816 | |
817 | for (i=0; i<mbs; i++) { |
818 | n = ii[i+1] - ii[i]; |
819 | idx = ij + ii[i]; |
820 | sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; |
821 | sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; |
822 | |
823 | for (j=0; j<n; j++) { |
824 | xb = x + 11*(idx[j]); |
825 | |
826 | for (k=0; k<11; k++) { |
827 | xv = xb[k]; |
828 | sum1 += v[0]*xv; |
829 | sum2 += v[1]*xv; |
830 | sum3 += v[2]*xv; |
831 | sum4 += v[3]*xv; |
832 | sum5 += v[4]*xv; |
833 | sum6 += v[5]*xv; |
834 | sum7 += v[6]*xv; |
835 | sum8 += v[7]*xv; |
836 | sum9 += v[8]*xv; |
837 | sum10 += v[9]*xv; |
838 | sum11 += v[10]*xv; |
839 | v += 11; |
840 | } |
841 | } |
842 | if (usecprow) z = zarray + 11*ridx[i]; |
843 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; |
844 | z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; |
845 | |
846 | if (!usecprow) z += 11; |
847 | } |
848 | |
849 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),849,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
850 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),850,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
851 | ierr = PetscLogFlops(242.0*a->nz - 11.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),851,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
852 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
853 | } |
854 | |
855 | /* MatMult_SeqBAIJ_15 version 1: Columns in the block are accessed one at a time */ |
856 | /* Default MatMult for block size 15 */ |
857 | |
858 | PetscErrorCode MatMult_SeqBAIJ_15_ver1(Mat A,Vec xx,Vec zz) |
859 | { |
860 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
861 | PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15; |
862 | const PetscScalar *x,*xb; |
863 | PetscScalar *zarray,xv; |
864 | const MatScalar *v; |
865 | PetscErrorCode ierr; |
866 | const PetscInt *ii,*ij=a->j,*idx; |
867 | PetscInt mbs,i,j,k,n,*ridx=NULL((void*)0); |
868 | PetscBool usecprow=a->compressedrow.use; |
869 | |
870 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 870; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
871 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),871,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
872 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),872,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
873 | |
874 | v = a->a; |
875 | if (usecprow) { |
876 | mbs = a->compressedrow.nrows; |
877 | ii = a->compressedrow.i; |
878 | ridx = a->compressedrow.rindex; |
879 | ierr = PetscArrayzero(zarray,15*a->mbs)PetscMemzero(zarray,(15*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),879,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
880 | } else { |
881 | mbs = a->mbs; |
882 | ii = a->i; |
883 | z = zarray; |
884 | } |
885 | |
886 | for (i=0; i<mbs; i++) { |
887 | n = ii[i+1] - ii[i]; |
888 | idx = ij + ii[i]; |
889 | sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; |
890 | sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0; |
891 | |
892 | for (j=0; j<n; j++) { |
893 | xb = x + 15*(idx[j]); |
894 | |
895 | for (k=0; k<15; k++) { |
896 | xv = xb[k]; |
897 | sum1 += v[0]*xv; |
898 | sum2 += v[1]*xv; |
899 | sum3 += v[2]*xv; |
900 | sum4 += v[3]*xv; |
901 | sum5 += v[4]*xv; |
902 | sum6 += v[5]*xv; |
903 | sum7 += v[6]*xv; |
904 | sum8 += v[7]*xv; |
905 | sum9 += v[8]*xv; |
906 | sum10 += v[9]*xv; |
907 | sum11 += v[10]*xv; |
908 | sum12 += v[11]*xv; |
909 | sum13 += v[12]*xv; |
910 | sum14 += v[13]*xv; |
911 | sum15 += v[14]*xv; |
912 | v += 15; |
913 | } |
914 | } |
915 | if (usecprow) z = zarray + 15*ridx[i]; |
916 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; |
917 | z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15; |
918 | |
919 | if (!usecprow) z += 15; |
920 | } |
921 | |
922 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),922,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
923 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),923,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
924 | ierr = PetscLogFlops(450.0*a->nz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),924,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
925 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
926 | } |
927 | |
928 | /* MatMult_SeqBAIJ_15_ver2 : Columns in the block are accessed in sets of 4,4,4,3 */ |
929 | PetscErrorCode MatMult_SeqBAIJ_15_ver2(Mat A,Vec xx,Vec zz) |
930 | { |
931 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
932 | PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15; |
933 | const PetscScalar *x,*xb; |
934 | PetscScalar x1,x2,x3,x4,*zarray; |
935 | const MatScalar *v; |
936 | PetscErrorCode ierr; |
937 | const PetscInt *ii,*ij=a->j,*idx; |
938 | PetscInt mbs,i,j,n,*ridx=NULL((void*)0); |
939 | PetscBool usecprow=a->compressedrow.use; |
940 | |
941 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 941; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
942 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),942,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
943 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),943,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
944 | |
945 | v = a->a; |
946 | if (usecprow) { |
947 | mbs = a->compressedrow.nrows; |
948 | ii = a->compressedrow.i; |
949 | ridx = a->compressedrow.rindex; |
950 | ierr = PetscArrayzero(zarray,15*a->mbs)PetscMemzero(zarray,(15*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),950,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
951 | } else { |
952 | mbs = a->mbs; |
953 | ii = a->i; |
954 | z = zarray; |
955 | } |
956 | |
957 | for (i=0; i<mbs; i++) { |
958 | n = ii[i+1] - ii[i]; |
959 | idx = ij + ii[i]; |
960 | sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; |
961 | sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0; |
962 | |
963 | for (j=0; j<n; j++) { |
964 | xb = x + 15*(idx[j]); |
965 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; |
966 | |
967 | sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4; |
968 | sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4; |
969 | sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4; |
970 | sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4; |
971 | sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4; |
972 | sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4; |
973 | sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4; |
974 | sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4; |
975 | sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4; |
976 | sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4; |
977 | sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4; |
978 | sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4; |
979 | sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4; |
980 | sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4; |
981 | sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4; |
982 | |
983 | v += 60; |
984 | |
985 | x1 = xb[4]; x2 = xb[5]; x3 = xb[6]; x4 = xb[7]; |
986 | |
987 | sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4; |
988 | sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4; |
989 | sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4; |
990 | sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4; |
991 | sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4; |
992 | sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4; |
993 | sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4; |
994 | sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4; |
995 | sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4; |
996 | sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4; |
997 | sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4; |
998 | sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4; |
999 | sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4; |
1000 | sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4; |
1001 | sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4; |
1002 | v += 60; |
1003 | |
1004 | x1 = xb[8]; x2 = xb[9]; x3 = xb[10]; x4 = xb[11]; |
1005 | sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4; |
1006 | sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4; |
1007 | sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4; |
1008 | sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4; |
1009 | sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4; |
1010 | sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4; |
1011 | sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4; |
1012 | sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4; |
1013 | sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4; |
1014 | sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4; |
1015 | sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4; |
1016 | sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4; |
1017 | sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4; |
1018 | sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4; |
1019 | sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4; |
1020 | v += 60; |
1021 | |
1022 | x1 = xb[12]; x2 = xb[13]; x3 = xb[14]; |
1023 | sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3; |
1024 | sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3; |
1025 | sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3; |
1026 | sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3; |
1027 | sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3; |
1028 | sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3; |
1029 | sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3; |
1030 | sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3; |
1031 | sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3; |
1032 | sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3; |
1033 | sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3; |
1034 | sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3; |
1035 | sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3; |
1036 | sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3; |
1037 | sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3; |
1038 | v += 45; |
1039 | } |
1040 | if (usecprow) z = zarray + 15*ridx[i]; |
1041 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; |
1042 | z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15; |
1043 | |
1044 | if (!usecprow) z += 15; |
1045 | } |
1046 | |
1047 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1047,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1048 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1048,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1049 | ierr = PetscLogFlops(450.0*a->nz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1049,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1050 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1051 | } |
1052 | |
1053 | /* MatMult_SeqBAIJ_15_ver3 : Columns in the block are accessed in sets of 8,7 */ |
1054 | PetscErrorCode MatMult_SeqBAIJ_15_ver3(Mat A,Vec xx,Vec zz) |
1055 | { |
1056 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1057 | PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15; |
1058 | const PetscScalar *x,*xb; |
1059 | PetscScalar x1,x2,x3,x4,x5,x6,x7,x8,*zarray; |
1060 | const MatScalar *v; |
1061 | PetscErrorCode ierr; |
1062 | const PetscInt *ii,*ij=a->j,*idx; |
1063 | PetscInt mbs,i,j,n,*ridx=NULL((void*)0); |
1064 | PetscBool usecprow=a->compressedrow.use; |
1065 | |
1066 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1066; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1067 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1067,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1068 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1068,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1069 | |
1070 | v = a->a; |
1071 | if (usecprow) { |
1072 | mbs = a->compressedrow.nrows; |
1073 | ii = a->compressedrow.i; |
1074 | ridx = a->compressedrow.rindex; |
1075 | ierr = PetscArrayzero(zarray,15*a->mbs)PetscMemzero(zarray,(15*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1075,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1076 | } else { |
1077 | mbs = a->mbs; |
1078 | ii = a->i; |
1079 | z = zarray; |
1080 | } |
1081 | |
1082 | for (i=0; i<mbs; i++) { |
1083 | n = ii[i+1] - ii[i]; |
1084 | idx = ij + ii[i]; |
1085 | sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; |
1086 | sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0; |
1087 | |
1088 | for (j=0; j<n; j++) { |
1089 | xb = x + 15*(idx[j]); |
1090 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; |
1091 | x8 = xb[7]; |
1092 | |
1093 | sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4 + v[60]*x5 + v[75]*x6 + v[90]*x7 + v[105]*x8; |
1094 | sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4 + v[61]*x5 + v[76]*x6 + v[91]*x7 + v[106]*x8; |
1095 | sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4 + v[62]*x5 + v[77]*x6 + v[92]*x7 + v[107]*x8; |
1096 | sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4 + v[63]*x5 + v[78]*x6 + v[93]*x7 + v[108]*x8; |
1097 | sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4 + v[64]*x5 + v[79]*x6 + v[94]*x7 + v[109]*x8; |
1098 | sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4 + v[65]*x5 + v[80]*x6 + v[95]*x7 + v[110]*x8; |
1099 | sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4 + v[66]*x5 + v[81]*x6 + v[96]*x7 + v[111]*x8; |
1100 | sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4 + v[67]*x5 + v[82]*x6 + v[97]*x7 + v[112]*x8; |
1101 | sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4 + v[68]*x5 + v[83]*x6 + v[98]*x7 + v[113]*x8; |
1102 | sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4 + v[69]*x5 + v[84]*x6 + v[99]*x7 + v[114]*x8; |
1103 | sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4 + v[70]*x5 + v[85]*x6 + v[100]*x7 + v[115]*x8; |
1104 | sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4 + v[71]*x5 + v[86]*x6 + v[101]*x7 + v[116]*x8; |
1105 | sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4 + v[72]*x5 + v[87]*x6 + v[102]*x7 + v[117]*x8; |
1106 | sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4 + v[73]*x5 + v[88]*x6 + v[103]*x7 + v[118]*x8; |
1107 | sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4 + v[74]*x5 + v[89]*x6 + v[104]*x7 + v[119]*x8; |
1108 | v += 120; |
1109 | |
1110 | x1 = xb[8]; x2 = xb[9]; x3 = xb[10]; x4 = xb[11]; x5 = xb[12]; x6 = xb[13]; x7 = xb[14]; |
1111 | |
1112 | sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4 + v[60]*x5 + v[75]*x6 + v[90]*x7; |
1113 | sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4 + v[61]*x5 + v[76]*x6 + v[91]*x7; |
1114 | sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4 + v[62]*x5 + v[77]*x6 + v[92]*x7; |
1115 | sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4 + v[63]*x5 + v[78]*x6 + v[93]*x7; |
1116 | sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4 + v[64]*x5 + v[79]*x6 + v[94]*x7; |
1117 | sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4 + v[65]*x5 + v[80]*x6 + v[95]*x7; |
1118 | sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4 + v[66]*x5 + v[81]*x6 + v[96]*x7; |
1119 | sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4 + v[67]*x5 + v[82]*x6 + v[97]*x7; |
1120 | sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4 + v[68]*x5 + v[83]*x6 + v[98]*x7; |
1121 | sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4 + v[69]*x5 + v[84]*x6 + v[99]*x7; |
1122 | sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4 + v[70]*x5 + v[85]*x6 + v[100]*x7; |
1123 | sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4 + v[71]*x5 + v[86]*x6 + v[101]*x7; |
1124 | sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4 + v[72]*x5 + v[87]*x6 + v[102]*x7; |
1125 | sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4 + v[73]*x5 + v[88]*x6 + v[103]*x7; |
1126 | sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4 + v[74]*x5 + v[89]*x6 + v[104]*x7; |
1127 | v += 105; |
1128 | } |
1129 | if (usecprow) z = zarray + 15*ridx[i]; |
1130 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; |
1131 | z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15; |
1132 | |
1133 | if (!usecprow) z += 15; |
1134 | } |
1135 | |
1136 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1136,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1137 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1137,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1138 | ierr = PetscLogFlops(450.0*a->nz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1138,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1139 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1140 | } |
1141 | |
1142 | /* MatMult_SeqBAIJ_15_ver4 : All columns in the block are accessed at once */ |
1143 | |
1144 | PetscErrorCode MatMult_SeqBAIJ_15_ver4(Mat A,Vec xx,Vec zz) |
1145 | { |
1146 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1147 | PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15; |
1148 | const PetscScalar *x,*xb; |
1149 | PetscScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,*zarray; |
1150 | const MatScalar *v; |
1151 | PetscErrorCode ierr; |
1152 | const PetscInt *ii,*ij=a->j,*idx; |
1153 | PetscInt mbs,i,j,n,*ridx=NULL((void*)0); |
1154 | PetscBool usecprow=a->compressedrow.use; |
1155 | |
1156 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1156; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1157 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1157,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1158 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1158,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1159 | |
1160 | v = a->a; |
1161 | if (usecprow) { |
1162 | mbs = a->compressedrow.nrows; |
1163 | ii = a->compressedrow.i; |
1164 | ridx = a->compressedrow.rindex; |
1165 | ierr = PetscArrayzero(zarray,15*a->mbs)PetscMemzero(zarray,(15*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1165,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1166 | } else { |
1167 | mbs = a->mbs; |
1168 | ii = a->i; |
1169 | z = zarray; |
1170 | } |
1171 | |
1172 | for (i=0; i<mbs; i++) { |
1173 | n = ii[i+1] - ii[i]; |
1174 | idx = ij + ii[i]; |
1175 | sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; |
1176 | sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0; |
1177 | |
1178 | for (j=0; j<n; j++) { |
1179 | xb = x + 15*(idx[j]); |
1180 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; |
1181 | x8 = xb[7]; x9 = xb[8]; x10 = xb[9]; x11 = xb[10]; x12 = xb[11]; x13 = xb[12]; x14 = xb[13];x15 = xb[14]; |
1182 | |
1183 | sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4 + v[60]*x5 + v[75]*x6 + v[90]*x7 + v[105]*x8 + v[120]*x9 + v[135]*x10 + v[150]*x11 + v[165]*x12 + v[180]*x13 + v[195]*x14 + v[210]*x15; |
1184 | sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4 + v[61]*x5 + v[76]*x6 + v[91]*x7 + v[106]*x8 + v[121]*x9 + v[136]*x10 + v[151]*x11 + v[166]*x12 + v[181]*x13 + v[196]*x14 + v[211]*x15; |
1185 | sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4 + v[62]*x5 + v[77]*x6 + v[92]*x7 + v[107]*x8 + v[122]*x9 + v[137]*x10 + v[152]*x11 + v[167]*x12 + v[182]*x13 + v[197]*x14 + v[212]*x15; |
1186 | sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4 + v[63]*x5 + v[78]*x6 + v[93]*x7 + v[108]*x8 + v[123]*x9 + v[138]*x10 + v[153]*x11 + v[168]*x12 + v[183]*x13 + v[198]*x14 + v[213]*x15; |
1187 | sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4 + v[64]*x5 + v[79]*x6 + v[94]*x7 + v[109]*x8 + v[124]*x9 + v[139]*x10 + v[154]*x11 + v[169]*x12 + v[184]*x13 + v[199]*x14 + v[214]*x15; |
1188 | sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4 + v[65]*x5 + v[80]*x6 + v[95]*x7 + v[110]*x8 + v[125]*x9 + v[140]*x10 + v[155]*x11 + v[170]*x12 + v[185]*x13 + v[200]*x14 + v[215]*x15; |
1189 | sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4 + v[66]*x5 + v[81]*x6 + v[96]*x7 + v[111]*x8 + v[126]*x9 + v[141]*x10 + v[156]*x11 + v[171]*x12 + v[186]*x13 + v[201]*x14 + v[216]*x15; |
1190 | sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4 + v[67]*x5 + v[82]*x6 + v[97]*x7 + v[112]*x8 + v[127]*x9 + v[142]*x10 + v[157]*x11 + v[172]*x12 + v[187]*x13 + v[202]*x14 + v[217]*x15; |
1191 | sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4 + v[68]*x5 + v[83]*x6 + v[98]*x7 + v[113]*x8 + v[128]*x9 + v[143]*x10 + v[158]*x11 + v[173]*x12 + v[188]*x13 + v[203]*x14 + v[218]*x15; |
1192 | sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4 + v[69]*x5 + v[84]*x6 + v[99]*x7 + v[114]*x8 + v[129]*x9 + v[144]*x10 + v[159]*x11 + v[174]*x12 + v[189]*x13 + v[204]*x14 + v[219]*x15; |
1193 | sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4 + v[70]*x5 + v[85]*x6 + v[100]*x7 + v[115]*x8 + v[130]*x9 + v[145]*x10 + v[160]*x11 + v[175]*x12 + v[190]*x13 + v[205]*x14 + v[220]*x15; |
1194 | sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4 + v[71]*x5 + v[86]*x6 + v[101]*x7 + v[116]*x8 + v[131]*x9 + v[146]*x10 + v[161]*x11 + v[176]*x12 + v[191]*x13 + v[206]*x14 + v[221]*x15; |
1195 | sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4 + v[72]*x5 + v[87]*x6 + v[102]*x7 + v[117]*x8 + v[132]*x9 + v[147]*x10 + v[162]*x11 + v[177]*x12 + v[192]*x13 + v[207]*x14 + v[222]*x15; |
1196 | sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4 + v[73]*x5 + v[88]*x6 + v[103]*x7 + v[118]*x8 + v[133]*x9 + v[148]*x10 + v[163]*x11 + v[178]*x12 + v[193]*x13 + v[208]*x14 + v[223]*x15; |
1197 | sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4 + v[74]*x5 + v[89]*x6 + v[104]*x7 + v[119]*x8 + v[134]*x9 + v[149]*x10 + v[164]*x11 + v[179]*x12 + v[194]*x13 + v[209]*x14 + v[224]*x15; |
1198 | v += 225; |
1199 | } |
1200 | if (usecprow) z = zarray + 15*ridx[i]; |
1201 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; |
1202 | z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15; |
1203 | |
1204 | if (!usecprow) z += 15; |
1205 | } |
1206 | |
1207 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1207,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1208 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1208,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1209 | ierr = PetscLogFlops(450.0*a->nz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1209,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1210 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1211 | } |
1212 | |
1213 | |
1214 | /* |
1215 | This will not work with MatScalar == float because it calls the BLAS |
1216 | */ |
1217 | PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) |
1218 | { |
1219 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1220 | PetscScalar *z = 0,*work,*workt,*zarray; |
1221 | const PetscScalar *x,*xb; |
1222 | const MatScalar *v; |
1223 | PetscErrorCode ierr; |
1224 | PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; |
1225 | const PetscInt *idx,*ii,*ridx=NULL((void*)0); |
1226 | PetscInt ncols,k; |
1227 | PetscBool usecprow=a->compressedrow.use; |
1228 | |
1229 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1229; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1230 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1230,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1231 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1231,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1232 | |
1233 | idx = a->j; |
1234 | v = a->a; |
1235 | if (usecprow) { |
1236 | mbs = a->compressedrow.nrows; |
1237 | ii = a->compressedrow.i; |
1238 | ridx = a->compressedrow.rindex; |
1239 | ierr = PetscArrayzero(zarray,bs*a->mbs)PetscMemzero(zarray,(bs*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1239,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1240 | } else { |
1241 | mbs = a->mbs; |
1242 | ii = a->i; |
1243 | z = zarray; |
1244 | } |
1245 | |
1246 | if (!a->mult_work) { |
1247 | k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap-> n) : (A->rmap->n)); |
1248 | ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,1248,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1248,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1249 | } |
1250 | work = a->mult_work; |
1251 | for (i=0; i<mbs; i++) { |
1252 | n = ii[1] - ii[0]; ii++; |
1253 | ncols = n*bs; |
1254 | workt = work; |
1255 | for (j=0; j<n; j++) { |
1256 | xb = x + bs*(*idx++); |
1257 | for (k=0; k<bs; k++) workt[k] = xb[k]; |
1258 | workt += bs; |
1259 | } |
1260 | if (usecprow) z = zarray + bs*ridx[i]; |
1261 | PetscKernel_w_gets_Ar_times_v(bs,ncols,work,v,z){ PetscScalar _one = 1.0,_zero = 0.0; PetscBLASInt _ione = 1, _bbs,_bncols; PetscErrorCode _ierr; _ierr = PetscBLASIntCast( bs,&_bbs);do {if (__builtin_expect(!!(_ierr),0)) return PetscError (((MPI_Comm)0x44000001),1261,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_ierr,PETSC_ERROR_REPEAT," ");} while (0); _ierr = PetscBLASIntCast (ncols,&_bncols);do {if (__builtin_expect(!!(_ierr),0)) return PetscError(((MPI_Comm)0x44000001),1261,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_ierr,PETSC_ERROR_REPEAT," ");} while (0); do { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack ->function[petscstack->currentsize] = "BLASgemv"; petscstack ->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1261; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_TRUE || petscstack->hotdepth); } ; } while (0); dgemv_("N",&(_bbs),&_bncols,&_one,v,&(_bbs), work,&_ione,&_zero,z,&_ione); do { do {PetscErrorCode _7_ierr = PetscMallocValidate(1261,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" );do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),1261,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do { ; if (petscstack && petscstack->currentsize > 0 ) { petscstack->currentsize--; petscstack->function[petscstack ->currentsize] = 0; petscstack->file[petscstack->currentsize ] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack-> hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ; } while (0); } while (0); } while(0); }; |
1262 | /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */ |
1263 | v += n*bs2; |
1264 | if (!usecprow) z += bs; |
1265 | } |
1266 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1266,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1267 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1267,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1268 | ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1268,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1269 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1270 | } |
1271 | |
1272 | PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) |
1273 | { |
1274 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1275 | const PetscScalar *x; |
1276 | PetscScalar *y,*z,sum; |
1277 | const MatScalar *v; |
1278 | PetscErrorCode ierr; |
1279 | PetscInt mbs=a->mbs,i,n,*ridx=NULL((void*)0); |
1280 | const PetscInt *idx,*ii; |
1281 | PetscBool usecprow=a->compressedrow.use; |
1282 | |
1283 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1283; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1284 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1284,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1285 | ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1285,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1286 | |
1287 | idx = a->j; |
1288 | v = a->a; |
1289 | if (usecprow) { |
1290 | if (zz != yy) { |
1291 | ierr = PetscArraycpy(z,y,mbs)((sizeof(*(z)) != sizeof(*(y))) || PetscMemcpy(z,y,(mbs)*sizeof (*(z))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1291,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1292 | } |
1293 | mbs = a->compressedrow.nrows; |
1294 | ii = a->compressedrow.i; |
1295 | ridx = a->compressedrow.rindex; |
1296 | } else { |
1297 | ii = a->i; |
1298 | } |
1299 | |
1300 | for (i=0; i<mbs; i++) { |
1301 | n = ii[1] - ii[0]; |
1302 | ii++; |
1303 | if (!usecprow) { |
1304 | sum = y[i]; |
1305 | } else { |
1306 | sum = y[ridx[i]]; |
1307 | } |
1308 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
1309 | PetscPrefetchBlock(v+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+n),*_end = (const char* )((v+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
1310 | PetscSparseDensePlusDot(sum,x,v,idx,n){ PetscInt __i; for (__i=0; __i<n; __i++) sum += v[__i] * x [idx[__i]];}; |
1311 | v += n; |
1312 | idx += n; |
1313 | if (usecprow) { |
1314 | z[ridx[i]] = sum; |
1315 | } else { |
1316 | z[i] = sum; |
1317 | } |
1318 | } |
1319 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1319,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1320 | ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1320,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1321 | ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1321,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1322 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1323 | } |
1324 | |
1325 | PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) |
1326 | { |
1327 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1328 | PetscScalar *y = 0,*z = 0,sum1,sum2; |
1329 | const PetscScalar *x,*xb; |
1330 | PetscScalar x1,x2,*yarray,*zarray; |
1331 | const MatScalar *v; |
1332 | PetscErrorCode ierr; |
1333 | PetscInt mbs = a->mbs,i,n,j; |
1334 | const PetscInt *idx,*ii,*ridx = NULL((void*)0); |
1335 | PetscBool usecprow = a->compressedrow.use; |
1336 | |
1337 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1337; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1338 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1338,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1339 | ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1339,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1340 | |
1341 | idx = a->j; |
1342 | v = a->a; |
1343 | if (usecprow) { |
1344 | if (zz != yy) { |
1345 | ierr = PetscArraycpy(zarray,yarray,2*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray ,yarray,(2*mbs)*sizeof(*(zarray))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1345,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1346 | } |
1347 | mbs = a->compressedrow.nrows; |
1348 | ii = a->compressedrow.i; |
1349 | ridx = a->compressedrow.rindex; |
1350 | } else { |
1351 | ii = a->i; |
1352 | y = yarray; |
1353 | z = zarray; |
1354 | } |
1355 | |
1356 | for (i=0; i<mbs; i++) { |
1357 | n = ii[1] - ii[0]; ii++; |
1358 | if (usecprow) { |
1359 | z = zarray + 2*ridx[i]; |
1360 | y = yarray + 2*ridx[i]; |
1361 | } |
1362 | sum1 = y[0]; sum2 = y[1]; |
1363 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
1364 | PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+4*n),*_end = (const char *)((v+4*n)+(4*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
1365 | for (j=0; j<n; j++) { |
1366 | xb = x + 2*(*idx++); |
1367 | x1 = xb[0]; |
1368 | x2 = xb[1]; |
1369 | |
1370 | sum1 += v[0]*x1 + v[2]*x2; |
1371 | sum2 += v[1]*x1 + v[3]*x2; |
1372 | v += 4; |
1373 | } |
1374 | z[0] = sum1; z[1] = sum2; |
1375 | if (!usecprow) { |
1376 | z += 2; y += 2; |
1377 | } |
1378 | } |
1379 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1379,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1380 | ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1380,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1381 | ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1381,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1382 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1383 | } |
1384 | |
1385 | PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) |
1386 | { |
1387 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1388 | PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray; |
1389 | const PetscScalar *x,*xb; |
1390 | const MatScalar *v; |
1391 | PetscErrorCode ierr; |
1392 | PetscInt mbs = a->mbs,i,j,n; |
1393 | const PetscInt *idx,*ii,*ridx = NULL((void*)0); |
1394 | PetscBool usecprow = a->compressedrow.use; |
1395 | |
1396 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1396; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1397 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1397,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1398 | ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1398,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1399 | |
1400 | idx = a->j; |
1401 | v = a->a; |
1402 | if (usecprow) { |
1403 | if (zz != yy) { |
1404 | ierr = PetscArraycpy(zarray,yarray,3*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray ,yarray,(3*mbs)*sizeof(*(zarray))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1404,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1405 | } |
1406 | mbs = a->compressedrow.nrows; |
1407 | ii = a->compressedrow.i; |
1408 | ridx = a->compressedrow.rindex; |
1409 | } else { |
1410 | ii = a->i; |
1411 | y = yarray; |
1412 | z = zarray; |
1413 | } |
1414 | |
1415 | for (i=0; i<mbs; i++) { |
1416 | n = ii[1] - ii[0]; ii++; |
1417 | if (usecprow) { |
1418 | z = zarray + 3*ridx[i]; |
1419 | y = yarray + 3*ridx[i]; |
1420 | } |
1421 | sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; |
1422 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
1423 | PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+9*n),*_end = (const char *)((v+9*n)+(9*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
1424 | for (j=0; j<n; j++) { |
1425 | xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; |
1426 | sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; |
1427 | sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; |
1428 | sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; |
1429 | v += 9; |
1430 | } |
1431 | z[0] = sum1; z[1] = sum2; z[2] = sum3; |
1432 | if (!usecprow) { |
1433 | z += 3; y += 3; |
1434 | } |
1435 | } |
1436 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1436,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1437 | ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1437,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1438 | ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1438,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1439 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1440 | } |
1441 | |
1442 | PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) |
1443 | { |
1444 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1445 | PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray; |
1446 | const PetscScalar *x,*xb; |
1447 | const MatScalar *v; |
1448 | PetscErrorCode ierr; |
1449 | PetscInt mbs = a->mbs,i,j,n; |
1450 | const PetscInt *idx,*ii,*ridx=NULL((void*)0); |
1451 | PetscBool usecprow=a->compressedrow.use; |
1452 | |
1453 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1453; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1454 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1454,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1455 | ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1455,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1456 | |
1457 | idx = a->j; |
1458 | v = a->a; |
1459 | if (usecprow) { |
1460 | if (zz != yy) { |
1461 | ierr = PetscArraycpy(zarray,yarray,4*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray ,yarray,(4*mbs)*sizeof(*(zarray))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1461,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1462 | } |
1463 | mbs = a->compressedrow.nrows; |
1464 | ii = a->compressedrow.i; |
1465 | ridx = a->compressedrow.rindex; |
1466 | } else { |
1467 | ii = a->i; |
1468 | y = yarray; |
1469 | z = zarray; |
1470 | } |
1471 | |
1472 | for (i=0; i<mbs; i++) { |
1473 | n = ii[1] - ii[0]; ii++; |
1474 | if (usecprow) { |
1475 | z = zarray + 4*ridx[i]; |
1476 | y = yarray + 4*ridx[i]; |
1477 | } |
1478 | sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; |
1479 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
1480 | PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+16*n),*_end = (const char *)((v+16*n)+(16*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
1481 | for (j=0; j<n; j++) { |
1482 | xb = x + 4*(*idx++); |
1483 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; |
1484 | sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; |
1485 | sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; |
1486 | sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; |
1487 | sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; |
1488 | v += 16; |
1489 | } |
1490 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; |
1491 | if (!usecprow) { |
1492 | z += 4; y += 4; |
1493 | } |
1494 | } |
1495 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1495,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1496 | ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1496,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1497 | ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1497,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1498 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1499 | } |
1500 | |
1501 | PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) |
1502 | { |
1503 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1504 | PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; |
1505 | const PetscScalar *x,*xb; |
1506 | PetscScalar *yarray,*zarray; |
1507 | const MatScalar *v; |
1508 | PetscErrorCode ierr; |
1509 | PetscInt mbs = a->mbs,i,j,n; |
1510 | const PetscInt *idx,*ii,*ridx = NULL((void*)0); |
1511 | PetscBool usecprow=a->compressedrow.use; |
1512 | |
1513 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1513; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1514 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1514,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1515 | ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1515,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1516 | |
1517 | idx = a->j; |
1518 | v = a->a; |
1519 | if (usecprow) { |
1520 | if (zz != yy) { |
1521 | ierr = PetscArraycpy(zarray,yarray,5*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray ,yarray,(5*mbs)*sizeof(*(zarray))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1521,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1522 | } |
1523 | mbs = a->compressedrow.nrows; |
1524 | ii = a->compressedrow.i; |
1525 | ridx = a->compressedrow.rindex; |
1526 | } else { |
1527 | ii = a->i; |
1528 | y = yarray; |
1529 | z = zarray; |
1530 | } |
1531 | |
1532 | for (i=0; i<mbs; i++) { |
1533 | n = ii[1] - ii[0]; ii++; |
1534 | if (usecprow) { |
1535 | z = zarray + 5*ridx[i]; |
1536 | y = yarray + 5*ridx[i]; |
1537 | } |
1538 | sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; |
1539 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
1540 | PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+25*n),*_end = (const char *)((v+25*n)+(25*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
1541 | for (j=0; j<n; j++) { |
1542 | xb = x + 5*(*idx++); |
1543 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; |
1544 | sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; |
1545 | sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; |
1546 | sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; |
1547 | sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; |
1548 | sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; |
1549 | v += 25; |
1550 | } |
1551 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; |
1552 | if (!usecprow) { |
1553 | z += 5; y += 5; |
1554 | } |
1555 | } |
1556 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1556,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1557 | ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1557,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1558 | ierr = PetscLogFlops(50.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1558,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1559 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1560 | } |
1561 | PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) |
1562 | { |
1563 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1564 | PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,sum6; |
1565 | const PetscScalar *x,*xb; |
1566 | PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray; |
1567 | const MatScalar *v; |
1568 | PetscErrorCode ierr; |
1569 | PetscInt mbs = a->mbs,i,j,n; |
1570 | const PetscInt *idx,*ii,*ridx=NULL((void*)0); |
1571 | PetscBool usecprow=a->compressedrow.use; |
1572 | |
1573 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1573; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1574 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1574,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1575 | ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1575,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1576 | |
1577 | idx = a->j; |
1578 | v = a->a; |
1579 | if (usecprow) { |
1580 | if (zz != yy) { |
1581 | ierr = PetscArraycpy(zarray,yarray,6*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray ,yarray,(6*mbs)*sizeof(*(zarray))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1581,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1582 | } |
1583 | mbs = a->compressedrow.nrows; |
1584 | ii = a->compressedrow.i; |
1585 | ridx = a->compressedrow.rindex; |
1586 | } else { |
1587 | ii = a->i; |
1588 | y = yarray; |
1589 | z = zarray; |
1590 | } |
1591 | |
1592 | for (i=0; i<mbs; i++) { |
1593 | n = ii[1] - ii[0]; ii++; |
1594 | if (usecprow) { |
1595 | z = zarray + 6*ridx[i]; |
1596 | y = yarray + 6*ridx[i]; |
1597 | } |
1598 | sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; |
1599 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
1600 | PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+36*n),*_end = (const char *)((v+36*n)+(36*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
1601 | for (j=0; j<n; j++) { |
1602 | xb = x + 6*(*idx++); |
1603 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; |
1604 | sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; |
1605 | sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; |
1606 | sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; |
1607 | sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; |
1608 | sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; |
1609 | sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; |
1610 | v += 36; |
1611 | } |
1612 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; |
1613 | if (!usecprow) { |
1614 | z += 6; y += 6; |
1615 | } |
1616 | } |
1617 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1617,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1618 | ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1618,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1619 | ierr = PetscLogFlops(72.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1619,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1620 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1621 | } |
1622 | |
1623 | PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) |
1624 | { |
1625 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1626 | PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7; |
1627 | const PetscScalar *x,*xb; |
1628 | PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray; |
1629 | const MatScalar *v; |
1630 | PetscErrorCode ierr; |
1631 | PetscInt mbs = a->mbs,i,j,n; |
1632 | const PetscInt *idx,*ii,*ridx = NULL((void*)0); |
1633 | PetscBool usecprow=a->compressedrow.use; |
1634 | |
1635 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1635; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1636 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1636,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1637 | ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1637,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1638 | |
1639 | idx = a->j; |
1640 | v = a->a; |
1641 | if (usecprow) { |
1642 | if (zz != yy) { |
1643 | ierr = PetscArraycpy(zarray,yarray,7*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray ,yarray,(7*mbs)*sizeof(*(zarray))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1643,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1644 | } |
1645 | mbs = a->compressedrow.nrows; |
1646 | ii = a->compressedrow.i; |
1647 | ridx = a->compressedrow.rindex; |
1648 | } else { |
1649 | ii = a->i; |
1650 | y = yarray; |
1651 | z = zarray; |
1652 | } |
1653 | |
1654 | for (i=0; i<mbs; i++) { |
1655 | n = ii[1] - ii[0]; ii++; |
1656 | if (usecprow) { |
1657 | z = zarray + 7*ridx[i]; |
1658 | y = yarray + 7*ridx[i]; |
1659 | } |
1660 | sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; |
1661 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
1662 | PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+49*n),*_end = (const char *)((v+49*n)+(49*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
1663 | for (j=0; j<n; j++) { |
1664 | xb = x + 7*(*idx++); |
1665 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; |
1666 | sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; |
1667 | sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; |
1668 | sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; |
1669 | sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; |
1670 | sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; |
1671 | sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; |
1672 | sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; |
1673 | v += 49; |
1674 | } |
1675 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; |
1676 | if (!usecprow) { |
1677 | z += 7; y += 7; |
1678 | } |
1679 | } |
1680 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1680,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1681 | ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1681,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1682 | ierr = PetscLogFlops(98.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1682,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1683 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1684 | } |
1685 | |
1686 | #if defined(PETSC_HAVE_IMMINTRIN_H1) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE1) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) |
1687 | PetscErrorCode MatMultAdd_SeqBAIJ_9_AVX2(Mat A,Vec xx,Vec yy,Vec zz) |
1688 | { |
1689 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1690 | PetscScalar *z = 0,*work,*workt,*zarray; |
1691 | const PetscScalar *x,*xb; |
1692 | const MatScalar *v; |
1693 | PetscErrorCode ierr; |
1694 | PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; |
1695 | PetscInt k; |
1696 | PetscBool usecprow=a->compressedrow.use; |
1697 | const PetscInt *idx,*ii,*ridx=NULL((void*)0); |
1698 | |
1699 | __m256d a0,a1,a2,a3,a4,a5; |
1700 | __m256d w0,w1,w2,w3; |
1701 | __m256d z0,z1,z2; |
1702 | __m256i mask1 = _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL<<63); |
1703 | |
1704 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1704; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1705 | ierr = VecCopy(yy,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1705,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1706 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1706,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1707 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1707,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1708 | |
1709 | idx = a->j; |
1710 | v = a->a; |
1711 | if (usecprow) { |
1712 | mbs = a->compressedrow.nrows; |
1713 | ii = a->compressedrow.i; |
1714 | ridx = a->compressedrow.rindex; |
1715 | } else { |
1716 | mbs = a->mbs; |
1717 | ii = a->i; |
1718 | z = zarray; |
1719 | } |
1720 | |
1721 | if (!a->mult_work) { |
1722 | k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap-> n) : (A->rmap->n)); |
1723 | ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,1723,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1723,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1724 | } |
1725 | |
1726 | work = a->mult_work; |
1727 | for (i=0; i<mbs; i++) { |
1728 | n = ii[1] - ii[0]; ii++; |
1729 | workt = work; |
1730 | for (j=0; j<n; j++) { |
1731 | xb = x + bs*(*idx++); |
1732 | for (k=0; k<bs; k++) workt[k] = xb[k]; |
1733 | workt += bs; |
1734 | } |
1735 | if (usecprow) z = zarray + bs*ridx[i]; |
1736 | |
1737 | z0 = _mm256_loadu_pd(&z[ 0]); z1 = _mm256_loadu_pd(&z[ 4]); z2 = _mm256_set1_pd(z[ 8]); |
1738 | |
1739 | for (j=0; j<n; j++) { |
1740 | // first column of a |
1741 | w0 = _mm256_set1_pd(work[j*9 ]); |
1742 | a0 = _mm256_loadu_pd(&v[j*81 ]); z0 = _mm256_fmadd_pd(a0,w0,z0); |
1743 | a1 = _mm256_loadu_pd(&v[j*81+4]); z1 = _mm256_fmadd_pd(a1,w0,z1); |
1744 | a2 = _mm256_loadu_pd(&v[j*81+8]); z2 = _mm256_fmadd_pd(a2,w0,z2); |
1745 | |
1746 | // second column of a |
1747 | w1 = _mm256_set1_pd(work[j*9+ 1]); |
1748 | a0 = _mm256_loadu_pd(&v[j*81+ 9]); z0 = _mm256_fmadd_pd(a0,w1,z0); |
1749 | a1 = _mm256_loadu_pd(&v[j*81+13]); z1 = _mm256_fmadd_pd(a1,w1,z1); |
1750 | a2 = _mm256_loadu_pd(&v[j*81+17]); z2 = _mm256_fmadd_pd(a2,w1,z2); |
1751 | |
1752 | // third column of a |
1753 | w2 = _mm256_set1_pd(work[j*9+ 2]); |
1754 | a3 = _mm256_loadu_pd(&v[j*81+18]); z0 = _mm256_fmadd_pd(a3,w2,z0); |
1755 | a4 = _mm256_loadu_pd(&v[j*81+22]); z1 = _mm256_fmadd_pd(a4,w2,z1); |
1756 | a5 = _mm256_loadu_pd(&v[j*81+26]); z2 = _mm256_fmadd_pd(a5,w2,z2); |
1757 | |
1758 | // fourth column of a |
1759 | w3 = _mm256_set1_pd(work[j*9+ 3]); |
1760 | a0 = _mm256_loadu_pd(&v[j*81+27]); z0 = _mm256_fmadd_pd(a0,w3,z0); |
1761 | a1 = _mm256_loadu_pd(&v[j*81+31]); z1 = _mm256_fmadd_pd(a1,w3,z1); |
1762 | a2 = _mm256_loadu_pd(&v[j*81+35]); z2 = _mm256_fmadd_pd(a2,w3,z2); |
1763 | |
1764 | // fifth column of a |
1765 | w0 = _mm256_set1_pd(work[j*9+ 4]); |
1766 | a3 = _mm256_loadu_pd(&v[j*81+36]); z0 = _mm256_fmadd_pd(a3,w0,z0); |
1767 | a4 = _mm256_loadu_pd(&v[j*81+40]); z1 = _mm256_fmadd_pd(a4,w0,z1); |
1768 | a5 = _mm256_loadu_pd(&v[j*81+44]); z2 = _mm256_fmadd_pd(a5,w0,z2); |
1769 | |
1770 | // sixth column of a |
1771 | w1 = _mm256_set1_pd(work[j*9+ 5]); |
1772 | a0 = _mm256_loadu_pd(&v[j*81+45]); z0 = _mm256_fmadd_pd(a0,w1,z0); |
1773 | a1 = _mm256_loadu_pd(&v[j*81+49]); z1 = _mm256_fmadd_pd(a1,w1,z1); |
1774 | a2 = _mm256_loadu_pd(&v[j*81+53]); z2 = _mm256_fmadd_pd(a2,w1,z2); |
1775 | |
1776 | // seventh column of a |
1777 | w2 = _mm256_set1_pd(work[j*9+ 6]); |
1778 | a0 = _mm256_loadu_pd(&v[j*81+54]); z0 = _mm256_fmadd_pd(a0,w2,z0); |
1779 | a1 = _mm256_loadu_pd(&v[j*81+58]); z1 = _mm256_fmadd_pd(a1,w2,z1); |
1780 | a2 = _mm256_loadu_pd(&v[j*81+62]); z2 = _mm256_fmadd_pd(a2,w2,z2); |
1781 | |
1782 | // eigth column of a |
1783 | w3 = _mm256_set1_pd(work[j*9+ 7]); |
1784 | a3 = _mm256_loadu_pd(&v[j*81+63]); z0 = _mm256_fmadd_pd(a3,w3,z0); |
1785 | a4 = _mm256_loadu_pd(&v[j*81+67]); z1 = _mm256_fmadd_pd(a4,w3,z1); |
1786 | a5 = _mm256_loadu_pd(&v[j*81+71]); z2 = _mm256_fmadd_pd(a5,w3,z2); |
1787 | |
1788 | // ninth column of a |
1789 | w0 = _mm256_set1_pd(work[j*9+ 8]); |
1790 | a0 = _mm256_loadu_pd(&v[j*81+72]); z0 = _mm256_fmadd_pd(a0,w0,z0); |
1791 | a1 = _mm256_loadu_pd(&v[j*81+76]); z1 = _mm256_fmadd_pd(a1,w0,z1); |
1792 | a2 = _mm256_maskload_pd(&v[j*81+80],mask1); z2 = _mm256_fmadd_pd(a2,w0,z2); |
1793 | } |
1794 | |
1795 | _mm256_storeu_pd(&z[ 0], z0); _mm256_storeu_pd(&z[ 4], z1); _mm256_maskstore_pd(&z[8], mask1, z2); |
1796 | |
1797 | v += n*bs2; |
1798 | if (!usecprow) z += bs; |
1799 | } |
1800 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1800,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1801 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1801,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1802 | ierr = PetscLogFlops(162.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1802,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1803 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1804 | } |
1805 | #endif |
1806 | |
1807 | PetscErrorCode MatMultAdd_SeqBAIJ_11(Mat A,Vec xx,Vec yy,Vec zz) |
1808 | { |
1809 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1810 | PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11; |
1811 | const PetscScalar *x,*xb; |
1812 | PetscScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,*yarray,*zarray; |
1813 | const MatScalar *v; |
1814 | PetscErrorCode ierr; |
1815 | PetscInt mbs = a->mbs,i,j,n; |
1816 | const PetscInt *idx,*ii,*ridx = NULL((void*)0); |
1817 | PetscBool usecprow=a->compressedrow.use; |
1818 | |
1819 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1819; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1820 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1820,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1821 | ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1821,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1822 | |
1823 | idx = a->j; |
1824 | v = a->a; |
1825 | if (usecprow) { |
1826 | if (zz != yy) { |
1827 | ierr = PetscArraycpy(zarray,yarray,7*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray ,yarray,(7*mbs)*sizeof(*(zarray))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1827,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1828 | } |
1829 | mbs = a->compressedrow.nrows; |
1830 | ii = a->compressedrow.i; |
1831 | ridx = a->compressedrow.rindex; |
1832 | } else { |
1833 | ii = a->i; |
1834 | y = yarray; |
1835 | z = zarray; |
1836 | } |
1837 | |
1838 | for (i=0; i<mbs; i++) { |
1839 | n = ii[1] - ii[0]; ii++; |
1840 | if (usecprow) { |
1841 | z = zarray + 11*ridx[i]; |
1842 | y = yarray + 11*ridx[i]; |
1843 | } |
1844 | sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; |
1845 | sum8 = y[7]; sum9 = y[8]; sum10 = y[9]; sum11 = y[10]; |
1846 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
1847 | PetscPrefetchBlock(v+121*n,121*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+121*n),*_end = (const char *)((v+121*n)+(121*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
1848 | for (j=0; j<n; j++) { |
1849 | xb = x + 11*(*idx++); |
1850 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];x8 = xb[7]; x9 = xb[8]; x10 = xb[9]; x11 = xb[10]; |
1851 | sum1 += v[ 0]*x1 + v[ 11]*x2 + v[2*11]*x3 + v[3*11]*x4 + v[4*11]*x5 + v[5*11]*x6 + v[6*11]*x7+ v[7*11]*x8 + v[8*11]*x9 + v[9*11]*x10 + v[10*11]*x11; |
1852 | sum2 += v[1+0]*x1 + v[1+11]*x2 + v[1+2*11]*x3 + v[1+3*11]*x4 + v[1+4*11]*x5 + v[1+5*11]*x6 + v[1+6*11]*x7+ v[1+7*11]*x8 + v[1+8*11]*x9 + v[1+9*11]*x10 + v[1+10*11]*x11; |
1853 | sum3 += v[2+0]*x1 + v[2+11]*x2 + v[2+2*11]*x3 + v[2+3*11]*x4 + v[2+4*11]*x5 + v[2+5*11]*x6 + v[2+6*11]*x7+ v[2+7*11]*x8 + v[2+8*11]*x9 + v[2+9*11]*x10 + v[2+10*11]*x11; |
1854 | sum4 += v[3+0]*x1 + v[3+11]*x2 + v[3+2*11]*x3 + v[3+3*11]*x4 + v[3+4*11]*x5 + v[3+5*11]*x6 + v[3+6*11]*x7+ v[3+7*11]*x8 + v[3+8*11]*x9 + v[3+9*11]*x10 + v[3+10*11]*x11; |
1855 | sum5 += v[4+0]*x1 + v[4+11]*x2 + v[4+2*11]*x3 + v[4+3*11]*x4 + v[4+4*11]*x5 + v[4+5*11]*x6 + v[4+6*11]*x7+ v[4+7*11]*x8 + v[4+8*11]*x9 + v[4+9*11]*x10 + v[4+10*11]*x11; |
1856 | sum6 += v[5+0]*x1 + v[5+11]*x2 + v[5+2*11]*x3 + v[5+3*11]*x4 + v[5+4*11]*x5 + v[5+5*11]*x6 + v[5+6*11]*x7+ v[5+7*11]*x8 + v[5+8*11]*x9 + v[5+9*11]*x10 + v[5+10*11]*x11; |
1857 | sum7 += v[6+0]*x1 + v[6+11]*x2 + v[6+2*11]*x3 + v[6+3*11]*x4 + v[6+4*11]*x5 + v[6+5*11]*x6 + v[6+6*11]*x7+ v[6+7*11]*x8 + v[6+8*11]*x9 + v[6+9*11]*x10 + v[6+10*11]*x11; |
1858 | sum8 += v[7+0]*x1 + v[7+11]*x2 + v[7+2*11]*x3 + v[7+3*11]*x4 + v[7+4*11]*x5 + v[7+5*11]*x6 + v[7+6*11]*x7+ v[7+7*11]*x8 + v[7+8*11]*x9 + v[7+9*11]*x10 + v[7+10*11]*x11; |
1859 | sum9 += v[8+0]*x1 + v[8+11]*x2 + v[8+2*11]*x3 + v[8+3*11]*x4 + v[8+4*11]*x5 + v[8+5*11]*x6 + v[8+6*11]*x7+ v[8+7*11]*x8 + v[8+8*11]*x9 + v[8+9*11]*x10 + v[8+10*11]*x11; |
1860 | sum10 += v[9+0]*x1 + v[9+11]*x2 + v[9+2*11]*x3 + v[9+3*11]*x4 + v[9+4*11]*x5 + v[9+5*11]*x6 + v[9+6*11]*x7+ v[9+7*11]*x8 + v[9+8*11]*x9 + v[9+9*11]*x10 + v[9+10*11]*x11; |
1861 | sum11 += v[10+0]*x1 + v[10+11]*x2 + v[10+2*11]*x3 + v[10+3*11]*x4 + v[10+4*11]*x5 + v[10+5*11]*x6 + v[10+6*11]*x7+ v[10+7*11]*x8 + v[10+8*11]*x9 + v[10+9*11]*x10 + v[10+10*11]*x11; |
1862 | v += 121; |
1863 | } |
1864 | z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; |
1865 | z[7] = sum6; z[8] = sum7; z[9] = sum8; z[10] = sum9; z[11] = sum10; |
1866 | if (!usecprow) { |
1867 | z += 11; y += 11; |
1868 | } |
1869 | } |
1870 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1870,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1871 | ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1871,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1872 | ierr = PetscLogFlops(242.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1872,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1873 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1874 | } |
1875 | |
1876 | PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) |
1877 | { |
1878 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1879 | PetscScalar *z = 0,*work,*workt,*zarray; |
1880 | const PetscScalar *x,*xb; |
1881 | const MatScalar *v; |
1882 | PetscErrorCode ierr; |
1883 | PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; |
1884 | PetscInt ncols,k; |
1885 | const PetscInt *ridx = NULL((void*)0),*idx,*ii; |
1886 | PetscBool usecprow = a->compressedrow.use; |
1887 | |
1888 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1888; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1889 | ierr = VecCopy(yy,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1889,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1890 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1890,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1891 | ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1891,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1892 | |
1893 | idx = a->j; |
1894 | v = a->a; |
1895 | if (usecprow) { |
1896 | mbs = a->compressedrow.nrows; |
1897 | ii = a->compressedrow.i; |
1898 | ridx = a->compressedrow.rindex; |
1899 | } else { |
1900 | mbs = a->mbs; |
1901 | ii = a->i; |
1902 | z = zarray; |
1903 | } |
1904 | |
1905 | if (!a->mult_work) { |
1906 | k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap-> n) : (A->rmap->n)); |
1907 | ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,1907,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1907,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1908 | } |
1909 | work = a->mult_work; |
1910 | for (i=0; i<mbs; i++) { |
1911 | n = ii[1] - ii[0]; ii++; |
1912 | ncols = n*bs; |
1913 | workt = work; |
1914 | for (j=0; j<n; j++) { |
1915 | xb = x + bs*(*idx++); |
1916 | for (k=0; k<bs; k++) workt[k] = xb[k]; |
1917 | workt += bs; |
1918 | } |
1919 | if (usecprow) z = zarray + bs*ridx[i]; |
1920 | PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z){ PetscScalar _one = 1.0; PetscBLASInt _ione = 1,_bbs,_bncols ; PetscErrorCode _ierr; _ierr = PetscBLASIntCast(bs,&_bbs );do {if (__builtin_expect(!!(_ierr),0)) return PetscError((( MPI_Comm)0x44000001),1920,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_ierr,PETSC_ERROR_REPEAT," ");} while (0); _ierr = PetscBLASIntCast (ncols,&_bncols);do {if (__builtin_expect(!!(_ierr),0)) return PetscError(((MPI_Comm)0x44000001),1920,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_ierr,PETSC_ERROR_REPEAT," ");} while (0); do { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack ->function[petscstack->currentsize] = "BLASgemv"; petscstack ->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1920; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_TRUE || petscstack->hotdepth); } ; } while (0); dgemv_("N",&(_bbs),&(_bncols),&_one,v,&(_bbs ),work,&_ione,&_one,z,&_ione); do { do {PetscErrorCode _7_ierr = PetscMallocValidate(1920,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" );do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),1920,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do { ; if (petscstack && petscstack->currentsize > 0 ) { petscstack->currentsize--; petscstack->function[petscstack ->currentsize] = 0; petscstack->file[petscstack->currentsize ] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack-> hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ; } while (0); } while (0); } while(0); }; |
1921 | /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */ |
1922 | v += n*bs2; |
1923 | if (!usecprow) z += bs; |
1924 | } |
1925 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1925,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1926 | ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1926,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1927 | ierr = PetscLogFlops(2.0*a->nz*bs2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1927,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1928 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1929 | } |
1930 | |
1931 | PetscErrorCode MatMultHermitianTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) |
1932 | { |
1933 | PetscScalar zero = 0.0; |
1934 | PetscErrorCode ierr; |
1935 | |
1936 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1936; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1937 | ierr = VecSet(zz,zero);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1937,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1938 | ierr = MatMultHermitianTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1938,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1939 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1940 | } |
1941 | |
1942 | PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz) |
1943 | { |
1944 | PetscScalar zero = 0.0; |
1945 | PetscErrorCode ierr; |
1946 | |
1947 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1947; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1948 | ierr = VecSet(zz,zero);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1948,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1949 | ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1949,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1950 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
1951 | } |
1952 | |
1953 | PetscErrorCode MatMultHermitianTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) |
1954 | { |
1955 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
1956 | PetscScalar *z,x1,x2,x3,x4,x5; |
1957 | const PetscScalar *x,*xb = NULL((void*)0); |
1958 | const MatScalar *v; |
1959 | PetscErrorCode ierr; |
1960 | PetscInt mbs,i,rval,bs=A->rmap->bs,j,n; |
1961 | const PetscInt *idx,*ii,*ib,*ridx = NULL((void*)0); |
1962 | Mat_CompressedRow cprow = a->compressedrow; |
1963 | PetscBool usecprow = cprow.use; |
1964 | |
1965 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 1965; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
1966 | if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1966,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); } |
1967 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1967,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1968 | ierr = VecGetArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1968,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
1969 | |
1970 | idx = a->j; |
1971 | v = a->a; |
1972 | if (usecprow) { |
1973 | mbs = cprow.nrows; |
1974 | ii = cprow.i; |
1975 | ridx = cprow.rindex; |
1976 | } else { |
1977 | mbs=a->mbs; |
1978 | ii = a->i; |
1979 | xb = x; |
1980 | } |
1981 | |
1982 | switch (bs) { |
1983 | case 1: |
1984 | for (i=0; i<mbs; i++) { |
1985 | if (usecprow) xb = x + ridx[i]; |
1986 | x1 = xb[0]; |
1987 | ib = idx + ii[0]; |
1988 | n = ii[1] - ii[0]; ii++; |
1989 | for (j=0; j<n; j++) { |
1990 | rval = ib[j]; |
1991 | z[rval] += PetscConj(*v)(*v) * x1; |
1992 | v++; |
1993 | } |
1994 | if (!usecprow) xb++; |
1995 | } |
1996 | break; |
1997 | case 2: |
1998 | for (i=0; i<mbs; i++) { |
1999 | if (usecprow) xb = x + 2*ridx[i]; |
2000 | x1 = xb[0]; x2 = xb[1]; |
2001 | ib = idx + ii[0]; |
2002 | n = ii[1] - ii[0]; ii++; |
2003 | for (j=0; j<n; j++) { |
2004 | rval = ib[j]*2; |
2005 | z[rval++] += PetscConj(v[0])(v[0])*x1 + PetscConj(v[1])(v[1])*x2; |
2006 | z[rval++] += PetscConj(v[2])(v[2])*x1 + PetscConj(v[3])(v[3])*x2; |
2007 | v += 4; |
2008 | } |
2009 | if (!usecprow) xb += 2; |
2010 | } |
2011 | break; |
2012 | case 3: |
2013 | for (i=0; i<mbs; i++) { |
2014 | if (usecprow) xb = x + 3*ridx[i]; |
2015 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; |
2016 | ib = idx + ii[0]; |
2017 | n = ii[1] - ii[0]; ii++; |
2018 | for (j=0; j<n; j++) { |
2019 | rval = ib[j]*3; |
2020 | z[rval++] += PetscConj(v[0])(v[0])*x1 + PetscConj(v[1])(v[1])*x2 + PetscConj(v[2])(v[2])*x3; |
2021 | z[rval++] += PetscConj(v[3])(v[3])*x1 + PetscConj(v[4])(v[4])*x2 + PetscConj(v[5])(v[5])*x3; |
2022 | z[rval++] += PetscConj(v[6])(v[6])*x1 + PetscConj(v[7])(v[7])*x2 + PetscConj(v[8])(v[8])*x3; |
2023 | v += 9; |
2024 | } |
2025 | if (!usecprow) xb += 3; |
2026 | } |
2027 | break; |
2028 | case 4: |
2029 | for (i=0; i<mbs; i++) { |
2030 | if (usecprow) xb = x + 4*ridx[i]; |
2031 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; |
2032 | ib = idx + ii[0]; |
2033 | n = ii[1] - ii[0]; ii++; |
2034 | for (j=0; j<n; j++) { |
2035 | rval = ib[j]*4; |
2036 | z[rval++] += PetscConj(v[0])(v[0])*x1 + PetscConj(v[1])(v[1])*x2 + PetscConj(v[2])(v[2])*x3 + PetscConj(v[3])(v[3])*x4; |
2037 | z[rval++] += PetscConj(v[4])(v[4])*x1 + PetscConj(v[5])(v[5])*x2 + PetscConj(v[6])(v[6])*x3 + PetscConj(v[7])(v[7])*x4; |
2038 | z[rval++] += PetscConj(v[8])(v[8])*x1 + PetscConj(v[9])(v[9])*x2 + PetscConj(v[10])(v[10])*x3 + PetscConj(v[11])(v[11])*x4; |
2039 | z[rval++] += PetscConj(v[12])(v[12])*x1 + PetscConj(v[13])(v[13])*x2 + PetscConj(v[14])(v[14])*x3 + PetscConj(v[15])(v[15])*x4; |
2040 | v += 16; |
2041 | } |
2042 | if (!usecprow) xb += 4; |
2043 | } |
2044 | break; |
2045 | case 5: |
2046 | for (i=0; i<mbs; i++) { |
2047 | if (usecprow) xb = x + 5*ridx[i]; |
2048 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; |
2049 | x4 = xb[3]; x5 = xb[4]; |
2050 | ib = idx + ii[0]; |
2051 | n = ii[1] - ii[0]; ii++; |
2052 | for (j=0; j<n; j++) { |
2053 | rval = ib[j]*5; |
2054 | z[rval++] += PetscConj(v[0])(v[0])*x1 + PetscConj(v[1])(v[1])*x2 + PetscConj(v[2])(v[2])*x3 + PetscConj(v[3])(v[3])*x4 + PetscConj(v[4])(v[4])*x5; |
2055 | z[rval++] += PetscConj(v[5])(v[5])*x1 + PetscConj(v[6])(v[6])*x2 + PetscConj(v[7])(v[7])*x3 + PetscConj(v[8])(v[8])*x4 + PetscConj(v[9])(v[9])*x5; |
2056 | z[rval++] += PetscConj(v[10])(v[10])*x1 + PetscConj(v[11])(v[11])*x2 + PetscConj(v[12])(v[12])*x3 + PetscConj(v[13])(v[13])*x4 + PetscConj(v[14])(v[14])*x5; |
2057 | z[rval++] += PetscConj(v[15])(v[15])*x1 + PetscConj(v[16])(v[16])*x2 + PetscConj(v[17])(v[17])*x3 + PetscConj(v[18])(v[18])*x4 + PetscConj(v[19])(v[19])*x5; |
2058 | z[rval++] += PetscConj(v[20])(v[20])*x1 + PetscConj(v[21])(v[21])*x2 + PetscConj(v[22])(v[22])*x3 + PetscConj(v[23])(v[23])*x4 + PetscConj(v[24])(v[24])*x5; |
2059 | v += 25; |
2060 | } |
2061 | if (!usecprow) xb += 5; |
2062 | } |
2063 | break; |
2064 | default: /* block sizes larger than 5 by 5 are handled by BLAS */ |
2065 | SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size larger than 5 is not supported yet")return PetscError(((MPI_Comm)0x44000001),2065,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,56,PETSC_ERROR_INITIAL,"block size larger than 5 is not supported yet" ); |
2066 | #if 0 |
2067 | { |
2068 | PetscInt ncols,k,bs2=a->bs2; |
2069 | PetscScalar *work,*workt,zb; |
2070 | const PetscScalar *xtmp; |
2071 | if (!a->mult_work) { |
2072 | k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap-> n) : (A->rmap->n)); |
2073 | ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,2073,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2073,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2074 | } |
2075 | work = a->mult_work; |
2076 | xtmp = x; |
2077 | for (i=0; i<mbs; i++) { |
2078 | n = ii[1] - ii[0]; ii++; |
2079 | ncols = n*bs; |
2080 | ierr = PetscArrayzero(work,ncols)PetscMemzero(work,(ncols)*sizeof(*(work)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2080,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2081 | if (usecprow) xtmp = x + bs*ridx[i]; |
2082 | PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work){ PetscScalar _one = 1.0; PetscBLASInt _ione = 1,_bbs,_bncols ; PetscErrorCode _ierr; _ierr = PetscBLASIntCast(bs,&_bbs );do {if (__builtin_expect(!!(_ierr),0)) return PetscError((( MPI_Comm)0x44000001),2082,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_ierr,PETSC_ERROR_REPEAT," ");} while (0); _ierr = PetscBLASIntCast (ncols,&_bncols);do {if (__builtin_expect(!!(_ierr),0)) return PetscError(((MPI_Comm)0x44000001),2082,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_ierr,PETSC_ERROR_REPEAT," ");} while (0); do { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack ->function[petscstack->currentsize] = "BLASgemv"; petscstack ->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2082; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_TRUE || petscstack->hotdepth); } ; } while (0); dgemv_("T",&_bbs,&_bncols,&_one,v,&_bbs,xtmp ,&_ione,&_one,work,&_ione); do { do {PetscErrorCode _7_ierr = PetscMallocValidate(2082,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" );do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),2082,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do { ; if (petscstack && petscstack->currentsize > 0 ) { petscstack->currentsize--; petscstack->function[petscstack ->currentsize] = 0; petscstack->file[petscstack->currentsize ] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack-> hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ; } while (0); } while (0); } while(0); }; |
2083 | /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ |
2084 | v += n*bs2; |
2085 | if (!usecprow) xtmp += bs; |
2086 | workt = work; |
2087 | for (j=0; j<n; j++) { |
2088 | zb = z + bs*(*idx++); |
2089 | for (k=0; k<bs; k++) zb[k] += workt[k] ; |
2090 | workt += bs; |
2091 | } |
2092 | } |
2093 | } |
2094 | #endif |
2095 | } |
2096 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2096,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2097 | ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2097,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2098 | ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2098,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2099 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2100 | } |
2101 | |
2102 | PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) |
2103 | { |
2104 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
2105 | PetscScalar *zb,*z,x1,x2,x3,x4,x5; |
2106 | const PetscScalar *x,*xb = 0; |
2107 | const MatScalar *v; |
2108 | PetscErrorCode ierr; |
2109 | PetscInt mbs,i,rval,bs=A->rmap->bs,j,n,bs2=a->bs2; |
2110 | const PetscInt *idx,*ii,*ib,*ridx = NULL((void*)0); |
2111 | Mat_CompressedRow cprow = a->compressedrow; |
2112 | PetscBool usecprow=cprow.use; |
2113 | |
2114 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2114; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2115 | if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2115,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); } |
2116 | ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2116,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2117 | ierr = VecGetArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2117,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2118 | |
2119 | idx = a->j; |
2120 | v = a->a; |
2121 | if (usecprow) { |
2122 | mbs = cprow.nrows; |
2123 | ii = cprow.i; |
2124 | ridx = cprow.rindex; |
2125 | } else { |
2126 | mbs=a->mbs; |
2127 | ii = a->i; |
2128 | xb = x; |
2129 | } |
2130 | |
2131 | switch (bs) { |
2132 | case 1: |
2133 | for (i=0; i<mbs; i++) { |
2134 | if (usecprow) xb = x + ridx[i]; |
2135 | x1 = xb[0]; |
2136 | ib = idx + ii[0]; |
2137 | n = ii[1] - ii[0]; ii++; |
2138 | for (j=0; j<n; j++) { |
2139 | rval = ib[j]; |
2140 | z[rval] += *v * x1; |
2141 | v++; |
2142 | } |
2143 | if (!usecprow) xb++; |
2144 | } |
2145 | break; |
2146 | case 2: |
2147 | for (i=0; i<mbs; i++) { |
2148 | if (usecprow) xb = x + 2*ridx[i]; |
2149 | x1 = xb[0]; x2 = xb[1]; |
2150 | ib = idx + ii[0]; |
2151 | n = ii[1] - ii[0]; ii++; |
2152 | for (j=0; j<n; j++) { |
2153 | rval = ib[j]*2; |
2154 | z[rval++] += v[0]*x1 + v[1]*x2; |
2155 | z[rval++] += v[2]*x1 + v[3]*x2; |
2156 | v += 4; |
2157 | } |
2158 | if (!usecprow) xb += 2; |
2159 | } |
2160 | break; |
2161 | case 3: |
2162 | for (i=0; i<mbs; i++) { |
2163 | if (usecprow) xb = x + 3*ridx[i]; |
2164 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; |
2165 | ib = idx + ii[0]; |
2166 | n = ii[1] - ii[0]; ii++; |
2167 | for (j=0; j<n; j++) { |
2168 | rval = ib[j]*3; |
2169 | z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; |
2170 | z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; |
2171 | z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; |
2172 | v += 9; |
2173 | } |
2174 | if (!usecprow) xb += 3; |
2175 | } |
2176 | break; |
2177 | case 4: |
2178 | for (i=0; i<mbs; i++) { |
2179 | if (usecprow) xb = x + 4*ridx[i]; |
2180 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; |
2181 | ib = idx + ii[0]; |
2182 | n = ii[1] - ii[0]; ii++; |
2183 | for (j=0; j<n; j++) { |
2184 | rval = ib[j]*4; |
2185 | z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; |
2186 | z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; |
2187 | z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; |
2188 | z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; |
2189 | v += 16; |
2190 | } |
2191 | if (!usecprow) xb += 4; |
2192 | } |
2193 | break; |
2194 | case 5: |
2195 | for (i=0; i<mbs; i++) { |
2196 | if (usecprow) xb = x + 5*ridx[i]; |
2197 | x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; |
2198 | x4 = xb[3]; x5 = xb[4]; |
2199 | ib = idx + ii[0]; |
2200 | n = ii[1] - ii[0]; ii++; |
2201 | for (j=0; j<n; j++) { |
2202 | rval = ib[j]*5; |
2203 | z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; |
2204 | z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; |
2205 | z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; |
2206 | z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; |
2207 | z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; |
2208 | v += 25; |
2209 | } |
2210 | if (!usecprow) xb += 5; |
2211 | } |
2212 | break; |
2213 | default: { /* block sizes larger then 5 by 5 are handled by BLAS */ |
2214 | PetscInt ncols,k; |
2215 | PetscScalar *work,*workt; |
2216 | const PetscScalar *xtmp; |
2217 | if (!a->mult_work) { |
2218 | k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap-> n) : (A->rmap->n)); |
2219 | ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,2219,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2219,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2220 | } |
2221 | work = a->mult_work; |
2222 | xtmp = x; |
2223 | for (i=0; i<mbs; i++) { |
2224 | n = ii[1] - ii[0]; ii++; |
2225 | ncols = n*bs; |
2226 | ierr = PetscArrayzero(work,ncols)PetscMemzero(work,(ncols)*sizeof(*(work)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2226,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2227 | if (usecprow) xtmp = x + bs*ridx[i]; |
2228 | PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work){ PetscScalar _one = 1.0; PetscBLASInt _ione = 1,_bbs,_bncols ; PetscErrorCode _ierr; _ierr = PetscBLASIntCast(bs,&_bbs );do {if (__builtin_expect(!!(_ierr),0)) return PetscError((( MPI_Comm)0x44000001),2228,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_ierr,PETSC_ERROR_REPEAT," ");} while (0); _ierr = PetscBLASIntCast (ncols,&_bncols);do {if (__builtin_expect(!!(_ierr),0)) return PetscError(((MPI_Comm)0x44000001),2228,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_ierr,PETSC_ERROR_REPEAT," ");} while (0); do { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack ->function[petscstack->currentsize] = "BLASgemv"; petscstack ->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2228; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_TRUE || petscstack->hotdepth); } ; } while (0); dgemv_("T",&_bbs,&_bncols,&_one,v,&_bbs,xtmp ,&_ione,&_one,work,&_ione); do { do {PetscErrorCode _7_ierr = PetscMallocValidate(2228,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" );do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),2228,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do { ; if (petscstack && petscstack->currentsize > 0 ) { petscstack->currentsize--; petscstack->function[petscstack ->currentsize] = 0; petscstack->file[petscstack->currentsize ] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack-> hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ; } while (0); } while (0); } while(0); }; |
2229 | /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */ |
2230 | v += n*bs2; |
2231 | if (!usecprow) xtmp += bs; |
2232 | workt = work; |
2233 | for (j=0; j<n; j++) { |
2234 | zb = z + bs*(*idx++); |
2235 | for (k=0; k<bs; k++) zb[k] += workt[k]; |
2236 | workt += bs; |
2237 | } |
2238 | } |
2239 | } |
2240 | } |
2241 | ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2241,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2242 | ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2242,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2243 | ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2243,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2244 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2245 | } |
2246 | |
2247 | PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha) |
2248 | { |
2249 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; |
2250 | PetscInt totalnz = a->bs2*a->nz; |
2251 | PetscScalar oalpha = alpha; |
2252 | PetscErrorCode ierr; |
2253 | PetscBLASInt one = 1,tnz; |
2254 | |
2255 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2255; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2256 | ierr = PetscBLASIntCast(totalnz,&tnz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2256,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2257 | PetscStackCallBLAS("BLASscal",BLASscal_(&tnz,&oalpha,a->a,&one))do { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = "BLASscal"; petscstack->file[petscstack->currentsize ] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2257; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_TRUE || petscstack->hotdepth); } ; } while (0); dscal_(&tnz,&oalpha,a->a,&one); do { do {PetscErrorCode _7_ierr = PetscMallocValidate(2257,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" );do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),2257,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do { ; if (petscstack && petscstack->currentsize > 0 ) { petscstack->currentsize--; petscstack->function[petscstack ->currentsize] = 0; petscstack->file[petscstack->currentsize ] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack-> hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ; } while (0); } while (0); } while(0); |
2258 | ierr = PetscLogFlops(totalnz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2258,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2259 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2260 | } |
2261 | |
2262 | PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm) |
2263 | { |
2264 | PetscErrorCode ierr; |
2265 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
2266 | MatScalar *v = a->a; |
2267 | PetscReal sum = 0.0; |
2268 | PetscInt i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1; |
2269 | |
2270 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2270; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2271 | if (type == NORM_FROBENIUS) { |
2272 | #if defined(PETSC_USE_REAL___FP16) |
2273 | PetscBLASInt one = 1,cnt = bs2*nz; |
2274 | *norm = BLASnrm2_dnrm2_(&cnt,v,&one); |
2275 | #else |
2276 | for (i=0; i<bs2*nz; i++) { |
2277 | sum += PetscRealPart(PetscConj(*v)*(*v))((*v)*(*v)); v++; |
2278 | } |
2279 | #endif |
2280 | *norm = PetscSqrtReal(sum)sqrt(sum); |
2281 | ierr = PetscLogFlops(2*bs2*nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2281,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2282 | } else if (type == NORM_1) { /* maximum column sum */ |
2283 | PetscReal *tmp; |
2284 | PetscInt *bcol = a->j; |
2285 | ierr = PetscCalloc1(A->cmap->n+1,&tmp)PetscMallocA(1,PETSC_TRUE,2285,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,(size_t)(A->cmap->n+1)*sizeof(**(&tmp)),(&tmp) );CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2285,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2286 | for (i=0; i<nz; i++) { |
2287 | for (j=0; j<bs; j++) { |
2288 | k1 = bs*(*bcol) + j; /* column index */ |
2289 | for (k=0; k<bs; k++) { |
2290 | tmp[k1] += PetscAbsScalar(*v); v++; |
2291 | } |
2292 | } |
2293 | bcol++; |
2294 | } |
2295 | *norm = 0.0; |
2296 | for (j=0; j<A->cmap->n; j++) { |
2297 | if (tmp[j] > *norm) *norm = tmp[j]; |
2298 | } |
2299 | ierr = PetscFree(tmp)((*PetscTrFree)((void*)(tmp),2299,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ) || ((tmp) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2299,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2300 | ierr = PetscLogFlops(PetscMax(bs2*nz-1,0)(((bs2*nz-1)<(0)) ? (0) : (bs2*nz-1)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2300,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2301 | } else if (type == NORM_INFINITY) { /* maximum row sum */ |
2302 | *norm = 0.0; |
2303 | for (k=0; k<bs; k++) { |
2304 | for (j=0; j<a->mbs; j++) { |
2305 | v = a->a + bs2*a->i[j] + k; |
2306 | sum = 0.0; |
2307 | for (i=0; i<a->i[j+1]-a->i[j]; i++) { |
2308 | for (k1=0; k1<bs; k1++) { |
2309 | sum += PetscAbsScalar(*v); |
2310 | v += bs; |
2311 | } |
2312 | } |
2313 | if (sum > *norm) *norm = sum; |
2314 | } |
2315 | } |
2316 | ierr = PetscLogFlops(PetscMax(bs2*nz-1,0)(((bs2*nz-1)<(0)) ? (0) : (bs2*nz-1)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2316,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2317 | } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet")return PetscError(((MPI_Comm)0x44000001),2317,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,56,PETSC_ERROR_INITIAL,"No support for this norm yet"); |
2318 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2319 | } |
2320 | |
2321 | |
2322 | PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscBool * flg) |
2323 | { |
2324 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; |
2325 | PetscErrorCode ierr; |
2326 | |
2327 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2327; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2328 | /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ |
2329 | if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) { |
2330 | *flg = PETSC_FALSE; |
2331 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2332 | } |
2333 | |
2334 | /* if the a->i are the same */ |
2335 | ierr = PetscArraycmp(a->i,b->i,a->mbs+1,flg)((sizeof(*(a->i)) != sizeof(*(b->i))) || PetscMemcmp(a-> i,b->i,(a->mbs+1)*sizeof(*(a->i)),flg));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2335,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2336 | if (!*flg) PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2337 | |
2338 | /* if a->j are the same */ |
2339 | ierr = PetscArraycmp(a->j,b->j,a->nz,flg)((sizeof(*(a->j)) != sizeof(*(b->j))) || PetscMemcmp(a-> j,b->j,(a->nz)*sizeof(*(a->j)),flg));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2339,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2340 | if (!*flg) PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2341 | |
2342 | /* if a->a are the same */ |
2343 | ierr = PetscArraycmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs),flg)((sizeof(*(a->a)) != sizeof(*(b->a))) || PetscMemcmp(a-> a,b->a,((a->nz)*(A->rmap->bs)*(B->rmap->bs) )*sizeof(*(a->a)),flg));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2343,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2344 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2345 | |
2346 | } |
2347 | |
2348 | PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v) |
2349 | { |
2350 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
2351 | PetscErrorCode ierr; |
2352 | PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2; |
2353 | PetscScalar *x,zero = 0.0; |
2354 | MatScalar *aa,*aa_j; |
2355 | |
2356 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2356; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2357 | if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix")return PetscError(((MPI_Comm)0x44000001),2357,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,73,PETSC_ERROR_INITIAL,"Not for factored matrix"); |
2358 | bs = A->rmap->bs; |
2359 | aa = a->a; |
2360 | ai = a->i; |
2361 | aj = a->j; |
2362 | ambs = a->mbs; |
2363 | bs2 = a->bs2; |
2364 | |
2365 | ierr = VecSet(v,zero);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2365,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2366 | ierr = VecGetArray(v,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2366,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2367 | ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2367,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2368 | if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector")return PetscError(((MPI_Comm)0x44000001),2368,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,60,PETSC_ERROR_INITIAL,"Nonconforming matrix and vector"); |
2369 | for (i=0; i<ambs; i++) { |
2370 | for (j=ai[i]; j<ai[i+1]; j++) { |
2371 | if (aj[j] == i) { |
2372 | row = i*bs; |
2373 | aa_j = aa+j*bs2; |
2374 | for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; |
2375 | break; |
2376 | } |
2377 | } |
2378 | } |
2379 | ierr = VecRestoreArray(v,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2379,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2380 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2381 | } |
2382 | |
2383 | PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) |
2384 | { |
2385 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
2386 | const PetscScalar *l,*r,*li,*ri; |
2387 | PetscScalar x; |
2388 | MatScalar *aa, *v; |
2389 | PetscErrorCode ierr; |
2390 | PetscInt i,j,k,lm,rn,M,m,n,mbs,tmp,bs,bs2,iai; |
2391 | const PetscInt *ai,*aj; |
2392 | |
2393 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2393; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2394 | ai = a->i; |
2395 | aj = a->j; |
2396 | aa = a->a; |
2397 | m = A->rmap->n; |
2398 | n = A->cmap->n; |
2399 | bs = A->rmap->bs; |
2400 | mbs = a->mbs; |
2401 | bs2 = a->bs2; |
2402 | if (ll) { |
2403 | ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2403,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2404 | ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2404,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2405 | if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length")return PetscError(((MPI_Comm)0x44000001),2405,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,60,PETSC_ERROR_INITIAL,"Left scaling vector wrong length"); |
2406 | for (i=0; i<mbs; i++) { /* for each block row */ |
2407 | M = ai[i+1] - ai[i]; |
2408 | li = l + i*bs; |
2409 | v = aa + bs2*ai[i]; |
2410 | for (j=0; j<M; j++) { /* for each block */ |
2411 | for (k=0; k<bs2; k++) { |
2412 | (*v++) *= li[k%bs]; |
2413 | } |
2414 | } |
2415 | } |
2416 | ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2416,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2417 | ierr = PetscLogFlops(a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2417,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2418 | } |
2419 | |
2420 | if (rr) { |
2421 | ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2421,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2422 | ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2422,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2423 | if (rn != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length")return PetscError(((MPI_Comm)0x44000001),2423,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,60,PETSC_ERROR_INITIAL,"Right scaling vector wrong length"); |
2424 | for (i=0; i<mbs; i++) { /* for each block row */ |
2425 | iai = ai[i]; |
2426 | M = ai[i+1] - iai; |
2427 | v = aa + bs2*iai; |
2428 | for (j=0; j<M; j++) { /* for each block */ |
2429 | ri = r + bs*aj[iai+j]; |
2430 | for (k=0; k<bs; k++) { |
2431 | x = ri[k]; |
2432 | for (tmp=0; tmp<bs; tmp++) v[tmp] *= x; |
2433 | v += bs; |
2434 | } |
2435 | } |
2436 | } |
2437 | ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2437,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2438 | ierr = PetscLogFlops(a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2438,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2439 | } |
2440 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2441 | } |
2442 | |
2443 | |
2444 | PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) |
2445 | { |
2446 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
2447 | |
2448 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2448; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2449 | info->block_size = a->bs2; |
2450 | info->nz_allocated = a->bs2*a->maxnz; |
2451 | info->nz_used = a->bs2*a->nz; |
2452 | info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); |
2453 | info->assemblies = A->num_ass; |
2454 | info->mallocs = A->info.mallocs; |
2455 | info->memory = ((PetscObject)A)->mem; |
2456 | if (A->factortype) { |
2457 | info->fill_ratio_given = A->info.fill_ratio_given; |
2458 | info->fill_ratio_needed = A->info.fill_ratio_needed; |
2459 | info->factor_mallocs = A->info.factor_mallocs; |
2460 | } else { |
2461 | info->fill_ratio_given = 0; |
2462 | info->fill_ratio_needed = 0; |
2463 | info->factor_mallocs = 0; |
2464 | } |
2465 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2466 | } |
2467 | |
2468 | PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A) |
2469 | { |
2470 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
2471 | PetscErrorCode ierr; |
2472 | |
2473 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2473; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2474 | ierr = PetscArrayzero(a->a,a->bs2*a->i[a->mbs])PetscMemzero(a->a,(a->bs2*a->i[a->mbs])*sizeof(*( a->a)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2474,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2475 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2476 | } |
2477 | |
2478 | PETSC_INTERNextern __attribute__((visibility ("hidden"))) PetscErrorCode MatMatMult_SeqBAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) |
2479 | { |
2480 | PetscErrorCode ierr; |
2481 | |
2482 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2482; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2483 | if (scall == MAT_INITIAL_MATRIX) { |
2484 | ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultSymbolic].active) ? (*PetscLogPLB)((MAT_MatMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2484,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2485 | ierr = MatMatMultSymbolic_SeqBAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2485,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2486 | ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultSymbolic].active) ? (*PetscLogPLE)((MAT_MatMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2486,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2487 | } |
2488 | ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultNumeric].active) ? (*PetscLogPLB)((MAT_MatMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2488,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2489 | ierr = MatMatMultNumeric_SeqBAIJ_SeqDense(A,B,*C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2489,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2490 | ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultNumeric].active) ? (*PetscLogPLE)((MAT_MatMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2490,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2491 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2492 | } |
2493 | |
2494 | PetscErrorCode MatMatMultSymbolic_SeqBAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) |
2495 | { |
2496 | PetscErrorCode ierr; |
2497 | |
2498 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2498; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2499 | ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2499,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2500 | |
2501 | (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqBAIJ_SeqDense; |
2502 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2503 | } |
2504 | |
2505 | PetscErrorCode MatMatMult_SeqBAIJ_2_Private(Mat A,PetscScalar* b,PetscInt bm,PetscScalar* c,PetscInt cm,PetscInt cn) |
2506 | { |
2507 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
2508 | PetscScalar *z = 0,sum1,sum2,*zarray; |
2509 | const PetscScalar *x,*xb; |
2510 | PetscScalar x1,x2; |
2511 | const MatScalar *v,*vv; |
2512 | PetscErrorCode ierr; |
2513 | PetscInt mbs,i,*idx,*ii,j,*jj,n,k,*ridx=NULL((void*)0); |
2514 | PetscBool usecprow=a->compressedrow.use; |
2515 | |
2516 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2516; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2517 | |
2518 | idx = a->j; |
2519 | v = a->a; |
2520 | if (usecprow) { |
2521 | mbs = a->compressedrow.nrows; |
2522 | ii = a->compressedrow.i; |
2523 | ridx = a->compressedrow.rindex; |
2524 | } else { |
2525 | mbs = a->mbs; |
2526 | ii = a->i; |
2527 | z = c; |
2528 | } |
2529 | |
2530 | for (i=0; i<mbs; i++) { |
2531 | n = ii[1] - ii[0]; ii++; |
2532 | PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char *)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Indices for the next row (assumes same size as this one) */ |
2533 | PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+4*n),*_end = (const char *)((v+4*n)+(4*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch ((void *)((const char*)(_p)), ((((0))) >> 2) & 1, ( ((0))) & 0x3)); } while (0); /* Entries for the next row */ |
2534 | if (usecprow) z = c + 2*ridx[i]; |
2535 | jj = idx; |
2536 | vv = v; |
2537 | for (k=0; k<cn; k++) { |
2538 | idx = jj; |
2539 | v = vv; |
2540 | sum1 = 0.0; sum2 = 0.0; |
2541 | for (j=0; j<n; j++) { |
2542 | xb = b + 2*(*idx++); x1 = xb[0+k*cm]; x2 = xb[1+k*cm]; |
2543 | sum1 += v[0]*x1 + v[2]*x2; |
2544 | sum2 += v[1]*x1 + v[3]*x2; |
2545 | v += 4; |
2546 | } |
2547 | z[0+k*cm] = sum1; z[1+k*cm] = sum2; |
2548 | } |
2549 | if (!usecprow) z += 2; |
2550 | } |
2551 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2552 | } |
2553 | |
2554 | PetscErrorCode MatMatMultNumeric_SeqBAIJ_SeqDense(Mat A,Mat B,Mat C) |
2555 | { |
2556 | Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; |
2557 | Mat_SeqDense *bd = (Mat_SeqDense*)B->data; |
2558 | Mat_SeqDense *cd = (Mat_SeqDense*)B->data; |
2559 | PetscErrorCode ierr; |
2560 | PetscInt cm=cd->lda,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n; |
2561 | PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2; |
2562 | PetscBLASInt bbs,bcn,bbm,bcm; |
2563 | PetscScalar *z = 0; |
2564 | PetscScalar *c,*b; |
2565 | const MatScalar *v; |
2566 | const PetscInt *idx,*ii,*ridx=NULL((void*)0); |
2567 | PetscScalar _DZero=0.0,_DOne=1.0; |
2568 | PetscBool usecprow=a->compressedrow.use; |
2569 | |
2570 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2570; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); |
2571 | if (!cm || !cn) PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2572 | if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n)return PetscError(((MPI_Comm)0x44000001),2572,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,60,PETSC_ERROR_INITIAL,"Number columns in A %D not equal rows in B %D\n" ,A->cmap->n,B->rmap->n); |
2573 | if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n)return PetscError(((MPI_Comm)0x44000001),2573,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,60,PETSC_ERROR_INITIAL,"Number rows in C %D not equal rows in A %D\n" ,C->rmap->n,A->rmap->n); |
2574 | if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n)return PetscError(((MPI_Comm)0x44000001),2574,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,60,PETSC_ERROR_INITIAL,"Number columns in B %D not equal columns in C %D\n" ,B->cmap->n,C->cmap->n); |
2575 | b = bd->v; |
2576 | if (a->nonzerorowcnt != A->rmap->n) { |
2577 | ierr = MatZeroEntries(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2577,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2578 | } |
2579 | ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2579,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2580 | if (bs == 2) { |
2581 | ierr = MatMatMult_SeqBAIJ_2_Private(A, b, bm, c, cm, cn); |
Value stored to 'ierr' is never read | |
2582 | } |
2583 | else { |
2584 | ierr = PetscBLASIntCast(bs,&bbs);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2584,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2585 | ierr = PetscBLASIntCast(cn,&bcn);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2585,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2586 | ierr = PetscBLASIntCast(bm,&bbm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2586,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2587 | ierr = PetscBLASIntCast(cm,&bcm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2587,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2588 | idx = a->j; |
2589 | v = a->a; |
2590 | if (usecprow) { |
2591 | mbs = a->compressedrow.nrows; |
2592 | ii = a->compressedrow.i; |
2593 | ridx = a->compressedrow.rindex; |
2594 | } else { |
2595 | mbs = a->mbs; |
2596 | ii = a->i; |
2597 | z = c; |
2598 | } |
2599 | for (i=0; i<mbs; i++) { |
2600 | n = ii[1] - ii[0]; ii++; |
2601 | if (usecprow) z = c + bs*ridx[i]; |
2602 | if (n) { |
2603 | PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&bbs,&bcn,&bbs,&_DOne,v,&bbs,b+bs*(*idx++),&bbm,&_DZero,z,&bcm))do { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = "BLASgemm"; petscstack->file[petscstack->currentsize ] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2603; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_TRUE || petscstack->hotdepth); } ; } while (0); dgemm_("N","N",&bbs,&bcn,&bbs,&_DOne,v,& bbs,b+bs*(*idx++),&bbm,&_DZero,z,&bcm); do { do { PetscErrorCode _7_ierr = PetscMallocValidate(2603,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" );do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),2603,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do { ; if (petscstack && petscstack->currentsize > 0 ) { petscstack->currentsize--; petscstack->function[petscstack ->currentsize] = 0; petscstack->file[petscstack->currentsize ] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack-> hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ; } while (0); } while (0); } while(0); |
2604 | v += bs2; |
2605 | } |
2606 | for (j=1; j<n; j++) { |
2607 | PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&bbs,&bcn,&bbs,&_DOne,v,&bbs,b+bs*(*idx++),&bbm,&_DOne,z,&bcm))do { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = "BLASgemm"; petscstack->file[petscstack->currentsize ] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ; petscstack->line[petscstack->currentsize] = 2607; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_TRUE || petscstack->hotdepth); } ; } while (0); dgemm_("N","N",&bbs,&bcn,&bbs,&_DOne,v,& bbs,b+bs*(*idx++),&bbm,&_DOne,z,&bcm); do { do {PetscErrorCode _7_ierr = PetscMallocValidate(2607,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" );do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),2607,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do { ; if (petscstack && petscstack->currentsize > 0 ) { petscstack->currentsize--; petscstack->function[petscstack ->currentsize] = 0; petscstack->file[petscstack->currentsize ] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack ->petscroutine[petscstack->currentsize] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack-> hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ; } while (0); } while (0); } while(0); |
2608 | v += bs2; |
2609 | } |
2610 | if (!usecprow) z += bs; |
2611 | } |
2612 | } |
2613 | ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2613,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2614 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2614,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2615 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2615,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2616 | ierr = PetscLogFlops((2.0*a->nz*bs2 - bs*a->nonzerorowcnt)*cn);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2616,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); |
2617 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); |
2618 | } |