Bug Summary

File: mat/impls/baij/seq/baij2.c
Warning: line 2581, column 5
Value stored to 'ierr' is never read

Annotated Source Code

[?] Use j/k keys for keyboard navigation

1
2#include <../src/mat/impls/baij/seq/baij.h>
3#include <../src/mat/impls/dense/seq/dense.h>
4#include <petsc/private/kernels/blockinvert.h>
5#include <petscbt.h>
6#include <petscblaslapack.h>
7
8#if defined(PETSC_HAVE_IMMINTRIN_H1) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE1) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
9#include <immintrin.h>
10#endif
11
/*
   MatIncreaseOverlap_SeqBAIJ - Enlarges each of the is_max index sets in is[] by ov
   levels of overlap, following the block nonzero structure (a->i, a->j) of A.

   Input:
     A      - the matrix; its block row count a->mbs and block size A->rmap->bs are used
     is_max - number of index sets in is[]
     is     - the index sets; each is[i] is destroyed and replaced by a newly created one
     ov     - number of overlap levels to add

   For each set, the incoming indices are divided by the block size to obtain block
   rows, the set of block rows is grown ov times with the block columns found in the
   rows gathered so far, and the result is expanded back to bs indices per block.

   Raises PETSC_ERR_ARG_OUTOFRANGE when ov is negative or when an index maps to a
   block row >= a->mbs.
*/
12PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
13{
14 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
15 PetscErrorCode ierr;
16 PetscInt row,i,j,k,l,m,n,*nidx,isz,val,ival;
17 const PetscInt *idx;
18 PetscInt start,end,*ai,*aj,bs,*nidx2;
19 PetscBT table;
20
21 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 21; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
22 m = a->mbs;
23 ai = a->i;
24 aj = a->j;
25 bs = A->rmap->bs;
26
27 if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified")return PetscError(((MPI_Comm)0x44000001),27,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,63,PETSC_ERROR_INITIAL,"Negative overlap specified")
;
28
/* Work space shared by all index sets: a bit table over the m block rows, a list of
   block rows (nidx, m+1 entries) and its expansion to point indices (nidx2, N+1 entries) */
29 ierr = PetscBTCreate(m,&table);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),29,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
30 ierr = PetscMalloc1(m+1,&nidx)PetscMallocA(1,PETSC_FALSE,30,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(m+1)*sizeof(**(&nidx)),(&nidx))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),30,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
31 ierr = PetscMalloc1(A->rmap->N+1,&nidx2)PetscMallocA(1,PETSC_FALSE,31,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(A->rmap->N+1)*sizeof(**(&nidx2)),(&nidx2
))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),31,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
32
33 for (i=0; i<is_max; i++) {
34 /* Initialise the two local arrays */
35 isz = 0;
36 ierr = PetscBTMemzero(m,table);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),36,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
37
38 /* Extract the indices, assume there can be duplicate entries */
39 ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),39,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
40 ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),40,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
41
42 /* Enter these into the temp arrays, i.e. mark table[row] and append row to nidx; the bit table filters out duplicates */
43 for (j=0; j<n; ++j) {
44 ival = idx[j]/bs; /* convert the indices into block indices */
45 if (ival>=m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim")return PetscError(((MPI_Comm)0x44000001),45,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,63,PETSC_ERROR_INITIAL,"index greater than mat-dim")
;
46 if (!PetscBTLookupSet(table,ival)) nidx[isz++] = ival;
47 }
/* The incoming index set is released here; is[i] is re-created from nidx2 at the end of this iteration */
48 ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),48,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
49 ierr = ISDestroy(&is[i]);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),49,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
50
/* k is not reset between overlap levels, so each level only scans the rows that
   were appended to nidx by the previous level (or by the initial set) */
51 k = 0;
52 for (j=0; j<ov; j++) { /* for each overlap level */
53 n = isz;
54 for (; k<n; k++) { /* do only those rows in nidx[k], which are not done yet */
55 row = nidx[k];
56 start = ai[row];
57 end = ai[row+1];
58 for (l = start; l<end; l++) {
59 val = aj[l];
60 if (!PetscBTLookupSet(table,val)) nidx[isz++] = val;
61 }
62 }
63 }
64 /* expand the Index Set: block index b becomes the bs indices b*bs, ..., b*bs+bs-1 */
65 for (j=0; j<isz; j++) {
66 for (k=0; k<bs; k++) nidx2[j*bs+k] = nidx[j]*bs+k;
67 }
68 ierr = ISCreateGeneral(PETSC_COMM_SELF((MPI_Comm)0x44000001),isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),68,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
69 }
70 ierr = PetscBTDestroy(&table);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),70,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
71 ierr = PetscFree(nidx)((*PetscTrFree)((void*)(nidx),71,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
) || ((nidx) = 0,0))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),71,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
72 ierr = PetscFree(nidx2)((*PetscTrFree)((void*)(nidx2),72,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
) || ((nidx2) = 0,0))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),72,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
73 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
74}
75
/*
   MatCreateSubMatrix_SeqBAIJ_Private - Copies the blocks of A selected by isrow and
   iscol into the matrix returned in *B.

   The entries of isrow are used directly as indices into a->i and a->ilen, and the
   entries of iscol as indices into a column map of length a->nbs + 1, i.e. both are
   treated as block indices here.

   scall == MAT_REUSE_MATRIX: *B is refilled in place. It must already have nrows block
   rows, ncols block columns, block size bs and the same per-row block counts as the
   requested submatrix; otherwise PETSC_ERR_ARG_SIZ is raised.
   Any other scall value: a new matrix of A's type is created and preallocated from the
   computed row lengths.

   After the copy, each row is passed to PetscSortIntWithDataArray() with its column
   indices and block values (presumably sorting by column index with the values carried
   along - confirm against that routine's documentation) before final assembly.
*/
76PetscErrorCode MatCreateSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
77{
78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*c;
79 PetscErrorCode ierr;
80 PetscInt *smap,i,k,kstart,kend,oldcols = a->nbs,*lens;
81 PetscInt row,mat_i,*mat_j,tcol,*mat_ilen;
82 const PetscInt *irow,*icol;
83 PetscInt nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
84 PetscInt *aj = a->j,*ai = a->i;
85 MatScalar *mat_a;
86 Mat C;
87 PetscBool flag;
88
89 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 89; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
90 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),90,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
91 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),91,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
92 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),92,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
93 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),93,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
94
/* smap[old block column] holds (new block column + 1); entries left at zero by
   PetscCalloc1 mark columns that are not part of the submatrix */
95 ierr = PetscCalloc1(1+oldcols,&smap)PetscMallocA(1,PETSC_TRUE,95,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(1+oldcols)*sizeof(**(&smap)),(&smap))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),95,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
96 ssmap = smap;
97 ierr = PetscMalloc1(1+nrows,&lens)PetscMallocA(1,PETSC_FALSE,97,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(1+nrows)*sizeof(**(&lens)),(&lens))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),97,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
98 for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
99 /* determine lens of each row: count the blocks of row irow[i] whose column is selected */
100 for (i=0; i<nrows; i++) {
101 kstart = ai[irow[i]];
102 kend = kstart + a->ilen[irow[i]];
103 lens[i] = 0;
104 for (k=kstart; k<kend; k++) {
105 if (ssmap[aj[k]]) lens[i]++;
106 }
107 }
108 /* Create and fill new matrix */
109 if (scall == MAT_REUSE_MATRIX) {
110 c = (Mat_SeqBAIJ*)((*B)->data);
111
112 if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size")return PetscError(((MPI_Comm)0x44000001),112,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,60,PETSC_ERROR_INITIAL,"Submatrix wrong size")
;
/* Reuse is only allowed when every row has exactly the same number of blocks as before */
113 ierr = PetscArraycmp(c->ilen,lens,c->mbs,&flag)((sizeof(*(c->ilen)) != sizeof(*(lens))) || PetscMemcmp(c->
ilen,lens,(c->mbs)*sizeof(*(c->ilen)),&flag))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),113,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
114 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros")return PetscError(((MPI_Comm)0x44000001),114,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,60,PETSC_ERROR_INITIAL,"Cannot reuse matrix. wrong no of nonzeros"
)
;
/* Row lengths are zeroed because the fill loop below counts them up again via (*mat_ilen)++ */
115 ierr = PetscArrayzero(c->ilen,c->mbs)PetscMemzero(c->ilen,(c->mbs)*sizeof(*(c->ilen)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),115,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
116 C = *B;
117 } else {
118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),118,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
119 ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE-1,PETSC_DETERMINE-1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),119,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
120 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),120,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
121 ierr = MatSeqBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),121,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
122 }
123 c = (Mat_SeqBAIJ*)(C->data);
/* Copy every selected block (bs2 scalars each) of row irow[i] into row i of C, in the order it is stored in A */
124 for (i=0; i<nrows; i++) {
125 row = irow[i];
126 kstart = ai[row];
127 kend = kstart + a->ilen[row];
128 mat_i = c->i[i];
129 mat_j = c->j + mat_i;
130 mat_a = c->a + mat_i*bs2;
131 mat_ilen = c->ilen + i;
132 for (k=kstart; k<kend; k++) {
133 if ((tcol=ssmap[a->j[k]])) {
134 *mat_j++ = tcol - 1;
135 ierr = PetscArraycpy(mat_a,a->a+k*bs2,bs2)((sizeof(*(mat_a)) != sizeof(*(a->a+k*bs2))) || PetscMemcpy
(mat_a,a->a+k*bs2,(bs2)*sizeof(*(mat_a))))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),135,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
136 mat_a += bs2;
137 (*mat_ilen)++;
138 }
139 }
140 }
141 /* sort each row of C; work is a scratch buffer of one block (bs2 scalars) handed to the sort routine */
142 {
143 MatScalar *work;
144 ierr = PetscMalloc1(bs2,&work)PetscMallocA(1,PETSC_FALSE,144,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(bs2)*sizeof(**(&work)),(&work))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),144,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
145 for (i=0; i<nrows; i++) {
146 PetscInt ilen;
147 mat_i = c->i[i];
148 mat_j = c->j + mat_i;
149 mat_a = c->a + mat_i*bs2;
150 ilen = c->ilen[i];
151 ierr = PetscSortIntWithDataArray(ilen,mat_j,mat_a,bs2*sizeof(MatScalar),work);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),151,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
152 }
153 ierr = PetscFree(work)((*PetscTrFree)((void*)(work),153,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
) || ((work) = 0,0))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),153,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
154 }
155
156 /* Free work space */
157 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),157,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
158 ierr = PetscFree(smap)((*PetscTrFree)((void*)(smap),158,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
) || ((smap) = 0,0))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),158,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
159 ierr = PetscFree(lens)((*PetscTrFree)((void*)(lens),159,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
) || ((lens) = 0,0))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),159,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
160 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),160,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
161 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),161,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
162
163 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),163,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
164 *B = C;
165 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
166}
167
/*
   MatCreateSubMatrix_SeqBAIJ - Front end to MatCreateSubMatrix_SeqBAIJ_Private() that
   takes index sets whose entries are divided by the block size bs to obtain block
   indices.

   For both isrow and iscol the routine counts how many entries fall into each block,
   requires every touched block to be hit exactly bs times, and builds compressed
   index sets (is1 for rows, is2 for columns) holding one block index per selected
   block. The extraction itself is delegated to the private routine with is1/is2.

   Errors:
     PETSC_ERR_ARG_SIZ - a row block is only partially covered by isrow
     PETSC_ERR_PLIB    - a column block is only partially covered by iscol
*/
168PetscErrorCode MatCreateSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
169{
170 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
171 IS is1,is2;
172 PetscErrorCode ierr;
173 PetscInt *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count,maxmnbs,j;
174 const PetscInt *irow,*icol;
175
176 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 176; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
177 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),177,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
178 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),178,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
179 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),179,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
180 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),180,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
181
182 /* Verify that the indices correspond to every element of each selected block
183 and form the compressed (one entry per block) IS */
/* vary and iary are sized for the larger of the two block dimensions so they can be reused for rows and then columns */
184 maxmnbs = PetscMax(a->mbs,a->nbs)(((a->mbs)<(a->nbs)) ? (a->nbs) : (a->mbs));
185 ierr = PetscMalloc2(maxmnbs,&vary,maxmnbs,&iary)PetscMallocA(2,PETSC_FALSE,185,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(maxmnbs)*sizeof(**(&vary)),(&vary),(size_t)
(maxmnbs)*sizeof(**(&iary)),(&iary))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),185,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
186 ierr = PetscArrayzero(vary,a->mbs)PetscMemzero(vary,(a->mbs)*sizeof(*(vary)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),186,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
/* vary[b] = number of row indices that fall into block row b */
187 for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
188 for (i=0; i<a->mbs; i++) {
189 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Index set does not match blocks")return PetscError(((MPI_Comm)0x44000001),189,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,60,PETSC_ERROR_INITIAL,"Index set does not match blocks")
;
190 }
/* The post-decrement means only the first index seen for a block still finds the full
   count bs, so each selected block is recorded once, in order of first appearance */
191 count = 0;
192 for (i=0; i<nrows; i++) {
193 j = irow[i] / bs;
194 if ((vary[j]--)==bs) iary[count++] = j;
195 }
196 ierr = ISCreateGeneral(PETSC_COMM_SELF((MPI_Comm)0x44000001),count,iary,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),196,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
197
/* Same procedure for the columns, over the a->nbs block columns */
198 ierr = PetscArrayzero(vary,a->nbs)PetscMemzero(vary,(a->nbs)*sizeof(*(vary)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),198,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
199 for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
200 for (i=0; i<a->nbs; i++) {
201 if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal error in PETSc")return PetscError(((MPI_Comm)0x44000001),201,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,77,PETSC_ERROR_INITIAL,"Internal error in PETSc")
;
202 }
203 count = 0;
204 for (i=0; i<ncols; i++) {
205 j = icol[i] / bs;
206 if ((vary[j]--)==bs) iary[count++] = j;
207 }
208 ierr = ISCreateGeneral(PETSC_COMM_SELF((MPI_Comm)0x44000001),count,iary,PETSC_COPY_VALUES,&is2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),208,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
209 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),209,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
210 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),210,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
211 ierr = PetscFree2(vary,iary)PetscFreeA(2,211,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,&(vary),&(iary))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),211,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
212
/* Extract using the compressed block index sets, then release them */
213 ierr = MatCreateSubMatrix_SeqBAIJ_Private(A,is1,is2,scall,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),213,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
214 ierr = ISDestroy(&is1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),214,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
215 ierr = ISDestroy(&is2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),215,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
216 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
217}
218
/*
   MatDestroySubMatrix_SeqBAIJ - Tears down a submatrix C that carries a Mat_SubSppt
   structure in c->submatis1.

   Calls the destroy callback stored in that structure on C, then hands the structure
   itself to MatDestroySubMatrix_Private().

   NOTE(review): submatj is dereferenced without a NULL check here, whereas
   MatDestroySubMatrices_SeqBAIJ tests it first - presumably this routine is only
   reached for matrices that have submatis1 set; confirm against the callers.
*/
219PetscErrorCode MatDestroySubMatrix_SeqBAIJ(Mat C)
220{
221 PetscErrorCode ierr;
222 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data;
223 Mat_SubSppt *submatj = c->submatis1;
224
225 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 225; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
226 ierr = (*submatj->destroy)(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),226,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
227 ierr = MatDestroySubMatrix_Private(submatj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),227,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
228 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
229}
230
/*
   MatDestroySubMatrices_SeqBAIJ - Destroys the n matrices stored in (*mat)[0..n-1] and
   then frees the array *mat itself.

   Input:
     n   - number of matrices in the array
     mat - address of the array; PetscFree() leaves *mat set to 0 on return

   A matrix whose c->submatis1 is set is torn down piece by piece (see the comment in
   the loop); any other matrix goes through MatDestroy().
*/
231PetscErrorCode MatDestroySubMatrices_SeqBAIJ(PetscInt n,Mat *mat[])
232{
233 PetscErrorCode ierr;
234 PetscInt i;
235 Mat C;
236 Mat_SeqBAIJ *c;
237 Mat_SubSppt *submatj;
238
239 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 239; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
240 for (i=0; i<n; i++) {
241 C = (*mat)[i];
242 c = (Mat_SeqBAIJ*)C->data;
243 submatj = c->submatis1;
244 if (submatj) {
/* Manual teardown: stored destroy callback, the Mat_SubSppt structure, the default
   vector type string, both layouts, and finally the object header */
245 ierr = (*submatj->destroy)(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),245,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
246 ierr = MatDestroySubMatrix_Private(submatj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),246,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
247 ierr = PetscFree(C->defaultvectype)((*PetscTrFree)((void*)(C->defaultvectype),247,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
) || ((C->defaultvectype) = 0,0))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),247,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
248 ierr = PetscLayoutDestroy(&C->rmap);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),248,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
249 ierr = PetscLayoutDestroy(&C->cmap);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),249,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
250 ierr = PetscHeaderDestroy(&C)(PetscHeaderDestroy_Private((PetscObject)(*&C)) || ((*PetscTrFree
)((void*)(*&C),250,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
) || ((*&C) = 0,0)))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),250,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
251 } else {
252 ierr = MatDestroy(&C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),252,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
253 }
254 }
255 ierr = PetscFree(*mat)((*PetscTrFree)((void*)(*mat),255,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
) || ((*mat) = 0,0))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),255,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
256 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
257}
258
/*
   MatCreateSubMatrices_SeqBAIJ - Extracts n submatrices of A, one for each pair
   (irow[i], icol[i]), by calling MatCreateSubMatrix_SeqBAIJ() for each i.

   Input:
     A     - the source matrix
     n     - number of submatrices
     irow  - n row index sets
     icol  - n column index sets
     scall - passed through to every per-matrix call
   Output:
     B     - array of the n resulting matrices

   When scall == MAT_INITIAL_MATRIX the array *B is allocated here with PetscCalloc1
   (n+1 entries); for any other scall value *B is used as passed in by the caller.
*/
259PetscErrorCode MatCreateSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
260{
261 PetscErrorCode ierr;
262 PetscInt i;
263
264 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 264; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
265 if (scall == MAT_INITIAL_MATRIX) {
266 ierr = PetscCalloc1(n+1,B)PetscMallocA(1,PETSC_TRUE,266,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(n+1)*sizeof(**(B)),(B))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),266,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
267 }
268
269 for (i=0; i<n; i++) {
270 ierr = MatCreateSubMatrix_SeqBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),270,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
271 }
272 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
273}
274
275/* -------------------------------------------------------*/
276/* Should check that shapes of vectors and matrices match */
277/* -------------------------------------------------------*/
278
/*
   MatMult_SeqBAIJ_1 - computes zz = A*xx with one scalar value per stored column index.

   For each visited row the products a->a[k]*x[a->j[k]] over that row's entries are
   accumulated into a single sum, which is then stored in the matching entry of zz.

   Parameters:
     A  - matrix; A->data is read as a Mat_SeqBAIJ
     xx - input vector, accessed read-only through VecGetArrayRead()
     zz - output vector, accessed through VecGetArray()

   Returns 0 on success. If any PETSc call reports an error, CHKERRQ() returns
   from the function immediately with the result of PetscError().
*/
279PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
280{
281 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
282 PetscScalar *z,sum;
283 const PetscScalar *x;
284 const MatScalar *v;
285 PetscErrorCode ierr;
286 PetscInt mbs,i,n;
287 const PetscInt *idx,*ii,*ridx=NULL((void*)0);
288 PetscBool usecprow=a->compressedrow.use;
289
290 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 290; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
291 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),291,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
292 ierr = VecGetArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),292,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
293
    /* Compressed-row mode loops over compressedrow.nrows rows only and scatters each
       result through rindex, so all a->mbs output entries are cleared first. */
294 if (usecprow) {
295 mbs = a->compressedrow.nrows;
296 ii = a->compressedrow.i;
297 ridx = a->compressedrow.rindex;
298 ierr = PetscArrayzero(z,a->mbs)PetscMemzero(z,(a->mbs)*sizeof(*(z)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),298,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
299 } else {
300 mbs = a->mbs;
301 ii = a->i;
302 }
303
    /* ii walks the row-pointer array: ii[0] is the offset of the current row's first
       entry in a->a / a->j and ii[1]-ii[0] is the number of entries in that row. */
304 for (i=0; i<mbs; i++) {
305 n = ii[1] - ii[0];
306 v = a->a + ii[0];
307 idx = a->j + ii[0];
308 ii++;
309 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
310 PetscPrefetchBlock(v+1*n,1*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+1*n),*_end = (const char
*)((v+1*n)+(1*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
311 sum = 0.0;
312 PetscSparseDensePlusDot(sum,x,v,idx,n){ PetscInt __i; for (__i=0; __i<n; __i++) sum += v[__i] * x
[idx[__i]];}
;
    /* compressed rows: ridx[i] gives the output position of the i-th visited row */
313 if (usecprow) {
314 z[ridx[i]] = sum;
315 } else {
316 z[i] = sum;
317 }
318 }
319 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),319,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
320 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),320,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
321 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),321,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
322 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
323}
324
/*
   MatMult_SeqBAIJ_2 - computes zz = A*xx where every stored entry is a 2x2 block.

   Each column index read from a->j selects two consecutive entries of xx (x + 2*col)
   and is paired with four consecutive values of a->a. The four values are consumed
   column by column: v[0],v[1] multiply the first x entry and v[2],v[3] the second.

   Parameters:
     A  - matrix; A->data is read as a Mat_SeqBAIJ
     xx - input vector, accessed read-only through VecGetArrayRead()
     zz - output vector, accessed through VecGetArray()

   Returns 0 on success. If any PETSc call reports an error, CHKERRQ() returns
   from the function immediately with the result of PetscError().
*/
325PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
326{
327 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
328 PetscScalar *z = 0,sum1,sum2,*zarray;
329 const PetscScalar *x,*xb;
330 PetscScalar x1,x2;
331 const MatScalar *v;
332 PetscErrorCode ierr;
333 PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL((void*)0);
334 PetscBool usecprow=a->compressedrow.use;
335
336 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 336; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
337 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),337,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
338 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),338,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
339
    /* idx and v are set once and then advance continuously through a->j and a->a
       as the rows are processed in order. */
340 idx = a->j;
341 v = a->a;
    /* Compressed-row mode loops over compressedrow.nrows rows only and scatters each
       result through rindex, so all 2*a->mbs output entries are cleared first. */
342 if (usecprow) {
343 mbs = a->compressedrow.nrows;
344 ii = a->compressedrow.i;
345 ridx = a->compressedrow.rindex;
346 ierr = PetscArrayzero(zarray,2*a->mbs)PetscMemzero(zarray,(2*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),346,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
347 } else {
348 mbs = a->mbs;
349 ii = a->i;
350 z = zarray;
351 }
352
    /* n = ii[1]-ii[0] is the number of blocks stored in the current block row */
353 for (i=0; i<mbs; i++) {
354 n = ii[1] - ii[0]; ii++;
355 sum1 = 0.0; sum2 = 0.0;
356 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
357 PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+4*n),*_end = (const char
*)((v+4*n)+(4*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
358 for (j=0; j<n; j++) {
359 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
360 sum1 += v[0]*x1 + v[2]*x2;
361 sum2 += v[1]*x1 + v[3]*x2;
362 v += 4;
363 }
    /* compressed rows look up the output position per row; otherwise z just advances */
364 if (usecprow) z = zarray + 2*ridx[i];
365 z[0] = sum1; z[1] = sum2;
366 if (!usecprow) z += 2;
367 }
368 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),368,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
369 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),369,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
370 ierr = PetscLogFlops(8.0*a->nz - 2.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),370,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
371 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
372}
373
/*
   MatMult_SeqBAIJ_3 - computes zz = A*xx where every stored entry is a 3x3 block.

   Each column index read from a->j selects three consecutive entries of xx (x + 3*col)
   and is paired with nine consecutive values of a->a. The values are consumed column
   by column: v[0..2] multiply x1, v[3..5] multiply x2 and v[6..8] multiply x3.

   Parameters:
     A  - matrix; A->data is read as a Mat_SeqBAIJ
     xx - input vector, accessed read-only through VecGetArrayRead()
     zz - output vector, accessed through VecGetArray()

   Returns 0 on success. If any PETSc call reports an error, CHKERRQ() returns
   from the function immediately with the result of PetscError().
*/
374PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
375{
376 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
377 PetscScalar *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray;
378 const PetscScalar *x,*xb;
379 const MatScalar *v;
380 PetscErrorCode ierr;
381 PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL((void*)0);
382 PetscBool usecprow=a->compressedrow.use;
383
384#if defined(PETSC_HAVE_PRAGMA_DISJOINT)
385#pragma disjoint(*v,*z,*xb)
386#endif
387
388 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 388; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
389 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),389,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
390 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),390,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
391
    /* idx and v are set once and then advance continuously through a->j and a->a
       as the rows are processed in order. */
392 idx = a->j;
393 v = a->a;
    /* Compressed-row mode loops over compressedrow.nrows rows only and scatters each
       result through rindex, so all 3*a->mbs output entries are cleared first. */
394 if (usecprow) {
395 mbs = a->compressedrow.nrows;
396 ii = a->compressedrow.i;
397 ridx = a->compressedrow.rindex;
398 ierr = PetscArrayzero(zarray,3*a->mbs)PetscMemzero(zarray,(3*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),398,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
399 } else {
400 mbs = a->mbs;
401 ii = a->i;
402 z = zarray;
403 }
404
    /* n = ii[1]-ii[0] is the number of blocks stored in the current block row */
405 for (i=0; i<mbs; i++) {
406 n = ii[1] - ii[0]; ii++;
407 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
408 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
409 PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+9*n),*_end = (const char
*)((v+9*n)+(9*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
410 for (j=0; j<n; j++) {
411 xb = x + 3*(*idx++);
412 x1 = xb[0];
413 x2 = xb[1];
414 x3 = xb[2];
415
416 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
417 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
418 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
419 v += 9;
420 }
    /* compressed rows look up the output position per row; otherwise z just advances */
421 if (usecprow) z = zarray + 3*ridx[i];
422 z[0] = sum1; z[1] = sum2; z[2] = sum3;
423 if (!usecprow) z += 3;
424 }
425 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),425,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
426 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),426,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
427 ierr = PetscLogFlops(18.0*a->nz - 3.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),427,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
428 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
429}
430
/*
   MatMult_SeqBAIJ_4 - computes zz = A*xx where every stored entry is a 4x4 block.

   Each column index read from a->j selects four consecutive entries of xx (x + 4*col)
   and is paired with sixteen consecutive values of a->a. The values are consumed column
   by column: v[0..3] multiply x1, v[4..7] multiply x2, v[8..11] multiply x3 and
   v[12..15] multiply x4.

   Parameters:
     A  - matrix; A->data is read as a Mat_SeqBAIJ
     xx - input vector, accessed read-only through VecGetArrayRead()
     zz - output vector, accessed through VecGetArray()

   Returns 0 on success. If any PETSc call reports an error, CHKERRQ() returns
   from the function immediately with the result of PetscError().
*/
431PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
432{
433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
434 PetscScalar *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
435 const PetscScalar *x,*xb;
436 const MatScalar *v;
437 PetscErrorCode ierr;
438 PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL((void*)0);
439 PetscBool usecprow=a->compressedrow.use;
440
441 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 441; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
442 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),442,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
443 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),443,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
444
    /* idx and v are set once and then advance continuously through a->j and a->a
       as the rows are processed in order. */
445 idx = a->j;
446 v = a->a;
    /* Compressed-row mode loops over compressedrow.nrows rows only and scatters each
       result through rindex, so all 4*a->mbs output entries are cleared first. */
447 if (usecprow) {
448 mbs = a->compressedrow.nrows;
449 ii = a->compressedrow.i;
450 ridx = a->compressedrow.rindex;
451 ierr = PetscArrayzero(zarray,4*a->mbs)PetscMemzero(zarray,(4*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),451,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
452 } else {
453 mbs = a->mbs;
454 ii = a->i;
455 z = zarray;
456 }
457
    /* n = ii[1]-ii[0] is the number of blocks stored in the current block row */
458 for (i=0; i<mbs; i++) {
459 n = ii[1] - ii[0];
460 ii++;
461 sum1 = 0.0;
462 sum2 = 0.0;
463 sum3 = 0.0;
464 sum4 = 0.0;
465
466 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
467 PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+16*n),*_end = (const char
*)((v+16*n)+(16*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
468 for (j=0; j<n; j++) {
469 xb = x + 4*(*idx++);
470 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
471 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
472 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
473 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
474 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
475 v += 16;
476 }
    /* compressed rows look up the output position per row; otherwise z just advances */
477 if (usecprow) z = zarray + 4*ridx[i];
478 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
479 if (!usecprow) z += 4;
480 }
481 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),481,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
482 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),482,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
483 ierr = PetscLogFlops(32.0*a->nz - 4.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),483,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
484 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
485}
486
/*
   MatMult_SeqBAIJ_5 - computes zz = A*xx where every stored entry is a 5x5 block.

   Each column index read from a->j selects five consecutive entries of xx (x + 5*col)
   and is paired with 25 consecutive values of a->a. The values are consumed column by
   column: v[0..4] multiply x1, v[5..9] multiply x2, and so on up to v[20..24] for x5.

   Parameters:
     A  - matrix; A->data is read as a Mat_SeqBAIJ
     xx - input vector, accessed read-only through VecGetArrayRead()
     zz - output vector, accessed through VecGetArray()

   Returns 0 on success. If any PETSc call reports an error, CHKERRQ() returns
   from the function immediately with the result of PetscError().
*/
487PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
488{
489 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
490 PetscScalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray;
491 const PetscScalar *xb,*x;
492 const MatScalar *v;
493 PetscErrorCode ierr;
494 const PetscInt *idx,*ii,*ridx=NULL((void*)0);
495 PetscInt mbs,i,j,n;
496 PetscBool usecprow=a->compressedrow.use;
497
498 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 498; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
499 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),499,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
500 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),500,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
501
    /* idx and v are set once and then advance continuously through a->j and a->a
       as the rows are processed in order. */
502 idx = a->j;
503 v = a->a;
    /* Compressed-row mode loops over compressedrow.nrows rows only and scatters each
       result through rindex, so all 5*a->mbs output entries are cleared first. */
504 if (usecprow) {
505 mbs = a->compressedrow.nrows;
506 ii = a->compressedrow.i;
507 ridx = a->compressedrow.rindex;
508 ierr = PetscArrayzero(zarray,5*a->mbs)PetscMemzero(zarray,(5*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),508,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
509 } else {
510 mbs = a->mbs;
511 ii = a->i;
512 z = zarray;
513 }
514
    /* n = ii[1]-ii[0] is the number of blocks stored in the current block row */
515 for (i=0; i<mbs; i++) {
516 n = ii[1] - ii[0]; ii++;
517 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
518 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
519 PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+25*n),*_end = (const char
*)((v+25*n)+(25*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
520 for (j=0; j<n; j++) {
521 xb = x + 5*(*idx++);
522 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
523 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
524 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
525 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
526 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
527 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
528 v += 25;
529 }
    /* compressed rows look up the output position per row; otherwise z just advances */
530 if (usecprow) z = zarray + 5*ridx[i];
531 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
532 if (!usecprow) z += 5;
533 }
534 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),534,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
535 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),535,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
536 ierr = PetscLogFlops(50.0*a->nz - 5.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),536,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
537 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
538}
539
540
541
/*
   MatMult_SeqBAIJ_6 - computes zz = A*xx where every stored entry is a 6x6 block.

   Each column index read from a->j selects six consecutive entries of xx (x + 6*col)
   and is paired with 36 consecutive values of a->a. The values are consumed column by
   column: v[0..5] multiply x1, v[6..11] multiply x2, and so on up to v[30..35] for x6.

   Parameters:
     A  - matrix; A->data is read as a Mat_SeqBAIJ
     xx - input vector, accessed read-only through VecGetArrayRead()
     zz - output vector, accessed through VecGetArray()

   Returns 0 on success. If any PETSc call reports an error, CHKERRQ() returns
   from the function immediately with the result of PetscError().
*/
542PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
543{
544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
545 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6;
546 const PetscScalar *x,*xb;
547 PetscScalar x1,x2,x3,x4,x5,x6,*zarray;
548 const MatScalar *v;
549 PetscErrorCode ierr;
550 PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL((void*)0);
551 PetscBool usecprow=a->compressedrow.use;
552
553 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 553; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
554 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),554,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
555 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),555,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
556
    /* idx and v are set once and then advance continuously through a->j and a->a
       as the rows are processed in order. */
557 idx = a->j;
558 v = a->a;
    /* Compressed-row mode loops over compressedrow.nrows rows only and scatters each
       result through rindex, so all 6*a->mbs output entries are cleared first. */
559 if (usecprow) {
560 mbs = a->compressedrow.nrows;
561 ii = a->compressedrow.i;
562 ridx = a->compressedrow.rindex;
563 ierr = PetscArrayzero(zarray,6*a->mbs)PetscMemzero(zarray,(6*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),563,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
564 } else {
565 mbs = a->mbs;
566 ii = a->i;
567 z = zarray;
568 }
569
    /* n = ii[1]-ii[0] is the number of blocks stored in the current block row */
570 for (i=0; i<mbs; i++) {
571 n = ii[1] - ii[0];
572 ii++;
573 sum1 = 0.0;
574 sum2 = 0.0;
575 sum3 = 0.0;
576 sum4 = 0.0;
577 sum5 = 0.0;
578 sum6 = 0.0;
579
580 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
581 PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+36*n),*_end = (const char
*)((v+36*n)+(36*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
582 for (j=0; j<n; j++) {
583 xb = x + 6*(*idx++);
584 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
585 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
586 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
587 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
588 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
589 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
590 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
591 v += 36;
592 }
    /* compressed rows look up the output position per row; otherwise z just advances */
593 if (usecprow) z = zarray + 6*ridx[i];
594 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
595 if (!usecprow) z += 6;
596 }
597
598 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),598,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
599 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),599,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
600 ierr = PetscLogFlops(72.0*a->nz - 6.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),600,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
601 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
602}
603
/*
   MatMult_SeqBAIJ_7 - computes zz = A*xx where every stored entry is a 7x7 block.

   Each column index read from a->j selects seven consecutive entries of xx (x + 7*col)
   and is paired with 49 consecutive values of a->a. The values are consumed column by
   column: v[0..6] multiply x1, v[7..13] multiply x2, and so on up to v[42..48] for x7.

   Parameters:
     A  - matrix; A->data is read as a Mat_SeqBAIJ
     xx - input vector, accessed read-only through VecGetArrayRead()
     zz - output vector, accessed through VecGetArray()

   Returns 0 on success. If any PETSc call reports an error, CHKERRQ() returns
   from the function immediately with the result of PetscError().
*/
604PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
605{
606 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
607 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
608 const PetscScalar *x,*xb;
609 PetscScalar x1,x2,x3,x4,x5,x6,x7,*zarray;
610 const MatScalar *v;
611 PetscErrorCode ierr;
612 PetscInt mbs,i,*idx,*ii,j,n,*ridx=NULL((void*)0);
613 PetscBool usecprow=a->compressedrow.use;
614
615 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 615; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
616 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),616,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
617 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),617,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
618
    /* idx and v are set once and then advance continuously through a->j and a->a
       as the rows are processed in order. */
619 idx = a->j;
620 v = a->a;
    /* Compressed-row mode loops over compressedrow.nrows rows only and scatters each
       result through rindex, so all 7*a->mbs output entries are cleared first. */
621 if (usecprow) {
622 mbs = a->compressedrow.nrows;
623 ii = a->compressedrow.i;
624 ridx = a->compressedrow.rindex;
625 ierr = PetscArrayzero(zarray,7*a->mbs)PetscMemzero(zarray,(7*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),625,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
626 } else {
627 mbs = a->mbs;
628 ii = a->i;
629 z = zarray;
630 }
631
    /* n = ii[1]-ii[0] is the number of blocks stored in the current block row */
632 for (i=0; i<mbs; i++) {
633 n = ii[1] - ii[0];
634 ii++;
635 sum1 = 0.0;
636 sum2 = 0.0;
637 sum3 = 0.0;
638 sum4 = 0.0;
639 sum5 = 0.0;
640 sum6 = 0.0;
641 sum7 = 0.0;
642
643 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
644 PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+49*n),*_end = (const char
*)((v+49*n)+(49*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
645 for (j=0; j<n; j++) {
646 xb = x + 7*(*idx++);
647 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
648 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
649 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
650 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
651 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
652 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
653 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
654 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
655 v += 49;
656 }
    /* compressed rows look up the output position per row; otherwise z just advances */
657 if (usecprow) z = zarray + 7*ridx[i];
658 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
659 if (!usecprow) z += 7;
660 }
661
662 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),662,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
663 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),663,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
664 ierr = PetscLogFlops(98.0*a->nz - 7.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),664,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
665 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
666}
667
668#if defined(PETSC_HAVE_IMMINTRIN_H1) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE1) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
669PetscErrorCode MatMult_SeqBAIJ_9_AVX2(Mat A,Vec xx,Vec zz)
670{
671 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
672 PetscScalar *z = 0,*work,*workt,*zarray;
673 const PetscScalar *x,*xb;
674 const MatScalar *v;
675 PetscErrorCode ierr;
676 PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2;
677 const PetscInt *idx,*ii,*ridx=NULL((void*)0);
678 PetscInt k;
679 PetscBool usecprow=a->compressedrow.use;
680
681 __m256d a0,a1,a2,a3,a4,a5;
682 __m256d w0,w1,w2,w3;
683 __m256d z0,z1,z2;
684 __m256i mask1 = _mm256_set_epi64x(0LL, 0LL, 0LL, 1LL<<63);
685
686 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 686; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
687 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),687,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
688 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),688,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
689
690 idx = a->j;
691 v = a->a;
692 if (usecprow) {
693 mbs = a->compressedrow.nrows;
694 ii = a->compressedrow.i;
695 ridx = a->compressedrow.rindex;
696 ierr = PetscArrayzero(zarray,bs*a->mbs)PetscMemzero(zarray,(bs*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),696,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
697 } else {
698 mbs = a->mbs;
699 ii = a->i;
700 z = zarray;
701 }
702
703 if (!a->mult_work) {
704 k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap->
n) : (A->rmap->n))
;
705 ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,705,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work
))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),705,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
706 }
707
708 work = a->mult_work;
709 for (i=0; i<mbs; i++) {
710 n = ii[1] - ii[0]; ii++;
711 workt = work;
712 for (j=0; j<n; j++) {
713 xb = x + bs*(*idx++);
714 for (k=0; k<bs; k++) workt[k] = xb[k];
715 workt += bs;
716 }
717 if (usecprow) z = zarray + bs*ridx[i];
718
719 z0 = _mm256_setzero_pd(); z1 = _mm256_setzero_pd(); z2 = _mm256_setzero_pd();
720
721 for (j=0; j<n; j++) {
722 // first column of a
723 w0 = _mm256_set1_pd(work[j*9 ]);
724 a0 = _mm256_loadu_pd(&v[j*81 ]); z0 = _mm256_fmadd_pd(a0,w0,z0);
725 a1 = _mm256_loadu_pd(&v[j*81+4]); z1 = _mm256_fmadd_pd(a1,w0,z1);
726 a2 = _mm256_loadu_pd(&v[j*81+8]); z2 = _mm256_fmadd_pd(a2,w0,z2);
727
728 // second column of a
729 w1 = _mm256_set1_pd(work[j*9+ 1]);
730 a0 = _mm256_loadu_pd(&v[j*81+ 9]); z0 = _mm256_fmadd_pd(a0,w1,z0);
731 a1 = _mm256_loadu_pd(&v[j*81+13]); z1 = _mm256_fmadd_pd(a1,w1,z1);
732 a2 = _mm256_loadu_pd(&v[j*81+17]); z2 = _mm256_fmadd_pd(a2,w1,z2);
733
734 // third column of a
735 w2 = _mm256_set1_pd(work[j*9 +2]);
736 a3 = _mm256_loadu_pd(&v[j*81+18]); z0 = _mm256_fmadd_pd(a3,w2,z0);
737 a4 = _mm256_loadu_pd(&v[j*81+22]); z1 = _mm256_fmadd_pd(a4,w2,z1);
738 a5 = _mm256_loadu_pd(&v[j*81+26]); z2 = _mm256_fmadd_pd(a5,w2,z2);
739
740 // fourth column of a
741 w3 = _mm256_set1_pd(work[j*9+ 3]);
742 a0 = _mm256_loadu_pd(&v[j*81+27]); z0 = _mm256_fmadd_pd(a0,w3,z0);
743 a1 = _mm256_loadu_pd(&v[j*81+31]); z1 = _mm256_fmadd_pd(a1,w3,z1);
744 a2 = _mm256_loadu_pd(&v[j*81+35]); z2 = _mm256_fmadd_pd(a2,w3,z2);
745
746 // fifth column of a
747 w0 = _mm256_set1_pd(work[j*9+ 4]);
748 a3 = _mm256_loadu_pd(&v[j*81+36]); z0 = _mm256_fmadd_pd(a3,w0,z0);
749 a4 = _mm256_loadu_pd(&v[j*81+40]); z1 = _mm256_fmadd_pd(a4,w0,z1);
750 a5 = _mm256_loadu_pd(&v[j*81+44]); z2 = _mm256_fmadd_pd(a5,w0,z2);
751
752 // sixth column of a
753 w1 = _mm256_set1_pd(work[j*9+ 5]);
754 a0 = _mm256_loadu_pd(&v[j*81+45]); z0 = _mm256_fmadd_pd(a0,w1,z0);
755 a1 = _mm256_loadu_pd(&v[j*81+49]); z1 = _mm256_fmadd_pd(a1,w1,z1);
756 a2 = _mm256_loadu_pd(&v[j*81+53]); z2 = _mm256_fmadd_pd(a2,w1,z2);
757
758 // seventh column of a
759 w2 = _mm256_set1_pd(work[j*9+ 6]);
760 a0 = _mm256_loadu_pd(&v[j*81+54]); z0 = _mm256_fmadd_pd(a0,w2,z0);
761 a1 = _mm256_loadu_pd(&v[j*81+58]); z1 = _mm256_fmadd_pd(a1,w2,z1);
762 a2 = _mm256_loadu_pd(&v[j*81+62]); z2 = _mm256_fmadd_pd(a2,w2,z2);
763
764 // eighth column of a
765 w3 = _mm256_set1_pd(work[j*9+ 7]);
766 a3 = _mm256_loadu_pd(&v[j*81+63]); z0 = _mm256_fmadd_pd(a3,w3,z0);
767 a4 = _mm256_loadu_pd(&v[j*81+67]); z1 = _mm256_fmadd_pd(a4,w3,z1);
768 a5 = _mm256_loadu_pd(&v[j*81+71]); z2 = _mm256_fmadd_pd(a5,w3,z2);
769
770 // ninth column of a
771 w0 = _mm256_set1_pd(work[j*9+ 8]);
772 a0 = _mm256_loadu_pd(&v[j*81+72]); z0 = _mm256_fmadd_pd(a0,w0,z0);
773 a1 = _mm256_loadu_pd(&v[j*81+76]); z1 = _mm256_fmadd_pd(a1,w0,z1);
774 a2 = _mm256_maskload_pd(&v[j*81+80],mask1); z2 = _mm256_fmadd_pd(a2,w0,z2);
775 }
776
777 _mm256_storeu_pd(&z[ 0], z0); _mm256_storeu_pd(&z[ 4], z1); _mm256_maskstore_pd(&z[8], mask1, z2);
778
779 v += n*bs2;
780 if (!usecprow) z += bs;
781 }
782 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),782,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
783 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),783,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
784 ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),784,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
785 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
786}
787#endif
788
789PetscErrorCode MatMult_SeqBAIJ_11(Mat A,Vec xx,Vec zz)
790{
791 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
792 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11;
793 const PetscScalar *x,*xb;
794 PetscScalar *zarray,xv;
795 const MatScalar *v;
796 PetscErrorCode ierr;
797 const PetscInt *ii,*ij=a->j,*idx;
798 PetscInt mbs,i,j,k,n,*ridx=NULL((void*)0);
799 PetscBool usecprow=a->compressedrow.use;
800
801 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 801; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
802 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),802,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
803 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),803,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
804
805 v = a->a;
806 if (usecprow) {
807 mbs = a->compressedrow.nrows;
808 ii = a->compressedrow.i;
809 ridx = a->compressedrow.rindex;
810 ierr = PetscArrayzero(zarray,11*a->mbs)PetscMemzero(zarray,(11*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),810,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
811 } else {
812 mbs = a->mbs;
813 ii = a->i;
814 z = zarray;
815 }
816
817 for (i=0; i<mbs; i++) {
818 n = ii[i+1] - ii[i];
819 idx = ij + ii[i];
820 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
821 sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0;
822
823 for (j=0; j<n; j++) {
824 xb = x + 11*(idx[j]);
825
826 for (k=0; k<11; k++) {
827 xv = xb[k];
828 sum1 += v[0]*xv;
829 sum2 += v[1]*xv;
830 sum3 += v[2]*xv;
831 sum4 += v[3]*xv;
832 sum5 += v[4]*xv;
833 sum6 += v[5]*xv;
834 sum7 += v[6]*xv;
835 sum8 += v[7]*xv;
836 sum9 += v[8]*xv;
837 sum10 += v[9]*xv;
838 sum11 += v[10]*xv;
839 v += 11;
840 }
841 }
842 if (usecprow) z = zarray + 11*ridx[i];
843 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
844 z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11;
845
846 if (!usecprow) z += 11;
847 }
848
849 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),849,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
850 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),850,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
851 ierr = PetscLogFlops(242.0*a->nz - 11.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),851,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
852 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
853}
854
855/* MatMult_SeqBAIJ_15 version 1: Columns in the block are accessed one at a time */
856/* Default MatMult for block size 15 */
857
858PetscErrorCode MatMult_SeqBAIJ_15_ver1(Mat A,Vec xx,Vec zz)
859{
860 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
861 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15;
862 const PetscScalar *x,*xb;
863 PetscScalar *zarray,xv;
864 const MatScalar *v;
865 PetscErrorCode ierr;
866 const PetscInt *ii,*ij=a->j,*idx;
867 PetscInt mbs,i,j,k,n,*ridx=NULL((void*)0);
868 PetscBool usecprow=a->compressedrow.use;
869
870 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 870; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
871 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),871,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
872 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),872,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
873
874 v = a->a;
875 if (usecprow) {
876 mbs = a->compressedrow.nrows;
877 ii = a->compressedrow.i;
878 ridx = a->compressedrow.rindex;
879 ierr = PetscArrayzero(zarray,15*a->mbs)PetscMemzero(zarray,(15*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),879,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
880 } else {
881 mbs = a->mbs;
882 ii = a->i;
883 z = zarray;
884 }
885
886 for (i=0; i<mbs; i++) {
887 n = ii[i+1] - ii[i];
888 idx = ij + ii[i];
889 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
890 sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0;
891
892 for (j=0; j<n; j++) {
893 xb = x + 15*(idx[j]);
894
895 for (k=0; k<15; k++) {
896 xv = xb[k];
897 sum1 += v[0]*xv;
898 sum2 += v[1]*xv;
899 sum3 += v[2]*xv;
900 sum4 += v[3]*xv;
901 sum5 += v[4]*xv;
902 sum6 += v[5]*xv;
903 sum7 += v[6]*xv;
904 sum8 += v[7]*xv;
905 sum9 += v[8]*xv;
906 sum10 += v[9]*xv;
907 sum11 += v[10]*xv;
908 sum12 += v[11]*xv;
909 sum13 += v[12]*xv;
910 sum14 += v[13]*xv;
911 sum15 += v[14]*xv;
912 v += 15;
913 }
914 }
915 if (usecprow) z = zarray + 15*ridx[i];
916 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
917 z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15;
918
919 if (!usecprow) z += 15;
920 }
921
922 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),922,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
923 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),923,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
924 ierr = PetscLogFlops(450.0*a->nz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),924,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
925 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
926}
927
928/* MatMult_SeqBAIJ_15_ver2 : Columns in the block are accessed in sets of 4,4,4,3 */
929PetscErrorCode MatMult_SeqBAIJ_15_ver2(Mat A,Vec xx,Vec zz)
930{
931 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
932 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15;
933 const PetscScalar *x,*xb;
934 PetscScalar x1,x2,x3,x4,*zarray;
935 const MatScalar *v;
936 PetscErrorCode ierr;
937 const PetscInt *ii,*ij=a->j,*idx;
938 PetscInt mbs,i,j,n,*ridx=NULL((void*)0);
939 PetscBool usecprow=a->compressedrow.use;
940
941 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 941; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
942 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),942,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
943 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),943,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
944
945 v = a->a;
946 if (usecprow) {
947 mbs = a->compressedrow.nrows;
948 ii = a->compressedrow.i;
949 ridx = a->compressedrow.rindex;
950 ierr = PetscArrayzero(zarray,15*a->mbs)PetscMemzero(zarray,(15*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),950,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
951 } else {
952 mbs = a->mbs;
953 ii = a->i;
954 z = zarray;
955 }
956
957 for (i=0; i<mbs; i++) {
958 n = ii[i+1] - ii[i];
959 idx = ij + ii[i];
960 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
961 sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0;
962
963 for (j=0; j<n; j++) {
964 xb = x + 15*(idx[j]);
965 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
966
967 sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4;
968 sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4;
969 sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4;
970 sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4;
971 sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4;
972 sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4;
973 sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4;
974 sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4;
975 sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4;
976 sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4;
977 sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4;
978 sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4;
979 sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4;
980 sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4;
981 sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4;
982
983 v += 60;
984
985 x1 = xb[4]; x2 = xb[5]; x3 = xb[6]; x4 = xb[7];
986
987 sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4;
988 sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4;
989 sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4;
990 sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4;
991 sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4;
992 sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4;
993 sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4;
994 sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4;
995 sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4;
996 sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4;
997 sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4;
998 sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4;
999 sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4;
1000 sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4;
1001 sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4;
1002 v += 60;
1003
1004 x1 = xb[8]; x2 = xb[9]; x3 = xb[10]; x4 = xb[11];
1005 sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4;
1006 sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4;
1007 sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4;
1008 sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4;
1009 sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4;
1010 sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4;
1011 sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4;
1012 sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4;
1013 sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4;
1014 sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4;
1015 sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4;
1016 sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4;
1017 sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4;
1018 sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4;
1019 sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4;
1020 v += 60;
1021
1022 x1 = xb[12]; x2 = xb[13]; x3 = xb[14];
1023 sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3;
1024 sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3;
1025 sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3;
1026 sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3;
1027 sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3;
1028 sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3;
1029 sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3;
1030 sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3;
1031 sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3;
1032 sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3;
1033 sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3;
1034 sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3;
1035 sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3;
1036 sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3;
1037 sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3;
1038 v += 45;
1039 }
1040 if (usecprow) z = zarray + 15*ridx[i];
1041 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1042 z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15;
1043
1044 if (!usecprow) z += 15;
1045 }
1046
1047 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1047,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1048 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1048,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1049 ierr = PetscLogFlops(450.0*a->nz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1049,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1050 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1051}
1052
1053/* MatMult_SeqBAIJ_15_ver3 : Columns in the block are accessed in sets of 8,7 */
1054PetscErrorCode MatMult_SeqBAIJ_15_ver3(Mat A,Vec xx,Vec zz)
1055{
1056 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1057 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15;
1058 const PetscScalar *x,*xb;
1059 PetscScalar x1,x2,x3,x4,x5,x6,x7,x8,*zarray;
1060 const MatScalar *v;
1061 PetscErrorCode ierr;
1062 const PetscInt *ii,*ij=a->j,*idx;
1063 PetscInt mbs,i,j,n,*ridx=NULL((void*)0);
1064 PetscBool usecprow=a->compressedrow.use;
1065
1066 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1066; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1067 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1067,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1068 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1068,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1069
1070 v = a->a;
1071 if (usecprow) {
1072 mbs = a->compressedrow.nrows;
1073 ii = a->compressedrow.i;
1074 ridx = a->compressedrow.rindex;
1075 ierr = PetscArrayzero(zarray,15*a->mbs)PetscMemzero(zarray,(15*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1075,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1076 } else {
1077 mbs = a->mbs;
1078 ii = a->i;
1079 z = zarray;
1080 }
1081
1082 for (i=0; i<mbs; i++) {
1083 n = ii[i+1] - ii[i];
1084 idx = ij + ii[i];
1085 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
1086 sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0;
1087
1088 for (j=0; j<n; j++) {
1089 xb = x + 15*(idx[j]);
1090 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1091 x8 = xb[7];
1092
1093 sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4 + v[60]*x5 + v[75]*x6 + v[90]*x7 + v[105]*x8;
1094 sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4 + v[61]*x5 + v[76]*x6 + v[91]*x7 + v[106]*x8;
1095 sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4 + v[62]*x5 + v[77]*x6 + v[92]*x7 + v[107]*x8;
1096 sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4 + v[63]*x5 + v[78]*x6 + v[93]*x7 + v[108]*x8;
1097 sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4 + v[64]*x5 + v[79]*x6 + v[94]*x7 + v[109]*x8;
1098 sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4 + v[65]*x5 + v[80]*x6 + v[95]*x7 + v[110]*x8;
1099 sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4 + v[66]*x5 + v[81]*x6 + v[96]*x7 + v[111]*x8;
1100 sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4 + v[67]*x5 + v[82]*x6 + v[97]*x7 + v[112]*x8;
1101 sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4 + v[68]*x5 + v[83]*x6 + v[98]*x7 + v[113]*x8;
1102 sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4 + v[69]*x5 + v[84]*x6 + v[99]*x7 + v[114]*x8;
1103 sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4 + v[70]*x5 + v[85]*x6 + v[100]*x7 + v[115]*x8;
1104 sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4 + v[71]*x5 + v[86]*x6 + v[101]*x7 + v[116]*x8;
1105 sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4 + v[72]*x5 + v[87]*x6 + v[102]*x7 + v[117]*x8;
1106 sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4 + v[73]*x5 + v[88]*x6 + v[103]*x7 + v[118]*x8;
1107 sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4 + v[74]*x5 + v[89]*x6 + v[104]*x7 + v[119]*x8;
1108 v += 120;
1109
1110 x1 = xb[8]; x2 = xb[9]; x3 = xb[10]; x4 = xb[11]; x5 = xb[12]; x6 = xb[13]; x7 = xb[14];
1111
1112 sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4 + v[60]*x5 + v[75]*x6 + v[90]*x7;
1113 sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4 + v[61]*x5 + v[76]*x6 + v[91]*x7;
1114 sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4 + v[62]*x5 + v[77]*x6 + v[92]*x7;
1115 sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4 + v[63]*x5 + v[78]*x6 + v[93]*x7;
1116 sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4 + v[64]*x5 + v[79]*x6 + v[94]*x7;
1117 sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4 + v[65]*x5 + v[80]*x6 + v[95]*x7;
1118 sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4 + v[66]*x5 + v[81]*x6 + v[96]*x7;
1119 sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4 + v[67]*x5 + v[82]*x6 + v[97]*x7;
1120 sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4 + v[68]*x5 + v[83]*x6 + v[98]*x7;
1121 sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4 + v[69]*x5 + v[84]*x6 + v[99]*x7;
1122 sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4 + v[70]*x5 + v[85]*x6 + v[100]*x7;
1123 sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4 + v[71]*x5 + v[86]*x6 + v[101]*x7;
1124 sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4 + v[72]*x5 + v[87]*x6 + v[102]*x7;
1125 sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4 + v[73]*x5 + v[88]*x6 + v[103]*x7;
1126 sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4 + v[74]*x5 + v[89]*x6 + v[104]*x7;
1127 v += 105;
1128 }
1129 if (usecprow) z = zarray + 15*ridx[i];
1130 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1131 z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15;
1132
1133 if (!usecprow) z += 15;
1134 }
1135
1136 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1136,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1137 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1137,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1138 ierr = PetscLogFlops(450.0*a->nz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1138,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1139 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1140}
1141
1142/* MatMult_SeqBAIJ_15_ver4 : All columns in the block are accessed at once */
1143
1144PetscErrorCode MatMult_SeqBAIJ_15_ver4(Mat A,Vec xx,Vec zz)
1145{
1146 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1147 PetscScalar *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15;
1148 const PetscScalar *x,*xb;
1149 PetscScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,*zarray;
1150 const MatScalar *v;
1151 PetscErrorCode ierr;
1152 const PetscInt *ii,*ij=a->j,*idx;
1153 PetscInt mbs,i,j,n,*ridx=NULL((void*)0);
1154 PetscBool usecprow=a->compressedrow.use;
1155
1156 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1156; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1157 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1157,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1158 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1158,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1159
1160 v = a->a;
1161 if (usecprow) {
1162 mbs = a->compressedrow.nrows;
1163 ii = a->compressedrow.i;
1164 ridx = a->compressedrow.rindex;
1165 ierr = PetscArrayzero(zarray,15*a->mbs)PetscMemzero(zarray,(15*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1165,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1166 } else {
1167 mbs = a->mbs;
1168 ii = a->i;
1169 z = zarray;
1170 }
1171
1172 for (i=0; i<mbs; i++) {
1173 n = ii[i+1] - ii[i];
1174 idx = ij + ii[i];
1175 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
1176 sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0;
1177
1178 for (j=0; j<n; j++) {
1179 xb = x + 15*(idx[j]);
1180 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1181 x8 = xb[7]; x9 = xb[8]; x10 = xb[9]; x11 = xb[10]; x12 = xb[11]; x13 = xb[12]; x14 = xb[13];x15 = xb[14];
1182
1183 sum1 += v[0]*x1 + v[15]*x2 + v[30]*x3 + v[45]*x4 + v[60]*x5 + v[75]*x6 + v[90]*x7 + v[105]*x8 + v[120]*x9 + v[135]*x10 + v[150]*x11 + v[165]*x12 + v[180]*x13 + v[195]*x14 + v[210]*x15;
1184 sum2 += v[1]*x1 + v[16]*x2 + v[31]*x3 + v[46]*x4 + v[61]*x5 + v[76]*x6 + v[91]*x7 + v[106]*x8 + v[121]*x9 + v[136]*x10 + v[151]*x11 + v[166]*x12 + v[181]*x13 + v[196]*x14 + v[211]*x15;
1185 sum3 += v[2]*x1 + v[17]*x2 + v[32]*x3 + v[47]*x4 + v[62]*x5 + v[77]*x6 + v[92]*x7 + v[107]*x8 + v[122]*x9 + v[137]*x10 + v[152]*x11 + v[167]*x12 + v[182]*x13 + v[197]*x14 + v[212]*x15;
1186 sum4 += v[3]*x1 + v[18]*x2 + v[33]*x3 + v[48]*x4 + v[63]*x5 + v[78]*x6 + v[93]*x7 + v[108]*x8 + v[123]*x9 + v[138]*x10 + v[153]*x11 + v[168]*x12 + v[183]*x13 + v[198]*x14 + v[213]*x15;
1187 sum5 += v[4]*x1 + v[19]*x2 + v[34]*x3 + v[49]*x4 + v[64]*x5 + v[79]*x6 + v[94]*x7 + v[109]*x8 + v[124]*x9 + v[139]*x10 + v[154]*x11 + v[169]*x12 + v[184]*x13 + v[199]*x14 + v[214]*x15;
1188 sum6 += v[5]*x1 + v[20]*x2 + v[35]*x3 + v[50]*x4 + v[65]*x5 + v[80]*x6 + v[95]*x7 + v[110]*x8 + v[125]*x9 + v[140]*x10 + v[155]*x11 + v[170]*x12 + v[185]*x13 + v[200]*x14 + v[215]*x15;
1189 sum7 += v[6]*x1 + v[21]*x2 + v[36]*x3 + v[51]*x4 + v[66]*x5 + v[81]*x6 + v[96]*x7 + v[111]*x8 + v[126]*x9 + v[141]*x10 + v[156]*x11 + v[171]*x12 + v[186]*x13 + v[201]*x14 + v[216]*x15;
1190 sum8 += v[7]*x1 + v[22]*x2 + v[37]*x3 + v[52]*x4 + v[67]*x5 + v[82]*x6 + v[97]*x7 + v[112]*x8 + v[127]*x9 + v[142]*x10 + v[157]*x11 + v[172]*x12 + v[187]*x13 + v[202]*x14 + v[217]*x15;
1191 sum9 += v[8]*x1 + v[23]*x2 + v[38]*x3 + v[53]*x4 + v[68]*x5 + v[83]*x6 + v[98]*x7 + v[113]*x8 + v[128]*x9 + v[143]*x10 + v[158]*x11 + v[173]*x12 + v[188]*x13 + v[203]*x14 + v[218]*x15;
1192 sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3 + v[54]*x4 + v[69]*x5 + v[84]*x6 + v[99]*x7 + v[114]*x8 + v[129]*x9 + v[144]*x10 + v[159]*x11 + v[174]*x12 + v[189]*x13 + v[204]*x14 + v[219]*x15;
1193 sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4 + v[70]*x5 + v[85]*x6 + v[100]*x7 + v[115]*x8 + v[130]*x9 + v[145]*x10 + v[160]*x11 + v[175]*x12 + v[190]*x13 + v[205]*x14 + v[220]*x15;
1194 sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4 + v[71]*x5 + v[86]*x6 + v[101]*x7 + v[116]*x8 + v[131]*x9 + v[146]*x10 + v[161]*x11 + v[176]*x12 + v[191]*x13 + v[206]*x14 + v[221]*x15;
1195 sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4 + v[72]*x5 + v[87]*x6 + v[102]*x7 + v[117]*x8 + v[132]*x9 + v[147]*x10 + v[162]*x11 + v[177]*x12 + v[192]*x13 + v[207]*x14 + v[222]*x15;
1196 sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4 + v[73]*x5 + v[88]*x6 + v[103]*x7 + v[118]*x8 + v[133]*x9 + v[148]*x10 + v[163]*x11 + v[178]*x12 + v[193]*x13 + v[208]*x14 + v[223]*x15;
1197 sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4 + v[74]*x5 + v[89]*x6 + v[104]*x7 + v[119]*x8 + v[134]*x9 + v[149]*x10 + v[164]*x11 + v[179]*x12 + v[194]*x13 + v[209]*x14 + v[224]*x15;
1198 v += 225;
1199 }
1200 if (usecprow) z = zarray + 15*ridx[i];
1201 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1202 z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15;
1203
1204 if (!usecprow) z += 15;
1205 }
1206
1207 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1207,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1208 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1208,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1209 ierr = PetscLogFlops(450.0*a->nz - 15.0*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1209,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1210 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1211}
1212
1213
1214/*
1215 This will not work with MatScalar == float because it calls the BLAS
1216*/
1217PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
1218{
1219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1220 PetscScalar *z = 0,*work,*workt,*zarray;
1221 const PetscScalar *x,*xb;
1222 const MatScalar *v;
1223 PetscErrorCode ierr;
1224 PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2;
1225 const PetscInt *idx,*ii,*ridx=NULL((void*)0);
1226 PetscInt ncols,k;
1227 PetscBool usecprow=a->compressedrow.use;
1228
1229 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1229; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1230 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1230,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1231 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1231,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1232
1233 idx = a->j;
1234 v = a->a;
1235 if (usecprow) {
1236 mbs = a->compressedrow.nrows;
1237 ii = a->compressedrow.i;
1238 ridx = a->compressedrow.rindex;
1239 ierr = PetscArrayzero(zarray,bs*a->mbs)PetscMemzero(zarray,(bs*a->mbs)*sizeof(*(zarray)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1239,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1240 } else {
1241 mbs = a->mbs;
1242 ii = a->i;
1243 z = zarray;
1244 }
1245
1246 if (!a->mult_work) {
1247 k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap->
n) : (A->rmap->n))
;
1248 ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,1248,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work
))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1248,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1249 }
1250 work = a->mult_work;
1251 for (i=0; i<mbs; i++) {
1252 n = ii[1] - ii[0]; ii++;
1253 ncols = n*bs;
1254 workt = work;
1255 for (j=0; j<n; j++) {
1256 xb = x + bs*(*idx++);
1257 for (k=0; k<bs; k++) workt[k] = xb[k];
1258 workt += bs;
1259 }
1260 if (usecprow) z = zarray + bs*ridx[i];
1261 PetscKernel_w_gets_Ar_times_v(bs,ncols,work,v,z){ PetscScalar _one = 1.0,_zero = 0.0; PetscBLASInt _ione = 1,
_bbs,_bncols; PetscErrorCode _ierr; _ierr = PetscBLASIntCast(
bs,&_bbs);do {if (__builtin_expect(!!(_ierr),0)) return PetscError
(((MPI_Comm)0x44000001),1261,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_ierr,PETSC_ERROR_REPEAT," ");} while (0); _ierr = PetscBLASIntCast
(ncols,&_bncols);do {if (__builtin_expect(!!(_ierr),0)) return
PetscError(((MPI_Comm)0x44000001),1261,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_ierr,PETSC_ERROR_REPEAT," ");} while (0); do { do { ; if (petscstack
&& (petscstack->currentsize < 64)) { petscstack
->function[petscstack->currentsize] = "BLASgemv"; petscstack
->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1261; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_TRUE || petscstack->hotdepth); } ; } while (0);
dgemv_("N",&(_bbs),&_bncols,&_one,v,&(_bbs),
work,&_ione,&_zero,z,&_ione); do { do {PetscErrorCode
_7_ierr = PetscMallocValidate(1261,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
);do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError(
((MPI_Comm)0x44000001),1261,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do {
; if (petscstack && petscstack->currentsize > 0
) { petscstack->currentsize--; petscstack->function[petscstack
->currentsize] = 0; petscstack->file[petscstack->currentsize
] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; }
if (petscstack) { petscstack->hotdepth = (((petscstack->
hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ;
} while (0); } while (0); } while(0); }
;
1262 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
1263 v += n*bs2;
1264 if (!usecprow) z += bs;
1265 }
1266 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1266,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1267 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1267,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1268 ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*a->nonzerorowcnt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1268,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1269 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1270}
1271
1272PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1273{
1274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1275 const PetscScalar *x;
1276 PetscScalar *y,*z,sum;
1277 const MatScalar *v;
1278 PetscErrorCode ierr;
1279 PetscInt mbs=a->mbs,i,n,*ridx=NULL((void*)0);
1280 const PetscInt *idx,*ii;
1281 PetscBool usecprow=a->compressedrow.use;
1282
1283 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1283; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1284 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1284,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1285 ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1285,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1286
1287 idx = a->j;
1288 v = a->a;
1289 if (usecprow) {
1290 if (zz != yy) {
1291 ierr = PetscArraycpy(z,y,mbs)((sizeof(*(z)) != sizeof(*(y))) || PetscMemcpy(z,y,(mbs)*sizeof
(*(z))))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1291,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1292 }
1293 mbs = a->compressedrow.nrows;
1294 ii = a->compressedrow.i;
1295 ridx = a->compressedrow.rindex;
1296 } else {
1297 ii = a->i;
1298 }
1299
1300 for (i=0; i<mbs; i++) {
1301 n = ii[1] - ii[0];
1302 ii++;
1303 if (!usecprow) {
1304 sum = y[i];
1305 } else {
1306 sum = y[ridx[i]];
1307 }
1308 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
1309 PetscPrefetchBlock(v+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+n),*_end = (const char*
)((v+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
1310 PetscSparseDensePlusDot(sum,x,v,idx,n){ PetscInt __i; for (__i=0; __i<n; __i++) sum += v[__i] * x
[idx[__i]];}
;
1311 v += n;
1312 idx += n;
1313 if (usecprow) {
1314 z[ridx[i]] = sum;
1315 } else {
1316 z[i] = sum;
1317 }
1318 }
1319 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1319,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1320 ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1320,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1321 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1321,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1322 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1323}
1324
1325PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1326{
1327 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1328 PetscScalar *y = 0,*z = 0,sum1,sum2;
1329 const PetscScalar *x,*xb;
1330 PetscScalar x1,x2,*yarray,*zarray;
1331 const MatScalar *v;
1332 PetscErrorCode ierr;
1333 PetscInt mbs = a->mbs,i,n,j;
1334 const PetscInt *idx,*ii,*ridx = NULL((void*)0);
1335 PetscBool usecprow = a->compressedrow.use;
1336
1337 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1337; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1338 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1338,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1339 ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1339,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1340
1341 idx = a->j;
1342 v = a->a;
1343 if (usecprow) {
1344 if (zz != yy) {
1345 ierr = PetscArraycpy(zarray,yarray,2*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray
,yarray,(2*mbs)*sizeof(*(zarray))))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1345,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1346 }
1347 mbs = a->compressedrow.nrows;
1348 ii = a->compressedrow.i;
1349 ridx = a->compressedrow.rindex;
1350 } else {
1351 ii = a->i;
1352 y = yarray;
1353 z = zarray;
1354 }
1355
1356 for (i=0; i<mbs; i++) {
1357 n = ii[1] - ii[0]; ii++;
1358 if (usecprow) {
1359 z = zarray + 2*ridx[i];
1360 y = yarray + 2*ridx[i];
1361 }
1362 sum1 = y[0]; sum2 = y[1];
1363 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
1364 PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+4*n),*_end = (const char
*)((v+4*n)+(4*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
1365 for (j=0; j<n; j++) {
1366 xb = x + 2*(*idx++);
1367 x1 = xb[0];
1368 x2 = xb[1];
1369
1370 sum1 += v[0]*x1 + v[2]*x2;
1371 sum2 += v[1]*x1 + v[3]*x2;
1372 v += 4;
1373 }
1374 z[0] = sum1; z[1] = sum2;
1375 if (!usecprow) {
1376 z += 2; y += 2;
1377 }
1378 }
1379 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1379,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1380 ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1380,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1381 ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1381,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1382 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1383}
1384
1385PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1386{
1387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1388 PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray;
1389 const PetscScalar *x,*xb;
1390 const MatScalar *v;
1391 PetscErrorCode ierr;
1392 PetscInt mbs = a->mbs,i,j,n;
1393 const PetscInt *idx,*ii,*ridx = NULL((void*)0);
1394 PetscBool usecprow = a->compressedrow.use;
1395
1396 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1396; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1397 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1397,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1398 ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1398,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1399
1400 idx = a->j;
1401 v = a->a;
1402 if (usecprow) {
1403 if (zz != yy) {
1404 ierr = PetscArraycpy(zarray,yarray,3*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray
,yarray,(3*mbs)*sizeof(*(zarray))))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1404,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1405 }
1406 mbs = a->compressedrow.nrows;
1407 ii = a->compressedrow.i;
1408 ridx = a->compressedrow.rindex;
1409 } else {
1410 ii = a->i;
1411 y = yarray;
1412 z = zarray;
1413 }
1414
1415 for (i=0; i<mbs; i++) {
1416 n = ii[1] - ii[0]; ii++;
1417 if (usecprow) {
1418 z = zarray + 3*ridx[i];
1419 y = yarray + 3*ridx[i];
1420 }
1421 sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1422 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
1423 PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+9*n),*_end = (const char
*)((v+9*n)+(9*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
1424 for (j=0; j<n; j++) {
1425 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1426 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1427 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1428 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1429 v += 9;
1430 }
1431 z[0] = sum1; z[1] = sum2; z[2] = sum3;
1432 if (!usecprow) {
1433 z += 3; y += 3;
1434 }
1435 }
1436 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1436,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1437 ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1437,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1438 ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1438,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1439 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1440}
1441
1442PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1443{
1444 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1445 PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
1446 const PetscScalar *x,*xb;
1447 const MatScalar *v;
1448 PetscErrorCode ierr;
1449 PetscInt mbs = a->mbs,i,j,n;
1450 const PetscInt *idx,*ii,*ridx=NULL((void*)0);
1451 PetscBool usecprow=a->compressedrow.use;
1452
1453 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1453; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1454 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1454,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1455 ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1455,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1456
1457 idx = a->j;
1458 v = a->a;
1459 if (usecprow) {
1460 if (zz != yy) {
1461 ierr = PetscArraycpy(zarray,yarray,4*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray
,yarray,(4*mbs)*sizeof(*(zarray))))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1461,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1462 }
1463 mbs = a->compressedrow.nrows;
1464 ii = a->compressedrow.i;
1465 ridx = a->compressedrow.rindex;
1466 } else {
1467 ii = a->i;
1468 y = yarray;
1469 z = zarray;
1470 }
1471
1472 for (i=0; i<mbs; i++) {
1473 n = ii[1] - ii[0]; ii++;
1474 if (usecprow) {
1475 z = zarray + 4*ridx[i];
1476 y = yarray + 4*ridx[i];
1477 }
1478 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1479 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
1480 PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+16*n),*_end = (const char
*)((v+16*n)+(16*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
1481 for (j=0; j<n; j++) {
1482 xb = x + 4*(*idx++);
1483 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1484 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
1485 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
1486 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
1487 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
1488 v += 16;
1489 }
1490 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1491 if (!usecprow) {
1492 z += 4; y += 4;
1493 }
1494 }
1495 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1495,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1496 ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1496,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1497 ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1497,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1498 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1499}
1500
1501PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1502{
1503 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1504 PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
1505 const PetscScalar *x,*xb;
1506 PetscScalar *yarray,*zarray;
1507 const MatScalar *v;
1508 PetscErrorCode ierr;
1509 PetscInt mbs = a->mbs,i,j,n;
1510 const PetscInt *idx,*ii,*ridx = NULL((void*)0);
1511 PetscBool usecprow=a->compressedrow.use;
1512
1513 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1513; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1514 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1514,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1515 ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1515,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1516
1517 idx = a->j;
1518 v = a->a;
1519 if (usecprow) {
1520 if (zz != yy) {
1521 ierr = PetscArraycpy(zarray,yarray,5*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray
,yarray,(5*mbs)*sizeof(*(zarray))))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1521,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1522 }
1523 mbs = a->compressedrow.nrows;
1524 ii = a->compressedrow.i;
1525 ridx = a->compressedrow.rindex;
1526 } else {
1527 ii = a->i;
1528 y = yarray;
1529 z = zarray;
1530 }
1531
1532 for (i=0; i<mbs; i++) {
1533 n = ii[1] - ii[0]; ii++;
1534 if (usecprow) {
1535 z = zarray + 5*ridx[i];
1536 y = yarray + 5*ridx[i];
1537 }
1538 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1539 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
1540 PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+25*n),*_end = (const char
*)((v+25*n)+(25*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
1541 for (j=0; j<n; j++) {
1542 xb = x + 5*(*idx++);
1543 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1544 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
1545 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
1546 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
1547 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
1548 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
1549 v += 25;
1550 }
1551 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1552 if (!usecprow) {
1553 z += 5; y += 5;
1554 }
1555 }
1556 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1556,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1557 ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1557,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1558 ierr = PetscLogFlops(50.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1558,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1559 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1560}
1561PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1562{
1563 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1564 PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,sum6;
1565 const PetscScalar *x,*xb;
1566 PetscScalar x1,x2,x3,x4,x5,x6,*yarray,*zarray;
1567 const MatScalar *v;
1568 PetscErrorCode ierr;
1569 PetscInt mbs = a->mbs,i,j,n;
1570 const PetscInt *idx,*ii,*ridx=NULL((void*)0);
1571 PetscBool usecprow=a->compressedrow.use;
1572
1573 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1573; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1574 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1574,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1575 ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1575,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1576
1577 idx = a->j;
1578 v = a->a;
1579 if (usecprow) {
1580 if (zz != yy) {
1581 ierr = PetscArraycpy(zarray,yarray,6*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray
,yarray,(6*mbs)*sizeof(*(zarray))))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1581,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1582 }
1583 mbs = a->compressedrow.nrows;
1584 ii = a->compressedrow.i;
1585 ridx = a->compressedrow.rindex;
1586 } else {
1587 ii = a->i;
1588 y = yarray;
1589 z = zarray;
1590 }
1591
1592 for (i=0; i<mbs; i++) {
1593 n = ii[1] - ii[0]; ii++;
1594 if (usecprow) {
1595 z = zarray + 6*ridx[i];
1596 y = yarray + 6*ridx[i];
1597 }
1598 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
1599 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
1600 PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+36*n),*_end = (const char
*)((v+36*n)+(36*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
1601 for (j=0; j<n; j++) {
1602 xb = x + 6*(*idx++);
1603 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1604 sum1 += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
1605 sum2 += v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
1606 sum3 += v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
1607 sum4 += v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
1608 sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
1609 sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
1610 v += 36;
1611 }
1612 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
1613 if (!usecprow) {
1614 z += 6; y += 6;
1615 }
1616 }
1617 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1617,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1618 ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1618,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1619 ierr = PetscLogFlops(72.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1619,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1620 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1621}
1622
1623PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1624{
1625 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1626 PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1627 const PetscScalar *x,*xb;
1628 PetscScalar x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray;
1629 const MatScalar *v;
1630 PetscErrorCode ierr;
1631 PetscInt mbs = a->mbs,i,j,n;
1632 const PetscInt *idx,*ii,*ridx = NULL((void*)0);
1633 PetscBool usecprow=a->compressedrow.use;
1634
1635 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1635; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1636 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1636,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1637 ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1637,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1638
1639 idx = a->j;
1640 v = a->a;
1641 if (usecprow) {
1642 if (zz != yy) {
1643 ierr = PetscArraycpy(zarray,yarray,7*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray
,yarray,(7*mbs)*sizeof(*(zarray))))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1643,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1644 }
1645 mbs = a->compressedrow.nrows;
1646 ii = a->compressedrow.i;
1647 ridx = a->compressedrow.rindex;
1648 } else {
1649 ii = a->i;
1650 y = yarray;
1651 z = zarray;
1652 }
1653
1654 for (i=0; i<mbs; i++) {
1655 n = ii[1] - ii[0]; ii++;
1656 if (usecprow) {
1657 z = zarray + 7*ridx[i];
1658 y = yarray + 7*ridx[i];
1659 }
1660 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1661 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
1662 PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+49*n),*_end = (const char
*)((v+49*n)+(49*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
1663 for (j=0; j<n; j++) {
1664 xb = x + 7*(*idx++);
1665 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1666 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1667 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1668 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1669 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1670 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1671 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1672 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1673 v += 49;
1674 }
1675 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1676 if (!usecprow) {
1677 z += 7; y += 7;
1678 }
1679 }
1680 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1680,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1681 ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1681,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1682 ierr = PetscLogFlops(98.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1682,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1683 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1684}
1685
#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
/*
   MatMultAdd_SeqBAIJ_9_AVX2 - Computes zz = yy + A*xx for a SeqBAIJ matrix with 9x9 blocks
   using AVX2/FMA intrinsics.

   Input Parameters:
+  A  - the matrix
.  xx - vector that is multiplied by A
-  yy - vector that is added to the product

   Output Parameter:
.  zz - the result

   Notes:
   yy is copied into zz up front and the products are accumulated into zz in place.
   For every block row the x entries addressed by the row's block columns are gathered
   into a->mult_work (allocated here on first use), then each 81-value block, read with
   entry (r,c) at offset r + 9*c, is applied one column at a time.  The nine results of a
   block row live in three 4-wide accumulators: z0 holds rows 0-3, z1 rows 4-7, and only
   lane 0 of z2 (row 8) is meaningful.
*/
PetscErrorCode MatMultAdd_SeqBAIJ_9_AVX2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscScalar       *z = NULL,*work,*workt,*zarray;
  const PetscScalar *x,*xb,*w;
  const MatScalar   *v,*vb;
  PetscErrorCode    ierr;
  PetscInt          mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2;
  PetscInt          k;
  PetscBool         usecprow=a->compressedrow.use;
  const PetscInt    *idx,*ii,*ridx=NULL;
  __m256d           xk,z0,z1,z2;
  /* Selects lane 0 only (last argument is the lowest lane); the sign bit is built with an
     unsigned shift because 1LL<<63 is not representable in a signed long long */
  const __m256i     mask1 = _mm256_set_epi64x(0LL, 0LL, 0LL, (long long)(1ULL<<63));

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);

  idx = a->j;
  v   = a->a;
  if (usecprow) {
    mbs  = a->compressedrow.nrows;
    ii   = a->compressedrow.i;
    ridx = a->compressedrow.rindex;
  } else {
    mbs = a->mbs;
    ii  = a->i;
    z   = zarray;
  }

  if (!a->mult_work) {
    k    = PetscMax(A->rmap->n,A->cmap->n);
    ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
  }

  work = a->mult_work;
  for (i=0; i<mbs; i++) {
    n = ii[1] - ii[0]; ii++;

    /* gather the x entries of this block row contiguously into work */
    workt = work;
    for (j=0; j<n; j++) {
      xb = x + bs*(*idx++);
      for (k=0; k<bs; k++) workt[k] = xb[k];
      workt += bs;
    }
    if (usecprow) z = zarray + bs*ridx[i];

    z0 = _mm256_loadu_pd(&z[0]); z1 = _mm256_loadu_pd(&z[4]); z2 = _mm256_set1_pd(z[8]);

    for (j=0; j<n; j++) {
      vb = v + j*81;
      w  = work + j*9;

      /* columns 1-8: the load at vb+9*k+8 also picks up the first three entries of the next
         column, which stay inside this block and only affect the unused lanes of z2 */
      for (k=0; k<8; k++) {
        xk = _mm256_set1_pd(w[k]);
        z0 = _mm256_fmadd_pd(_mm256_loadu_pd(vb+9*k  ),xk,z0);
        z1 = _mm256_fmadd_pd(_mm256_loadu_pd(vb+9*k+4),xk,z1);
        z2 = _mm256_fmadd_pd(_mm256_loadu_pd(vb+9*k+8),xk,z2);
      }

      /* ninth column: vb[80] is the last entry of the block, so load just that lane */
      xk = _mm256_set1_pd(w[8]);
      z0 = _mm256_fmadd_pd(_mm256_loadu_pd(vb+72),xk,z0);
      z1 = _mm256_fmadd_pd(_mm256_loadu_pd(vb+76),xk,z1);
      z2 = _mm256_fmadd_pd(_mm256_maskload_pd(vb+80,mask1),xk,z2);
    }

    /* write back rows 0-7 in full and row 8 through the single-lane mask */
    _mm256_storeu_pd(&z[0], z0); _mm256_storeu_pd(&z[4], z1); _mm256_maskstore_pd(&z[8], mask1, z2);

    v += n*bs2;
    if (!usecprow) z += bs;
  }
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
  ierr = PetscLogFlops(162.0*a->nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
1806
1807PetscErrorCode MatMultAdd_SeqBAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1808{
1809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1810 PetscScalar *y = 0,*z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11;
1811 const PetscScalar *x,*xb;
1812 PetscScalar x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,*yarray,*zarray;
1813 const MatScalar *v;
1814 PetscErrorCode ierr;
1815 PetscInt mbs = a->mbs,i,j,n;
1816 const PetscInt *idx,*ii,*ridx = NULL((void*)0);
1817 PetscBool usecprow=a->compressedrow.use;
1818
1819 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1819; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1820 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1820,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1821 ierr = VecGetArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1821,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1822
1823 idx = a->j;
1824 v = a->a;
1825 if (usecprow) {
1826 if (zz != yy) {
1827 ierr = PetscArraycpy(zarray,yarray,7*mbs)((sizeof(*(zarray)) != sizeof(*(yarray))) || PetscMemcpy(zarray
,yarray,(7*mbs)*sizeof(*(zarray))))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1827,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1828 }
1829 mbs = a->compressedrow.nrows;
1830 ii = a->compressedrow.i;
1831 ridx = a->compressedrow.rindex;
1832 } else {
1833 ii = a->i;
1834 y = yarray;
1835 z = zarray;
1836 }
1837
1838 for (i=0; i<mbs; i++) {
1839 n = ii[1] - ii[0]; ii++;
1840 if (usecprow) {
1841 z = zarray + 11*ridx[i];
1842 y = yarray + 11*ridx[i];
1843 }
1844 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1845 sum8 = y[7]; sum9 = y[8]; sum10 = y[9]; sum11 = y[10];
1846 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
1847 PetscPrefetchBlock(v+121*n,121*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+121*n),*_end = (const char
*)((v+121*n)+(121*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
1848 for (j=0; j<n; j++) {
1849 xb = x + 11*(*idx++);
1850 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];x8 = xb[7]; x9 = xb[8]; x10 = xb[9]; x11 = xb[10];
1851 sum1 += v[ 0]*x1 + v[ 11]*x2 + v[2*11]*x3 + v[3*11]*x4 + v[4*11]*x5 + v[5*11]*x6 + v[6*11]*x7+ v[7*11]*x8 + v[8*11]*x9 + v[9*11]*x10 + v[10*11]*x11;
1852 sum2 += v[1+0]*x1 + v[1+11]*x2 + v[1+2*11]*x3 + v[1+3*11]*x4 + v[1+4*11]*x5 + v[1+5*11]*x6 + v[1+6*11]*x7+ v[1+7*11]*x8 + v[1+8*11]*x9 + v[1+9*11]*x10 + v[1+10*11]*x11;
1853 sum3 += v[2+0]*x1 + v[2+11]*x2 + v[2+2*11]*x3 + v[2+3*11]*x4 + v[2+4*11]*x5 + v[2+5*11]*x6 + v[2+6*11]*x7+ v[2+7*11]*x8 + v[2+8*11]*x9 + v[2+9*11]*x10 + v[2+10*11]*x11;
1854 sum4 += v[3+0]*x1 + v[3+11]*x2 + v[3+2*11]*x3 + v[3+3*11]*x4 + v[3+4*11]*x5 + v[3+5*11]*x6 + v[3+6*11]*x7+ v[3+7*11]*x8 + v[3+8*11]*x9 + v[3+9*11]*x10 + v[3+10*11]*x11;
1855 sum5 += v[4+0]*x1 + v[4+11]*x2 + v[4+2*11]*x3 + v[4+3*11]*x4 + v[4+4*11]*x5 + v[4+5*11]*x6 + v[4+6*11]*x7+ v[4+7*11]*x8 + v[4+8*11]*x9 + v[4+9*11]*x10 + v[4+10*11]*x11;
1856 sum6 += v[5+0]*x1 + v[5+11]*x2 + v[5+2*11]*x3 + v[5+3*11]*x4 + v[5+4*11]*x5 + v[5+5*11]*x6 + v[5+6*11]*x7+ v[5+7*11]*x8 + v[5+8*11]*x9 + v[5+9*11]*x10 + v[5+10*11]*x11;
1857 sum7 += v[6+0]*x1 + v[6+11]*x2 + v[6+2*11]*x3 + v[6+3*11]*x4 + v[6+4*11]*x5 + v[6+5*11]*x6 + v[6+6*11]*x7+ v[6+7*11]*x8 + v[6+8*11]*x9 + v[6+9*11]*x10 + v[6+10*11]*x11;
1858 sum8 += v[7+0]*x1 + v[7+11]*x2 + v[7+2*11]*x3 + v[7+3*11]*x4 + v[7+4*11]*x5 + v[7+5*11]*x6 + v[7+6*11]*x7+ v[7+7*11]*x8 + v[7+8*11]*x9 + v[7+9*11]*x10 + v[7+10*11]*x11;
1859 sum9 += v[8+0]*x1 + v[8+11]*x2 + v[8+2*11]*x3 + v[8+3*11]*x4 + v[8+4*11]*x5 + v[8+5*11]*x6 + v[8+6*11]*x7+ v[8+7*11]*x8 + v[8+8*11]*x9 + v[8+9*11]*x10 + v[8+10*11]*x11;
1860 sum10 += v[9+0]*x1 + v[9+11]*x2 + v[9+2*11]*x3 + v[9+3*11]*x4 + v[9+4*11]*x5 + v[9+5*11]*x6 + v[9+6*11]*x7+ v[9+7*11]*x8 + v[9+8*11]*x9 + v[9+9*11]*x10 + v[9+10*11]*x11;
1861 sum11 += v[10+0]*x1 + v[10+11]*x2 + v[10+2*11]*x3 + v[10+3*11]*x4 + v[10+4*11]*x5 + v[10+5*11]*x6 + v[10+6*11]*x7+ v[10+7*11]*x8 + v[10+8*11]*x9 + v[10+9*11]*x10 + v[10+10*11]*x11;
1862 v += 121;
1863 }
1864 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1865 z[7] = sum6; z[8] = sum7; z[9] = sum8; z[10] = sum9; z[11] = sum10;
1866 if (!usecprow) {
1867 z += 11; y += 11;
1868 }
1869 }
1870 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1870,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1871 ierr = VecRestoreArrayPair(yy,zz,&yarray,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1871,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1872 ierr = PetscLogFlops(242.0*a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1872,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1873 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1874}
1875
1876PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1877{
1878 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1879 PetscScalar *z = 0,*work,*workt,*zarray;
1880 const PetscScalar *x,*xb;
1881 const MatScalar *v;
1882 PetscErrorCode ierr;
1883 PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2;
1884 PetscInt ncols,k;
1885 const PetscInt *ridx = NULL((void*)0),*idx,*ii;
1886 PetscBool usecprow = a->compressedrow.use;
1887
1888 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1888; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1889 ierr = VecCopy(yy,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1889,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1890 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1890,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1891 ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1891,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1892
1893 idx = a->j;
1894 v = a->a;
1895 if (usecprow) {
1896 mbs = a->compressedrow.nrows;
1897 ii = a->compressedrow.i;
1898 ridx = a->compressedrow.rindex;
1899 } else {
1900 mbs = a->mbs;
1901 ii = a->i;
1902 z = zarray;
1903 }
1904
1905 if (!a->mult_work) {
1906 k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap->
n) : (A->rmap->n))
;
1907 ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,1907,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work
))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1907,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1908 }
1909 work = a->mult_work;
1910 for (i=0; i<mbs; i++) {
1911 n = ii[1] - ii[0]; ii++;
1912 ncols = n*bs;
1913 workt = work;
1914 for (j=0; j<n; j++) {
1915 xb = x + bs*(*idx++);
1916 for (k=0; k<bs; k++) workt[k] = xb[k];
1917 workt += bs;
1918 }
1919 if (usecprow) z = zarray + bs*ridx[i];
1920 PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z){ PetscScalar _one = 1.0; PetscBLASInt _ione = 1,_bbs,_bncols
; PetscErrorCode _ierr; _ierr = PetscBLASIntCast(bs,&_bbs
);do {if (__builtin_expect(!!(_ierr),0)) return PetscError(((
MPI_Comm)0x44000001),1920,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_ierr,PETSC_ERROR_REPEAT," ");} while (0); _ierr = PetscBLASIntCast
(ncols,&_bncols);do {if (__builtin_expect(!!(_ierr),0)) return
PetscError(((MPI_Comm)0x44000001),1920,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_ierr,PETSC_ERROR_REPEAT," ");} while (0); do { do { ; if (petscstack
&& (petscstack->currentsize < 64)) { petscstack
->function[petscstack->currentsize] = "BLASgemv"; petscstack
->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1920; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_TRUE || petscstack->hotdepth); } ; } while (0);
dgemv_("N",&(_bbs),&(_bncols),&_one,v,&(_bbs
),work,&_ione,&_one,z,&_ione); do { do {PetscErrorCode
_7_ierr = PetscMallocValidate(1920,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
);do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError(
((MPI_Comm)0x44000001),1920,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do {
; if (petscstack && petscstack->currentsize > 0
) { petscstack->currentsize--; petscstack->function[petscstack
->currentsize] = 0; petscstack->file[petscstack->currentsize
] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; }
if (petscstack) { petscstack->hotdepth = (((petscstack->
hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ;
} while (0); } while (0); } while(0); }
;
1921 /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
1922 v += n*bs2;
1923 if (!usecprow) z += bs;
1924 }
1925 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1925,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1926 ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1926,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1927 ierr = PetscLogFlops(2.0*a->nz*bs2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1927,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1928 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1929}
1930
1931PetscErrorCode MatMultHermitianTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1932{
1933 PetscScalar zero = 0.0;
1934 PetscErrorCode ierr;
1935
1936 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1936; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1937 ierr = VecSet(zz,zero);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1937,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1938 ierr = MatMultHermitianTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1938,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1939 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1940}
1941
1942PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1943{
1944 PetscScalar zero = 0.0;
1945 PetscErrorCode ierr;
1946
1947 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1947; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1948 ierr = VecSet(zz,zero);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1948,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1949 ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1949,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1950 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
1951}
1952
1953PetscErrorCode MatMultHermitianTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1954{
1955 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1956 PetscScalar *z,x1,x2,x3,x4,x5;
1957 const PetscScalar *x,*xb = NULL((void*)0);
1958 const MatScalar *v;
1959 PetscErrorCode ierr;
1960 PetscInt mbs,i,rval,bs=A->rmap->bs,j,n;
1961 const PetscInt *idx,*ii,*ib,*ridx = NULL((void*)0);
1962 Mat_CompressedRow cprow = a->compressedrow;
1963 PetscBool usecprow = cprow.use;
1964
1965 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 1965; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
1966 if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1966,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
; }
1967 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1967,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1968 ierr = VecGetArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),1968,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
1969
1970 idx = a->j;
1971 v = a->a;
1972 if (usecprow) {
1973 mbs = cprow.nrows;
1974 ii = cprow.i;
1975 ridx = cprow.rindex;
1976 } else {
1977 mbs=a->mbs;
1978 ii = a->i;
1979 xb = x;
1980 }
1981
1982 switch (bs) {
1983 case 1:
1984 for (i=0; i<mbs; i++) {
1985 if (usecprow) xb = x + ridx[i];
1986 x1 = xb[0];
1987 ib = idx + ii[0];
1988 n = ii[1] - ii[0]; ii++;
1989 for (j=0; j<n; j++) {
1990 rval = ib[j];
1991 z[rval] += PetscConj(*v)(*v) * x1;
1992 v++;
1993 }
1994 if (!usecprow) xb++;
1995 }
1996 break;
1997 case 2:
1998 for (i=0; i<mbs; i++) {
1999 if (usecprow) xb = x + 2*ridx[i];
2000 x1 = xb[0]; x2 = xb[1];
2001 ib = idx + ii[0];
2002 n = ii[1] - ii[0]; ii++;
2003 for (j=0; j<n; j++) {
2004 rval = ib[j]*2;
2005 z[rval++] += PetscConj(v[0])(v[0])*x1 + PetscConj(v[1])(v[1])*x2;
2006 z[rval++] += PetscConj(v[2])(v[2])*x1 + PetscConj(v[3])(v[3])*x2;
2007 v += 4;
2008 }
2009 if (!usecprow) xb += 2;
2010 }
2011 break;
2012 case 3:
2013 for (i=0; i<mbs; i++) {
2014 if (usecprow) xb = x + 3*ridx[i];
2015 x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
2016 ib = idx + ii[0];
2017 n = ii[1] - ii[0]; ii++;
2018 for (j=0; j<n; j++) {
2019 rval = ib[j]*3;
2020 z[rval++] += PetscConj(v[0])(v[0])*x1 + PetscConj(v[1])(v[1])*x2 + PetscConj(v[2])(v[2])*x3;
2021 z[rval++] += PetscConj(v[3])(v[3])*x1 + PetscConj(v[4])(v[4])*x2 + PetscConj(v[5])(v[5])*x3;
2022 z[rval++] += PetscConj(v[6])(v[6])*x1 + PetscConj(v[7])(v[7])*x2 + PetscConj(v[8])(v[8])*x3;
2023 v += 9;
2024 }
2025 if (!usecprow) xb += 3;
2026 }
2027 break;
2028 case 4:
2029 for (i=0; i<mbs; i++) {
2030 if (usecprow) xb = x + 4*ridx[i];
2031 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
2032 ib = idx + ii[0];
2033 n = ii[1] - ii[0]; ii++;
2034 for (j=0; j<n; j++) {
2035 rval = ib[j]*4;
2036 z[rval++] += PetscConj(v[0])(v[0])*x1 + PetscConj(v[1])(v[1])*x2 + PetscConj(v[2])(v[2])*x3 + PetscConj(v[3])(v[3])*x4;
2037 z[rval++] += PetscConj(v[4])(v[4])*x1 + PetscConj(v[5])(v[5])*x2 + PetscConj(v[6])(v[6])*x3 + PetscConj(v[7])(v[7])*x4;
2038 z[rval++] += PetscConj(v[8])(v[8])*x1 + PetscConj(v[9])(v[9])*x2 + PetscConj(v[10])(v[10])*x3 + PetscConj(v[11])(v[11])*x4;
2039 z[rval++] += PetscConj(v[12])(v[12])*x1 + PetscConj(v[13])(v[13])*x2 + PetscConj(v[14])(v[14])*x3 + PetscConj(v[15])(v[15])*x4;
2040 v += 16;
2041 }
2042 if (!usecprow) xb += 4;
2043 }
2044 break;
2045 case 5:
2046 for (i=0; i<mbs; i++) {
2047 if (usecprow) xb = x + 5*ridx[i];
2048 x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
2049 x4 = xb[3]; x5 = xb[4];
2050 ib = idx + ii[0];
2051 n = ii[1] - ii[0]; ii++;
2052 for (j=0; j<n; j++) {
2053 rval = ib[j]*5;
2054 z[rval++] += PetscConj(v[0])(v[0])*x1 + PetscConj(v[1])(v[1])*x2 + PetscConj(v[2])(v[2])*x3 + PetscConj(v[3])(v[3])*x4 + PetscConj(v[4])(v[4])*x5;
2055 z[rval++] += PetscConj(v[5])(v[5])*x1 + PetscConj(v[6])(v[6])*x2 + PetscConj(v[7])(v[7])*x3 + PetscConj(v[8])(v[8])*x4 + PetscConj(v[9])(v[9])*x5;
2056 z[rval++] += PetscConj(v[10])(v[10])*x1 + PetscConj(v[11])(v[11])*x2 + PetscConj(v[12])(v[12])*x3 + PetscConj(v[13])(v[13])*x4 + PetscConj(v[14])(v[14])*x5;
2057 z[rval++] += PetscConj(v[15])(v[15])*x1 + PetscConj(v[16])(v[16])*x2 + PetscConj(v[17])(v[17])*x3 + PetscConj(v[18])(v[18])*x4 + PetscConj(v[19])(v[19])*x5;
2058 z[rval++] += PetscConj(v[20])(v[20])*x1 + PetscConj(v[21])(v[21])*x2 + PetscConj(v[22])(v[22])*x3 + PetscConj(v[23])(v[23])*x4 + PetscConj(v[24])(v[24])*x5;
2059 v += 25;
2060 }
2061 if (!usecprow) xb += 5;
2062 }
2063 break;
2064 default: /* block sizes larger than 5 by 5 are handled by BLAS */
2065 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size larger than 5 is not supported yet")return PetscError(((MPI_Comm)0x44000001),2065,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,56,PETSC_ERROR_INITIAL,"block size larger than 5 is not supported yet"
)
;
2066#if 0
2067 {
2068 PetscInt ncols,k,bs2=a->bs2;
2069 PetscScalar *work,*workt,zb;
2070 const PetscScalar *xtmp;
2071 if (!a->mult_work) {
2072 k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap->
n) : (A->rmap->n))
;
2073 ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,2073,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work
))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2073,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2074 }
2075 work = a->mult_work;
2076 xtmp = x;
2077 for (i=0; i<mbs; i++) {
2078 n = ii[1] - ii[0]; ii++;
2079 ncols = n*bs;
2080 ierr = PetscArrayzero(work,ncols)PetscMemzero(work,(ncols)*sizeof(*(work)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2080,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2081 if (usecprow) xtmp = x + bs*ridx[i];
2082 PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work){ PetscScalar _one = 1.0; PetscBLASInt _ione = 1,_bbs,_bncols
; PetscErrorCode _ierr; _ierr = PetscBLASIntCast(bs,&_bbs
);do {if (__builtin_expect(!!(_ierr),0)) return PetscError(((
MPI_Comm)0x44000001),2082,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_ierr,PETSC_ERROR_REPEAT," ");} while (0); _ierr = PetscBLASIntCast
(ncols,&_bncols);do {if (__builtin_expect(!!(_ierr),0)) return
PetscError(((MPI_Comm)0x44000001),2082,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_ierr,PETSC_ERROR_REPEAT," ");} while (0); do { do { ; if (petscstack
&& (petscstack->currentsize < 64)) { petscstack
->function[petscstack->currentsize] = "BLASgemv"; petscstack
->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2082; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_TRUE || petscstack->hotdepth); } ; } while (0);
dgemv_("T",&_bbs,&_bncols,&_one,v,&_bbs,xtmp
,&_ione,&_one,work,&_ione); do { do {PetscErrorCode
_7_ierr = PetscMallocValidate(2082,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
);do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError(
((MPI_Comm)0x44000001),2082,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do {
; if (petscstack && petscstack->currentsize > 0
) { petscstack->currentsize--; petscstack->function[petscstack
->currentsize] = 0; petscstack->file[petscstack->currentsize
] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; }
if (petscstack) { petscstack->hotdepth = (((petscstack->
hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ;
} while (0); } while (0); } while(0); }
;
2083 /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
2084 v += n*bs2;
2085 if (!usecprow) xtmp += bs;
2086 workt = work;
2087 for (j=0; j<n; j++) {
2088 zb = z + bs*(*idx++);
2089 for (k=0; k<bs; k++) zb[k] += workt[k] ;
2090 workt += bs;
2091 }
2092 }
2093 }
2094#endif
2095 }
2096 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2096,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2097 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2097,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2098 ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2098,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2099 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2100}
2101
2102PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
2103{
2104 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2105 PetscScalar *zb,*z,x1,x2,x3,x4,x5;
2106 const PetscScalar *x,*xb = 0;
2107 const MatScalar *v;
2108 PetscErrorCode ierr;
2109 PetscInt mbs,i,rval,bs=A->rmap->bs,j,n,bs2=a->bs2;
2110 const PetscInt *idx,*ii,*ib,*ridx = NULL((void*)0);
2111 Mat_CompressedRow cprow = a->compressedrow;
2112 PetscBool usecprow=cprow.use;
2113
2114 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2114; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2115 if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2115,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
; }
2116 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2116,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2117 ierr = VecGetArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2117,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2118
2119 idx = a->j;
2120 v = a->a;
2121 if (usecprow) {
2122 mbs = cprow.nrows;
2123 ii = cprow.i;
2124 ridx = cprow.rindex;
2125 } else {
2126 mbs=a->mbs;
2127 ii = a->i;
2128 xb = x;
2129 }
2130
2131 switch (bs) {
2132 case 1:
2133 for (i=0; i<mbs; i++) {
2134 if (usecprow) xb = x + ridx[i];
2135 x1 = xb[0];
2136 ib = idx + ii[0];
2137 n = ii[1] - ii[0]; ii++;
2138 for (j=0; j<n; j++) {
2139 rval = ib[j];
2140 z[rval] += *v * x1;
2141 v++;
2142 }
2143 if (!usecprow) xb++;
2144 }
2145 break;
2146 case 2:
2147 for (i=0; i<mbs; i++) {
2148 if (usecprow) xb = x + 2*ridx[i];
2149 x1 = xb[0]; x2 = xb[1];
2150 ib = idx + ii[0];
2151 n = ii[1] - ii[0]; ii++;
2152 for (j=0; j<n; j++) {
2153 rval = ib[j]*2;
2154 z[rval++] += v[0]*x1 + v[1]*x2;
2155 z[rval++] += v[2]*x1 + v[3]*x2;
2156 v += 4;
2157 }
2158 if (!usecprow) xb += 2;
2159 }
2160 break;
2161 case 3:
2162 for (i=0; i<mbs; i++) {
2163 if (usecprow) xb = x + 3*ridx[i];
2164 x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
2165 ib = idx + ii[0];
2166 n = ii[1] - ii[0]; ii++;
2167 for (j=0; j<n; j++) {
2168 rval = ib[j]*3;
2169 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
2170 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
2171 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
2172 v += 9;
2173 }
2174 if (!usecprow) xb += 3;
2175 }
2176 break;
2177 case 4:
2178 for (i=0; i<mbs; i++) {
2179 if (usecprow) xb = x + 4*ridx[i];
2180 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
2181 ib = idx + ii[0];
2182 n = ii[1] - ii[0]; ii++;
2183 for (j=0; j<n; j++) {
2184 rval = ib[j]*4;
2185 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
2186 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
2187 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
2188 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
2189 v += 16;
2190 }
2191 if (!usecprow) xb += 4;
2192 }
2193 break;
2194 case 5:
2195 for (i=0; i<mbs; i++) {
2196 if (usecprow) xb = x + 5*ridx[i];
2197 x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
2198 x4 = xb[3]; x5 = xb[4];
2199 ib = idx + ii[0];
2200 n = ii[1] - ii[0]; ii++;
2201 for (j=0; j<n; j++) {
2202 rval = ib[j]*5;
2203 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
2204 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
2205 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
2206 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
2207 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
2208 v += 25;
2209 }
2210 if (!usecprow) xb += 5;
2211 }
2212 break;
2213 default: { /* block sizes larger then 5 by 5 are handled by BLAS */
2214 PetscInt ncols,k;
2215 PetscScalar *work,*workt;
2216 const PetscScalar *xtmp;
2217 if (!a->mult_work) {
2218 k = PetscMax(A->rmap->n,A->cmap->n)(((A->rmap->n)<(A->cmap->n)) ? (A->cmap->
n) : (A->rmap->n))
;
2219 ierr = PetscMalloc1(k+1,&a->mult_work)PetscMallocA(1,PETSC_FALSE,2219,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(k+1)*sizeof(**(&a->mult_work)),(&a->mult_work
))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2219,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2220 }
2221 work = a->mult_work;
2222 xtmp = x;
2223 for (i=0; i<mbs; i++) {
2224 n = ii[1] - ii[0]; ii++;
2225 ncols = n*bs;
2226 ierr = PetscArrayzero(work,ncols)PetscMemzero(work,(ncols)*sizeof(*(work)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2226,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2227 if (usecprow) xtmp = x + bs*ridx[i];
2228 PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work){ PetscScalar _one = 1.0; PetscBLASInt _ione = 1,_bbs,_bncols
; PetscErrorCode _ierr; _ierr = PetscBLASIntCast(bs,&_bbs
);do {if (__builtin_expect(!!(_ierr),0)) return PetscError(((
MPI_Comm)0x44000001),2228,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_ierr,PETSC_ERROR_REPEAT," ");} while (0); _ierr = PetscBLASIntCast
(ncols,&_bncols);do {if (__builtin_expect(!!(_ierr),0)) return
PetscError(((MPI_Comm)0x44000001),2228,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_ierr,PETSC_ERROR_REPEAT," ");} while (0); do { do { ; if (petscstack
&& (petscstack->currentsize < 64)) { petscstack
->function[petscstack->currentsize] = "BLASgemv"; petscstack
->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2228; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_TRUE || petscstack->hotdepth); } ; } while (0);
dgemv_("T",&_bbs,&_bncols,&_one,v,&_bbs,xtmp
,&_ione,&_one,work,&_ione); do { do {PetscErrorCode
_7_ierr = PetscMallocValidate(2228,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
);do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError(
((MPI_Comm)0x44000001),2228,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do {
; if (petscstack && petscstack->currentsize > 0
) { petscstack->currentsize--; petscstack->function[petscstack
->currentsize] = 0; petscstack->file[petscstack->currentsize
] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; }
if (petscstack) { petscstack->hotdepth = (((petscstack->
hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ;
} while (0); } while (0); } while(0); }
;
2229 /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
2230 v += n*bs2;
2231 if (!usecprow) xtmp += bs;
2232 workt = work;
2233 for (j=0; j<n; j++) {
2234 zb = z + bs*(*idx++);
2235 for (k=0; k<bs; k++) zb[k] += workt[k];
2236 workt += bs;
2237 }
2238 }
2239 }
2240 }
2241 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2241,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2242 ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2242,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2243 ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2243,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2244 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2245}
2246
2247PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha)
2248{
2249 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2250 PetscInt totalnz = a->bs2*a->nz;
2251 PetscScalar oalpha = alpha;
2252 PetscErrorCode ierr;
2253 PetscBLASInt one = 1,tnz;
2254
2255 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2255; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2256 ierr = PetscBLASIntCast(totalnz,&tnz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2256,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2257 PetscStackCallBLAS("BLASscal",BLASscal_(&tnz,&oalpha,a->a,&one))do { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = "BLASscal"; petscstack->file[petscstack->currentsize
] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2257; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_TRUE || petscstack->hotdepth); } ; } while (0);
dscal_(&tnz,&oalpha,a->a,&one); do { do {PetscErrorCode
_7_ierr = PetscMallocValidate(2257,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
);do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError(
((MPI_Comm)0x44000001),2257,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do {
; if (petscstack && petscstack->currentsize > 0
) { petscstack->currentsize--; petscstack->function[petscstack
->currentsize] = 0; petscstack->file[petscstack->currentsize
] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; }
if (petscstack) { petscstack->hotdepth = (((petscstack->
hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ;
} while (0); } while (0); } while(0)
;
2258 ierr = PetscLogFlops(totalnz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2258,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2259 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2260}
2261
2262PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
2263{
2264 PetscErrorCode ierr;
2265 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2266 MatScalar *v = a->a;
2267 PetscReal sum = 0.0;
2268 PetscInt i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1;
2269
2270 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2270; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2271 if (type == NORM_FROBENIUS) {
2272#if defined(PETSC_USE_REAL___FP16)
2273 PetscBLASInt one = 1,cnt = bs2*nz;
2274 *norm = BLASnrm2_dnrm2_(&cnt,v,&one);
2275#else
2276 for (i=0; i<bs2*nz; i++) {
2277 sum += PetscRealPart(PetscConj(*v)*(*v))((*v)*(*v)); v++;
2278 }
2279#endif
2280 *norm = PetscSqrtReal(sum)sqrt(sum);
2281 ierr = PetscLogFlops(2*bs2*nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2281,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2282 } else if (type == NORM_1) { /* maximum column sum */
2283 PetscReal *tmp;
2284 PetscInt *bcol = a->j;
2285 ierr = PetscCalloc1(A->cmap->n+1,&tmp)PetscMallocA(1,PETSC_TRUE,2285,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,(size_t)(A->cmap->n+1)*sizeof(**(&tmp)),(&tmp)
)
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2285,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2286 for (i=0; i<nz; i++) {
2287 for (j=0; j<bs; j++) {
2288 k1 = bs*(*bcol) + j; /* column index */
2289 for (k=0; k<bs; k++) {
2290 tmp[k1] += PetscAbsScalar(*v); v++;
2291 }
2292 }
2293 bcol++;
2294 }
2295 *norm = 0.0;
2296 for (j=0; j<A->cmap->n; j++) {
2297 if (tmp[j] > *norm) *norm = tmp[j];
2298 }
2299 ierr = PetscFree(tmp)((*PetscTrFree)((void*)(tmp),2299,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
) || ((tmp) = 0,0))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2299,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2300 ierr = PetscLogFlops(PetscMax(bs2*nz-1,0)(((bs2*nz-1)<(0)) ? (0) : (bs2*nz-1)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2300,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2301 } else if (type == NORM_INFINITY) { /* maximum row sum */
2302 *norm = 0.0;
2303 for (k=0; k<bs; k++) {
2304 for (j=0; j<a->mbs; j++) {
2305 v = a->a + bs2*a->i[j] + k;
2306 sum = 0.0;
2307 for (i=0; i<a->i[j+1]-a->i[j]; i++) {
2308 for (k1=0; k1<bs; k1++) {
2309 sum += PetscAbsScalar(*v);
2310 v += bs;
2311 }
2312 }
2313 if (sum > *norm) *norm = sum;
2314 }
2315 }
2316 ierr = PetscLogFlops(PetscMax(bs2*nz-1,0)(((bs2*nz-1)<(0)) ? (0) : (bs2*nz-1)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2316,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2317 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet")return PetscError(((MPI_Comm)0x44000001),2317,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,56,PETSC_ERROR_INITIAL,"No support for this norm yet")
;
2318 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2319}
2320
2321
2322PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscBool * flg)
2323{
2324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data;
2325 PetscErrorCode ierr;
2326
2327 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2327; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2328 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
2329 if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
2330 *flg = PETSC_FALSE;
2331 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2332 }
2333
2334 /* if the a->i are the same */
2335 ierr = PetscArraycmp(a->i,b->i,a->mbs+1,flg)((sizeof(*(a->i)) != sizeof(*(b->i))) || PetscMemcmp(a->
i,b->i,(a->mbs+1)*sizeof(*(a->i)),flg))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2335,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2336 if (!*flg) PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2337
2338 /* if a->j are the same */
2339 ierr = PetscArraycmp(a->j,b->j,a->nz,flg)((sizeof(*(a->j)) != sizeof(*(b->j))) || PetscMemcmp(a->
j,b->j,(a->nz)*sizeof(*(a->j)),flg))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2339,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2340 if (!*flg) PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2341
2342 /* if a->a are the same */
2343 ierr = PetscArraycmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs),flg)((sizeof(*(a->a)) != sizeof(*(b->a))) || PetscMemcmp(a->
a,b->a,((a->nz)*(A->rmap->bs)*(B->rmap->bs)
)*sizeof(*(a->a)),flg))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2343,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2344 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2345
2346}
2347
2348PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
2349{
2350 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2351 PetscErrorCode ierr;
2352 PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
2353 PetscScalar *x,zero = 0.0;
2354 MatScalar *aa,*aa_j;
2355
2356 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2356; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2357 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix")return PetscError(((MPI_Comm)0x44000001),2357,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,73,PETSC_ERROR_INITIAL,"Not for factored matrix")
;
2358 bs = A->rmap->bs;
2359 aa = a->a;
2360 ai = a->i;
2361 aj = a->j;
2362 ambs = a->mbs;
2363 bs2 = a->bs2;
2364
2365 ierr = VecSet(v,zero);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2365,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2366 ierr = VecGetArray(v,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2366,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2367 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2367,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2368 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector")return PetscError(((MPI_Comm)0x44000001),2368,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,60,PETSC_ERROR_INITIAL,"Nonconforming matrix and vector")
;
2369 for (i=0; i<ambs; i++) {
2370 for (j=ai[i]; j<ai[i+1]; j++) {
2371 if (aj[j] == i) {
2372 row = i*bs;
2373 aa_j = aa+j*bs2;
2374 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
2375 break;
2376 }
2377 }
2378 }
2379 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2379,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2380 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2381}
2382
2383PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
2384{
2385 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2386 const PetscScalar *l,*r,*li,*ri;
2387 PetscScalar x;
2388 MatScalar *aa, *v;
2389 PetscErrorCode ierr;
2390 PetscInt i,j,k,lm,rn,M,m,n,mbs,tmp,bs,bs2,iai;
2391 const PetscInt *ai,*aj;
2392
2393 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2393; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2394 ai = a->i;
2395 aj = a->j;
2396 aa = a->a;
2397 m = A->rmap->n;
2398 n = A->cmap->n;
2399 bs = A->rmap->bs;
2400 mbs = a->mbs;
2401 bs2 = a->bs2;
2402 if (ll) {
2403 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2403,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2404 ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2404,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2405 if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length")return PetscError(((MPI_Comm)0x44000001),2405,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,60,PETSC_ERROR_INITIAL,"Left scaling vector wrong length")
;
2406 for (i=0; i<mbs; i++) { /* for each block row */
2407 M = ai[i+1] - ai[i];
2408 li = l + i*bs;
2409 v = aa + bs2*ai[i];
2410 for (j=0; j<M; j++) { /* for each block */
2411 for (k=0; k<bs2; k++) {
2412 (*v++) *= li[k%bs];
2413 }
2414 }
2415 }
2416 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2416,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2417 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2417,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2418 }
2419
2420 if (rr) {
2421 ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2421,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2422 ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2422,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2423 if (rn != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length")return PetscError(((MPI_Comm)0x44000001),2423,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,60,PETSC_ERROR_INITIAL,"Right scaling vector wrong length")
;
2424 for (i=0; i<mbs; i++) { /* for each block row */
2425 iai = ai[i];
2426 M = ai[i+1] - iai;
2427 v = aa + bs2*iai;
2428 for (j=0; j<M; j++) { /* for each block */
2429 ri = r + bs*aj[iai+j];
2430 for (k=0; k<bs; k++) {
2431 x = ri[k];
2432 for (tmp=0; tmp<bs; tmp++) v[tmp] *= x;
2433 v += bs;
2434 }
2435 }
2436 }
2437 ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2437,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2438 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2438,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2439 }
2440 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2441}
2442
2443
2444PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
2445{
2446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2447
2448 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2448; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2449 info->block_size = a->bs2;
2450 info->nz_allocated = a->bs2*a->maxnz;
2451 info->nz_used = a->bs2*a->nz;
2452 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used);
2453 info->assemblies = A->num_ass;
2454 info->mallocs = A->info.mallocs;
2455 info->memory = ((PetscObject)A)->mem;
2456 if (A->factortype) {
2457 info->fill_ratio_given = A->info.fill_ratio_given;
2458 info->fill_ratio_needed = A->info.fill_ratio_needed;
2459 info->factor_mallocs = A->info.factor_mallocs;
2460 } else {
2461 info->fill_ratio_given = 0;
2462 info->fill_ratio_needed = 0;
2463 info->factor_mallocs = 0;
2464 }
2465 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2466}
2467
2468PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
2469{
2470 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2471 PetscErrorCode ierr;
2472
2473 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2473; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2474 ierr = PetscArrayzero(a->a,a->bs2*a->i[a->mbs])PetscMemzero(a->a,(a->bs2*a->i[a->mbs])*sizeof(*(
a->a)))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2474,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2475 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2476}
2477
2478PETSC_INTERNextern __attribute__((visibility ("hidden"))) PetscErrorCode MatMatMult_SeqBAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2479{
2480 PetscErrorCode ierr;
2481
2482 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2482; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2483 if (scall == MAT_INITIAL_MATRIX) {
2484 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog
->curStage].perfInfo.active && petsc_stageLog->
stageInfo[petsc_stageLog->curStage].eventLog->eventInfo
[MAT_MatMultSymbolic].active) ? (*PetscLogPLB)((MAT_MatMultSymbolic
),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject
)(0)) : 0 ))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2484,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2485 ierr = MatMatMultSymbolic_SeqBAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2485,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2486 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog
->curStage].perfInfo.active && petsc_stageLog->
stageInfo[petsc_stageLog->curStage].eventLog->eventInfo
[MAT_MatMultSymbolic].active) ? (*PetscLogPLE)((MAT_MatMultSymbolic
),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject
)(0)) : 0 ))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2486,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2487 }
2488 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog
->curStage].perfInfo.active && petsc_stageLog->
stageInfo[petsc_stageLog->curStage].eventLog->eventInfo
[MAT_MatMultNumeric].active) ? (*PetscLogPLB)((MAT_MatMultNumeric
),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject
)(0)) : 0 ))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2488,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2489 ierr = MatMatMultNumeric_SeqBAIJ_SeqDense(A,B,*C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2489,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2490 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog
->curStage].perfInfo.active && petsc_stageLog->
stageInfo[petsc_stageLog->curStage].eventLog->eventInfo
[MAT_MatMultNumeric].active) ? (*PetscLogPLE)((MAT_MatMultNumeric
),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject
)(0)) : 0 ))
;CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2490,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2491 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2492}
2493
2494PetscErrorCode MatMatMultSymbolic_SeqBAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C)
2495{
2496 PetscErrorCode ierr;
2497
2498 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2498; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2499 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2499,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2500
2501 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqBAIJ_SeqDense;
2502 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2503}
2504
2505PetscErrorCode MatMatMult_SeqBAIJ_2_Private(Mat A,PetscScalar* b,PetscInt bm,PetscScalar* c,PetscInt cm,PetscInt cn)
2506{
2507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2508 PetscScalar *z = 0,sum1,sum2,*zarray;
2509 const PetscScalar *x,*xb;
2510 PetscScalar x1,x2;
2511 const MatScalar *v,*vv;
2512 PetscErrorCode ierr;
2513 PetscInt mbs,i,*idx,*ii,j,*jj,n,k,*ridx=NULL((void*)0);
2514 PetscBool usecprow=a->compressedrow.use;
2515
2516 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2516; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2517
2518 idx = a->j;
2519 v = a->a;
2520 if (usecprow) {
2521 mbs = a->compressedrow.nrows;
2522 ii = a->compressedrow.i;
2523 ridx = a->compressedrow.rindex;
2524 } else {
2525 mbs = a->mbs;
2526 ii = a->i;
2527 z = c;
2528 }
2529
2530 for (i=0; i<mbs; i++) {
2531 n = ii[1] - ii[0]; ii++;
2532 PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(idx+n),*_end = (const char
*)((idx+n)+(n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Indices for the next row (assumes same size as this one) */
2533 PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA)do { const char *_p = (const char*)(v+4*n),*_end = (const char
*)((v+4*n)+(4*n)); for ( ; _p < _end; _p += 64) (__builtin_prefetch
((void *)((const char*)(_p)), ((((0))) >> 2) & 1, (
((0))) & 0x3)); } while (0)
; /* Entries for the next row */
2534 if (usecprow) z = c + 2*ridx[i];
2535 jj = idx;
2536 vv = v;
2537 for (k=0; k<cn; k++) {
2538 idx = jj;
2539 v = vv;
2540 sum1 = 0.0; sum2 = 0.0;
2541 for (j=0; j<n; j++) {
2542 xb = b + 2*(*idx++); x1 = xb[0+k*cm]; x2 = xb[1+k*cm];
2543 sum1 += v[0]*x1 + v[2]*x2;
2544 sum2 += v[1]*x1 + v[3]*x2;
2545 v += 4;
2546 }
2547 z[0+k*cm] = sum1; z[1+k*cm] = sum2;
2548 }
2549 if (!usecprow) z += 2;
2550 }
2551 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2552}
2553
2554PetscErrorCode MatMatMultNumeric_SeqBAIJ_SeqDense(Mat A,Mat B,Mat C)
2555{
2556 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2557 Mat_SeqDense *bd = (Mat_SeqDense*)B->data;
2558 Mat_SeqDense *cd = (Mat_SeqDense*)B->data;
2559 PetscErrorCode ierr;
2560 PetscInt cm=cd->lda,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n;
2561 PetscInt mbs,i,bs=A->rmap->bs,j,n,bs2=a->bs2;
2562 PetscBLASInt bbs,bcn,bbm,bcm;
2563 PetscScalar *z = 0;
2564 PetscScalar *c,*b;
2565 const MatScalar *v;
2566 const PetscInt *idx,*ii,*ridx=NULL((void*)0);
2567 PetscScalar _DZero=0.0,_DOne=1.0;
2568 PetscBool usecprow=a->compressedrow.use;
2569
2570 PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = __func__; petscstack->file[petscstack->currentsize]
= "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2570; petscstack
->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_FALSE || petscstack->hotdepth); } ; } while (0)
; ; } while (0)
;
2571 if (!cm || !cn) PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2572 if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n)return PetscError(((MPI_Comm)0x44000001),2572,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,60,PETSC_ERROR_INITIAL,"Number columns in A %D not equal rows in B %D\n"
,A->cmap->n,B->rmap->n)
;
2573 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n)return PetscError(((MPI_Comm)0x44000001),2573,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,60,PETSC_ERROR_INITIAL,"Number rows in C %D not equal rows in A %D\n"
,C->rmap->n,A->rmap->n)
;
2574 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n)return PetscError(((MPI_Comm)0x44000001),2574,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,60,PETSC_ERROR_INITIAL,"Number columns in B %D not equal columns in C %D\n"
,B->cmap->n,C->cmap->n)
;
2575 b = bd->v;
2576 if (a->nonzerorowcnt != A->rmap->n) {
2577 ierr = MatZeroEntries(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2577,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2578 }
2579 ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2579,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2580 if (bs == 2) {
2581 ierr = MatMatMult_SeqBAIJ_2_Private(A, b, bm, c, cm, cn);
Value stored to 'ierr' is never read
2582 }
2583 else {
2584 ierr = PetscBLASIntCast(bs,&bbs);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2584,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2585 ierr = PetscBLASIntCast(cn,&bcn);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2585,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2586 ierr = PetscBLASIntCast(bm,&bbm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2586,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2587 ierr = PetscBLASIntCast(cm,&bcm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2587,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2588 idx = a->j;
2589 v = a->a;
2590 if (usecprow) {
2591 mbs = a->compressedrow.nrows;
2592 ii = a->compressedrow.i;
2593 ridx = a->compressedrow.rindex;
2594 } else {
2595 mbs = a->mbs;
2596 ii = a->i;
2597 z = c;
2598 }
2599 for (i=0; i<mbs; i++) {
2600 n = ii[1] - ii[0]; ii++;
2601 if (usecprow) z = c + bs*ridx[i];
2602 if (n) {
2603 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&bbs,&bcn,&bbs,&_DOne,v,&bbs,b+bs*(*idx++),&bbm,&_DZero,z,&bcm))do { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = "BLASgemm"; petscstack->file[petscstack->currentsize
] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2603; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_TRUE || petscstack->hotdepth); } ; } while (0);
dgemm_("N","N",&bbs,&bcn,&bbs,&_DOne,v,&
bbs,b+bs*(*idx++),&bbm,&_DZero,z,&bcm); do { do {
PetscErrorCode _7_ierr = PetscMallocValidate(2603,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
);do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError(
((MPI_Comm)0x44000001),2603,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do {
; if (petscstack && petscstack->currentsize > 0
) { petscstack->currentsize--; petscstack->function[petscstack
->currentsize] = 0; petscstack->file[petscstack->currentsize
] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; }
if (petscstack) { petscstack->hotdepth = (((petscstack->
hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ;
} while (0); } while (0); } while(0)
;
2604 v += bs2;
2605 }
2606 for (j=1; j<n; j++) {
2607 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&bbs,&bcn,&bbs,&_DOne,v,&bbs,b+bs*(*idx++),&bbm,&_DOne,z,&bcm))do { do { ; if (petscstack && (petscstack->currentsize
< 64)) { petscstack->function[petscstack->currentsize
] = "BLASgemm"; petscstack->file[petscstack->currentsize
] = "/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
; petscstack->line[petscstack->currentsize] = 2607; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; petscstack
->currentsize++; } if (petscstack) { petscstack->hotdepth
+= (PETSC_TRUE || petscstack->hotdepth); } ; } while (0);
dgemm_("N","N",&bbs,&bcn,&bbs,&_DOne,v,&
bbs,b+bs*(*idx++),&bbm,&_DOne,z,&bcm); do { do {PetscErrorCode
_7_ierr = PetscMallocValidate(2607,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
);do {if (__builtin_expect(!!(_7_ierr),0)) return PetscError(
((MPI_Comm)0x44000001),2607,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,_7_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0); do {
; if (petscstack && petscstack->currentsize > 0
) { petscstack->currentsize--; petscstack->function[petscstack
->currentsize] = 0; petscstack->file[petscstack->currentsize
] = 0; petscstack->line[petscstack->currentsize] = 0; petscstack
->petscroutine[petscstack->currentsize] = PETSC_FALSE; }
if (petscstack) { petscstack->hotdepth = (((petscstack->
hotdepth-1)<(0)) ? (0) : (petscstack->hotdepth-1)); } ;
} while (0); } while (0); } while(0)
;
2608 v += bs2;
2609 }
2610 if (!usecprow) z += bs;
2611 }
2612 }
2613 ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2613,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2614 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2614,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2615 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2615,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2616 ierr = PetscLogFlops((2.0*a->nz*bs2 - bs*a->nonzerorowcnt)*cn);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm
)0x44000001),2616,__func__,"/sandbox/petsc/petsc.next/src/mat/impls/baij/seq/baij2.c"
,ierr,PETSC_ERROR_REPEAT," ");} while (0)
;
2617 PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize
> 0) { petscstack->currentsize--; petscstack->function
[petscstack->currentsize] = 0; petscstack->file[petscstack
->currentsize] = 0; petscstack->line[petscstack->currentsize
] = 0; petscstack->petscroutine[petscstack->currentsize
] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth =
(((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack->
hotdepth-1)); } ; } while (0); return(0);} while (0)
;
2618}