Actual source code: sell.c


  2: /*
  3:   Defines the basic matrix operations for the SELL matrix storage format.
  4: */
  5: #include <../src/mat/impls/sell/seq/sell.h>
  6: #include <petscblaslapack.h>
  7: #include <petsc/private/kernels/blocktranspose.h>

  9: static PetscBool  cited      = PETSC_FALSE;
 10: static const char citation[] = "@inproceedings{ZhangELLPACK2018,\n"
 11:                                " author = {Hong Zhang and Richard T. Mills and Karl Rupp and Barry F. Smith},\n"
 12:                                " title = {Vectorized Parallel Sparse Matrix-Vector Multiplication in {PETSc} Using {AVX-512}},\n"
 13:                                " booktitle = {Proceedings of the 47th International Conference on Parallel Processing},\n"
 14:                                " year = 2018\n"
 15:                                "}\n";

 17: #if defined(PETSC_HAVE_IMMINTRIN_H) && (defined(__AVX512F__) || (defined(__AVX2__) && defined(__FMA__)) || defined(__AVX__)) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)

 19:   #include <immintrin.h>

 21:   #if !defined(_MM_SCALE_8)
 22:     #define _MM_SCALE_8 8
 23:   #endif

 25:   #if defined(__AVX512F__)
 26:     /* these do not work
 27:    vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
 28:    vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
 29:   */
 30:     #define AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y) \
 31:       /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
 32:       vec_idx  = _mm256_loadu_si256((__m256i const *)acolidx); \
 33:       vec_vals = _mm512_loadu_pd(aval); \
 34:       vec_x    = _mm512_i32gather_pd(vec_idx, x, _MM_SCALE_8); \
 35:       vec_y    = _mm512_fmadd_pd(vec_x, vec_vals, vec_y)
 36:   #elif defined(__AVX2__) && defined(__FMA__)
 37:     #define AVX2_Mult_Private(vec_idx, vec_x, vec_vals, vec_y) \
 38:       vec_vals = _mm256_loadu_pd(aval); \
 39:       vec_idx  = _mm_loadu_si128((__m128i const *)acolidx); /* SSE2 */ \
 40:       vec_x    = _mm256_i32gather_pd(x, vec_idx, _MM_SCALE_8); \
 41:       vec_y    = _mm256_fmadd_pd(vec_x, vec_vals, vec_y)
 42:   #endif
 43: #endif /* PETSC_HAVE_IMMINTRIN_H */

 45: /*@C
 46:  MatSeqSELLSetPreallocation - For good matrix assembly performance
 47:  the user should preallocate the matrix storage by setting the parameter nz
 48:  (or the array nnz).  By setting these parameters accurately, performance
 49:  during matrix assembly can be increased significantly.

 51:  Collective

 53:  Input Parameters:
 54:  +  B - The `MATSEQSELL` matrix
 55:  .  rlenmax - number of nonzeros per row (same for all rows)
 56:  -  rlen - array containing the number of nonzeros in the various rows
 57:  (possibly different for each row) or `NULL`

 59:  Level: intermediate

 61:  Notes:
 62:  If `rlen` is given then `rlenmax` is ignored.

 64:  Specify the preallocated storage with either `rlenmax` or `rlen` (not both).
 65:  Set `rlenmax` = `PETSC_DEFAULT` and `rlen` = `NULL` for PETSc to control dynamic memory
 66:  allocation.  For large problems you MUST preallocate memory or you
 67:  will get TERRIBLE performance, see the users' manual chapter on matrices.

 69:  You can call `MatGetInfo()` to get information on how effective the preallocation was;
 70:  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
 71:  You can also run with the option `-info` and look for messages with the string
 72:  malloc in them to see if additional memory allocation was needed.

 74:  Developer Note:
 75:  Use `rlenmax` of `MAT_SKIP_ALLOCATION` to not allocate any space for the matrix
 76:  entries or columns indices.

 78:  The maximum number of nonzeos in any row should be as accurate as possible.
 79:  If it is underestimated, you will get bad performance due to reallocation
 80:  (`MatSeqXSELLReallocateSELL()`).

 82:  .seealso: `Mat`, `MATSEQSELL`, `MATSELL`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MatGetInfo()`
 83:  @*/
 84: PetscErrorCode MatSeqSELLSetPreallocation(Mat B, PetscInt rlenmax, const PetscInt rlen[])
 85: {
 86:   PetscFunctionBegin;
 89:   PetscTryMethod(B, "MatSeqSELLSetPreallocation_C", (Mat, PetscInt, const PetscInt[]), (B, rlenmax, rlen));
 90:   PetscFunctionReturn(PETSC_SUCCESS);
 91: }

 93: PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B, PetscInt maxallocrow, const PetscInt rlen[])
 94: {
 95:   Mat_SeqSELL *b;
 96:   PetscInt     i, j, totalslices;
 97:   PetscBool    skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;

 99:   PetscFunctionBegin;
100:   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
101:   if (maxallocrow == MAT_SKIP_ALLOCATION) {
102:     skipallocation = PETSC_TRUE;
103:     maxallocrow    = 0;
104:   }

106:   PetscCall(PetscLayoutSetUp(B->rmap));
107:   PetscCall(PetscLayoutSetUp(B->cmap));

109:   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
110:   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
111:   PetscCheck(maxallocrow >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "maxallocrow cannot be less than 0: value %" PetscInt_FMT, maxallocrow);
112:   if (rlen) {
113:     for (i = 0; i < B->rmap->n; i++) {
114:       PetscCheck(rlen[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "rlen cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, rlen[i]);
115:       PetscCheck(rlen[i] <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "rlen cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, rlen[i], B->cmap->n);
116:     }
117:   }

119:   B->preallocated = PETSC_TRUE;

121:   b = (Mat_SeqSELL *)B->data;

123:   totalslices    = PetscCeilInt(B->rmap->n, 8);
124:   b->totalslices = totalslices;
125:   if (!skipallocation) {
126:     if (B->rmap->n & 0x07) PetscCall(PetscInfo(B, "Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %" PetscInt_FMT ")\n", B->rmap->n));

128:     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
129:       PetscCall(PetscMalloc1(totalslices + 1, &b->sliidx));
130:     }
131:     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
132:       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
133:       else if (maxallocrow < 0) maxallocrow = 1;
134:       for (i = 0; i <= totalslices; i++) b->sliidx[i] = i * 8 * maxallocrow;
135:     } else {
136:       maxallocrow  = 0;
137:       b->sliidx[0] = 0;
138:       for (i = 1; i < totalslices; i++) {
139:         b->sliidx[i] = 0;
140:         for (j = 0; j < 8; j++) b->sliidx[i] = PetscMax(b->sliidx[i], rlen[8 * (i - 1) + j]);
141:         maxallocrow = PetscMax(b->sliidx[i], maxallocrow);
142:         PetscCall(PetscIntSumError(b->sliidx[i - 1], 8 * b->sliidx[i], &b->sliidx[i]));
143:       }
144:       /* last slice */
145:       b->sliidx[totalslices] = 0;
146:       for (j = (totalslices - 1) * 8; j < B->rmap->n; j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices], rlen[j]);
147:       maxallocrow            = PetscMax(b->sliidx[totalslices], maxallocrow);
148:       b->sliidx[totalslices] = b->sliidx[totalslices - 1] + 8 * b->sliidx[totalslices];
149:     }

151:     /* allocate space for val, colidx, rlen */
152:     /* FIXME: should B's old memory be unlogged? */
153:     PetscCall(MatSeqXSELLFreeSELL(B, &b->val, &b->colidx));
154:     /* FIXME: assuming an element of the bit array takes 8 bits */
155:     PetscCall(PetscMalloc2(b->sliidx[totalslices], &b->val, b->sliidx[totalslices], &b->colidx));
156:     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
157:     PetscCall(PetscCalloc1(8 * totalslices, &b->rlen));

159:     b->singlemalloc = PETSC_TRUE;
160:     b->free_val     = PETSC_TRUE;
161:     b->free_colidx  = PETSC_TRUE;
162:   } else {
163:     b->free_val    = PETSC_FALSE;
164:     b->free_colidx = PETSC_FALSE;
165:   }

167:   b->nz               = 0;
168:   b->maxallocrow      = maxallocrow;
169:   b->rlenmax          = maxallocrow;
170:   b->maxallocmat      = b->sliidx[totalslices];
171:   B->info.nz_unneeded = (double)b->maxallocmat;
172:   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
173:   PetscFunctionReturn(PETSC_SUCCESS);
174: }

176: PetscErrorCode MatGetRow_SeqSELL(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
177: {
178:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
179:   PetscInt     shift;

181:   PetscFunctionBegin;
182:   PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
183:   if (nz) *nz = a->rlen[row];
184:   shift = a->sliidx[row >> 3] + (row & 0x07);
185:   if (!a->getrowcols) PetscCall(PetscMalloc2(a->rlenmax, &a->getrowcols, a->rlenmax, &a->getrowvals));
186:   if (idx) {
187:     PetscInt j;
188:     for (j = 0; j < a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift + 8 * j];
189:     *idx = a->getrowcols;
190:   }
191:   if (v) {
192:     PetscInt j;
193:     for (j = 0; j < a->rlen[row]; j++) a->getrowvals[j] = a->val[shift + 8 * j];
194:     *v = a->getrowvals;
195:   }
196:   PetscFunctionReturn(PETSC_SUCCESS);
197: }

199: PetscErrorCode MatRestoreRow_SeqSELL(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
200: {
201:   PetscFunctionBegin;
202:   PetscFunctionReturn(PETSC_SUCCESS);
203: }

205: PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
206: {
207:   Mat          B;
208:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
209:   PetscInt     i;

211:   PetscFunctionBegin;
212:   if (reuse == MAT_REUSE_MATRIX) {
213:     B = *newmat;
214:     PetscCall(MatZeroEntries(B));
215:   } else {
216:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
217:     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
218:     PetscCall(MatSetType(B, MATSEQAIJ));
219:     PetscCall(MatSeqAIJSetPreallocation(B, 0, a->rlen));
220:   }

222:   for (i = 0; i < A->rmap->n; i++) {
223:     PetscInt     nz = 0, *cols = NULL;
224:     PetscScalar *vals = NULL;

226:     PetscCall(MatGetRow_SeqSELL(A, i, &nz, &cols, &vals));
227:     PetscCall(MatSetValues(B, 1, &i, nz, cols, vals, INSERT_VALUES));
228:     PetscCall(MatRestoreRow_SeqSELL(A, i, &nz, &cols, &vals));
229:   }

231:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
232:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
233:   B->rmap->bs = A->rmap->bs;

235:   if (reuse == MAT_INPLACE_MATRIX) {
236:     PetscCall(MatHeaderReplace(A, &B));
237:   } else {
238:     *newmat = B;
239:   }
240:   PetscFunctionReturn(PETSC_SUCCESS);
241: }

243: #include <../src/mat/impls/aij/seq/aij.h>

245: PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
246: {
247:   Mat                B;
248:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
249:   PetscInt          *ai = a->i, m = A->rmap->N, n = A->cmap->N, i, *rowlengths, row, ncols;
250:   const PetscInt    *cols;
251:   const PetscScalar *vals;

253:   PetscFunctionBegin;

255:   if (reuse == MAT_REUSE_MATRIX) {
256:     B = *newmat;
257:   } else {
258:     if (PetscDefined(USE_DEBUG) || !a->ilen) {
259:       PetscCall(PetscMalloc1(m, &rowlengths));
260:       for (i = 0; i < m; i++) rowlengths[i] = ai[i + 1] - ai[i];
261:     }
262:     if (PetscDefined(USE_DEBUG) && a->ilen) {
263:       PetscBool eq;
264:       PetscCall(PetscMemcmp(rowlengths, a->ilen, m * sizeof(PetscInt), &eq));
265:       PetscCheck(eq, PETSC_COMM_SELF, PETSC_ERR_PLIB, "SeqAIJ ilen array incorrect");
266:       PetscCall(PetscFree(rowlengths));
267:       rowlengths = a->ilen;
268:     } else if (a->ilen) rowlengths = a->ilen;
269:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
270:     PetscCall(MatSetSizes(B, m, n, m, n));
271:     PetscCall(MatSetType(B, MATSEQSELL));
272:     PetscCall(MatSeqSELLSetPreallocation(B, 0, rowlengths));
273:     if (rowlengths != a->ilen) PetscCall(PetscFree(rowlengths));
274:   }

276:   for (row = 0; row < m; row++) {
277:     PetscCall(MatGetRow_SeqAIJ(A, row, &ncols, (PetscInt **)&cols, (PetscScalar **)&vals));
278:     PetscCall(MatSetValues_SeqSELL(B, 1, &row, ncols, cols, vals, INSERT_VALUES));
279:     PetscCall(MatRestoreRow_SeqAIJ(A, row, &ncols, (PetscInt **)&cols, (PetscScalar **)&vals));
280:   }
281:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
282:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
283:   B->rmap->bs = A->rmap->bs;

285:   if (reuse == MAT_INPLACE_MATRIX) {
286:     PetscCall(MatHeaderReplace(A, &B));
287:   } else {
288:     *newmat = B;
289:   }
290:   PetscFunctionReturn(PETSC_SUCCESS);
291: }

293: PetscErrorCode MatMult_SeqSELL(Mat A, Vec xx, Vec yy)
294: {
295:   Mat_SeqSELL       *a = (Mat_SeqSELL *)A->data;
296:   PetscScalar       *y;
297:   const PetscScalar *x;
298:   const MatScalar   *aval        = a->val;
299:   PetscInt           totalslices = a->totalslices;
300:   const PetscInt    *acolidx     = a->colidx;
301:   PetscInt           i, j;
302: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
303:   __m512d  vec_x, vec_y, vec_vals;
304:   __m256i  vec_idx;
305:   __mmask8 mask;
306:   __m512d  vec_x2, vec_y2, vec_vals2, vec_x3, vec_y3, vec_vals3, vec_x4, vec_y4, vec_vals4;
307:   __m256i  vec_idx2, vec_idx3, vec_idx4;
308: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
309:   __m128i   vec_idx;
310:   __m256d   vec_x, vec_y, vec_y2, vec_vals;
311:   MatScalar yval;
312:   PetscInt  r, rows_left, row, nnz_in_row;
313: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
314:   __m128d   vec_x_tmp;
315:   __m256d   vec_x, vec_y, vec_y2, vec_vals;
316:   MatScalar yval;
317:   PetscInt  r, rows_left, row, nnz_in_row;
318: #else
319:   PetscScalar sum[8];
320: #endif

322: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
323:   #pragma disjoint(*x, *y, *aval)
324: #endif

326:   PetscFunctionBegin;
327:   PetscCall(VecGetArrayRead(xx, &x));
328:   PetscCall(VecGetArray(yy, &y));
329: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
330:   for (i = 0; i < totalslices; i++) { /* loop over slices */
331:     PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
332:     PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);

334:     vec_y  = _mm512_setzero_pd();
335:     vec_y2 = _mm512_setzero_pd();
336:     vec_y3 = _mm512_setzero_pd();
337:     vec_y4 = _mm512_setzero_pd();

339:     j = a->sliidx[i] >> 3; /* 8 bytes are read at each time, corresponding to a slice column */
340:     switch ((a->sliidx[i + 1] - a->sliidx[i]) / 8 & 3) {
341:     case 3:
342:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
343:       acolidx += 8;
344:       aval += 8;
345:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
346:       acolidx += 8;
347:       aval += 8;
348:       AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3);
349:       acolidx += 8;
350:       aval += 8;
351:       j += 3;
352:       break;
353:     case 2:
354:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
355:       acolidx += 8;
356:       aval += 8;
357:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
358:       acolidx += 8;
359:       aval += 8;
360:       j += 2;
361:       break;
362:     case 1:
363:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
364:       acolidx += 8;
365:       aval += 8;
366:       j += 1;
367:       break;
368:     }
369:   #pragma novector
370:     for (; j < (a->sliidx[i + 1] >> 3); j += 4) {
371:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
372:       acolidx += 8;
373:       aval += 8;
374:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
375:       acolidx += 8;
376:       aval += 8;
377:       AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3);
378:       acolidx += 8;
379:       aval += 8;
380:       AVX512_Mult_Private(vec_idx4, vec_x4, vec_vals4, vec_y4);
381:       acolidx += 8;
382:       aval += 8;
383:     }

385:     vec_y = _mm512_add_pd(vec_y, vec_y2);
386:     vec_y = _mm512_add_pd(vec_y, vec_y3);
387:     vec_y = _mm512_add_pd(vec_y, vec_y4);
388:     if (i == totalslices - 1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
389:       mask = (__mmask8)(0xff >> (8 - (A->rmap->n & 0x07)));
390:       _mm512_mask_storeu_pd(&y[8 * i], mask, vec_y);
391:     } else {
392:       _mm512_storeu_pd(&y[8 * i], vec_y);
393:     }
394:   }
395: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
396:   for (i = 0; i < totalslices; i++) { /* loop over full slices */
397:     PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
398:     PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);

400:     /* last slice may have padding rows. Don't use vectorization. */
401:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) {
402:       rows_left = A->rmap->n - 8 * i;
403:       for (r = 0; r < rows_left; ++r) {
404:         yval       = (MatScalar)0;
405:         row        = 8 * i + r;
406:         nnz_in_row = a->rlen[row];
407:         for (j = 0; j < nnz_in_row; ++j) yval += aval[8 * j + r] * x[acolidx[8 * j + r]];
408:         y[row] = yval;
409:       }
410:       break;
411:     }

413:     vec_y  = _mm256_setzero_pd();
414:     vec_y2 = _mm256_setzero_pd();

416:   /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
417:   #pragma novector
418:   #pragma unroll(2)
419:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
420:       AVX2_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
421:       aval += 4;
422:       acolidx += 4;
423:       AVX2_Mult_Private(vec_idx, vec_x, vec_vals, vec_y2);
424:       aval += 4;
425:       acolidx += 4;
426:     }

428:     _mm256_storeu_pd(y + i * 8, vec_y);
429:     _mm256_storeu_pd(y + i * 8 + 4, vec_y2);
430:   }
431: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
432:   for (i = 0; i < totalslices; i++) { /* loop over full slices */
433:     PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
434:     PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);

436:     vec_y  = _mm256_setzero_pd();
437:     vec_y2 = _mm256_setzero_pd();

439:     /* last slice may have padding rows. Don't use vectorization. */
440:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) {
441:       rows_left = A->rmap->n - 8 * i;
442:       for (r = 0; r < rows_left; ++r) {
443:         yval       = (MatScalar)0;
444:         row        = 8 * i + r;
445:         nnz_in_row = a->rlen[row];
446:         for (j = 0; j < nnz_in_row; ++j) yval += aval[8 * j + r] * x[acolidx[8 * j + r]];
447:         y[row] = yval;
448:       }
449:       break;
450:     }

452:   /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
453:   #pragma novector
454:   #pragma unroll(2)
455:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
456:       vec_vals  = _mm256_loadu_pd(aval);
457:       vec_x_tmp = _mm_setzero_pd();
458:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
459:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
460:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0);
461:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
462:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
463:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1);
464:       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y);
465:       aval += 4;

467:       vec_vals  = _mm256_loadu_pd(aval);
468:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
469:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
470:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0);
471:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
472:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
473:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1);
474:       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y2);
475:       aval += 4;
476:     }

478:     _mm256_storeu_pd(y + i * 8, vec_y);
479:     _mm256_storeu_pd(y + i * 8 + 4, vec_y2);
480:   }
481: #else
482:   for (i = 0; i < totalslices; i++) { /* loop over slices */
483:     for (j = 0; j < 8; j++) sum[j] = 0.0;
484:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
485:       sum[0] += aval[j] * x[acolidx[j]];
486:       sum[1] += aval[j + 1] * x[acolidx[j + 1]];
487:       sum[2] += aval[j + 2] * x[acolidx[j + 2]];
488:       sum[3] += aval[j + 3] * x[acolidx[j + 3]];
489:       sum[4] += aval[j + 4] * x[acolidx[j + 4]];
490:       sum[5] += aval[j + 5] * x[acolidx[j + 5]];
491:       sum[6] += aval[j + 6] * x[acolidx[j + 6]];
492:       sum[7] += aval[j + 7] * x[acolidx[j + 7]];
493:     }
494:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
495:       for (j = 0; j < (A->rmap->n & 0x07); j++) y[8 * i + j] = sum[j];
496:     } else {
497:       for (j = 0; j < 8; j++) y[8 * i + j] = sum[j];
498:     }
499:   }
500: #endif

502:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); /* theoretical minimal FLOPs */
503:   PetscCall(VecRestoreArrayRead(xx, &x));
504:   PetscCall(VecRestoreArray(yy, &y));
505:   PetscFunctionReturn(PETSC_SUCCESS);
506: }

508: #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
509: PetscErrorCode MatMultAdd_SeqSELL(Mat A, Vec xx, Vec yy, Vec zz)
510: {
511:   Mat_SeqSELL       *a = (Mat_SeqSELL *)A->data;
512:   PetscScalar       *y, *z;
513:   const PetscScalar *x;
514:   const MatScalar   *aval        = a->val;
515:   PetscInt           totalslices = a->totalslices;
516:   const PetscInt    *acolidx     = a->colidx;
517:   PetscInt           i, j;
518: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
519:   __m512d  vec_x, vec_y, vec_vals;
520:   __m256i  vec_idx;
521:   __mmask8 mask;
522:   __m512d  vec_x2, vec_y2, vec_vals2, vec_x3, vec_y3, vec_vals3, vec_x4, vec_y4, vec_vals4;
523:   __m256i  vec_idx2, vec_idx3, vec_idx4;
524: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
525:   __m128d   vec_x_tmp;
526:   __m256d   vec_x, vec_y, vec_y2, vec_vals;
527:   MatScalar yval;
528:   PetscInt  r, row, nnz_in_row;
529: #else
530:   PetscScalar sum[8];
531: #endif

533: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
534:   #pragma disjoint(*x, *y, *aval)
535: #endif

537:   PetscFunctionBegin;
538:   PetscCall(VecGetArrayRead(xx, &x));
539:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
540: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
541:   for (i = 0; i < totalslices; i++) { /* loop over slices */
542:     PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
543:     PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);

545:     if (i == totalslices - 1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
546:       mask  = (__mmask8)(0xff >> (8 - (A->rmap->n & 0x07)));
547:       vec_y = _mm512_mask_loadu_pd(vec_y, mask, &y[8 * i]);
548:     } else {
549:       vec_y = _mm512_loadu_pd(&y[8 * i]);
550:     }
551:     vec_y2 = _mm512_setzero_pd();
552:     vec_y3 = _mm512_setzero_pd();
553:     vec_y4 = _mm512_setzero_pd();

555:     j = a->sliidx[i] >> 3; /* 8 bytes are read at each time, corresponding to a slice column */
556:     switch ((a->sliidx[i + 1] - a->sliidx[i]) / 8 & 3) {
557:     case 3:
558:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
559:       acolidx += 8;
560:       aval += 8;
561:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
562:       acolidx += 8;
563:       aval += 8;
564:       AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3);
565:       acolidx += 8;
566:       aval += 8;
567:       j += 3;
568:       break;
569:     case 2:
570:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
571:       acolidx += 8;
572:       aval += 8;
573:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
574:       acolidx += 8;
575:       aval += 8;
576:       j += 2;
577:       break;
578:     case 1:
579:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
580:       acolidx += 8;
581:       aval += 8;
582:       j += 1;
583:       break;
584:     }
585:   #pragma novector
586:     for (; j < (a->sliidx[i + 1] >> 3); j += 4) {
587:       AVX512_Mult_Private(vec_idx, vec_x, vec_vals, vec_y);
588:       acolidx += 8;
589:       aval += 8;
590:       AVX512_Mult_Private(vec_idx2, vec_x2, vec_vals2, vec_y2);
591:       acolidx += 8;
592:       aval += 8;
593:       AVX512_Mult_Private(vec_idx3, vec_x3, vec_vals3, vec_y3);
594:       acolidx += 8;
595:       aval += 8;
596:       AVX512_Mult_Private(vec_idx4, vec_x4, vec_vals4, vec_y4);
597:       acolidx += 8;
598:       aval += 8;
599:     }

601:     vec_y = _mm512_add_pd(vec_y, vec_y2);
602:     vec_y = _mm512_add_pd(vec_y, vec_y3);
603:     vec_y = _mm512_add_pd(vec_y, vec_y4);
604:     if (i == totalslices - 1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
605:       _mm512_mask_storeu_pd(&z[8 * i], mask, vec_y);
606:     } else {
607:       _mm512_storeu_pd(&z[8 * i], vec_y);
608:     }
609:   }
610: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
611:   for (i = 0; i < totalslices; i++) { /* loop over full slices */
612:     PetscPrefetchBlock(acolidx, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);
613:     PetscPrefetchBlock(aval, a->sliidx[i + 1] - a->sliidx[i], 0, PETSC_PREFETCH_HINT_T0);

615:     /* last slice may have padding rows. Don't use vectorization. */
616:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) {
617:       for (r = 0; r < (A->rmap->n & 0x07); ++r) {
618:         row        = 8 * i + r;
619:         yval       = (MatScalar)0.0;
620:         nnz_in_row = a->rlen[row];
621:         for (j = 0; j < nnz_in_row; ++j) yval += aval[8 * j + r] * x[acolidx[8 * j + r]];
622:         z[row] = y[row] + yval;
623:       }
624:       break;
625:     }

627:     vec_y  = _mm256_loadu_pd(y + 8 * i);
628:     vec_y2 = _mm256_loadu_pd(y + 8 * i + 4);

630:     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
631:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
632:       vec_vals  = _mm256_loadu_pd(aval);
633:       vec_x_tmp = _mm_setzero_pd();
634:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
635:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
636:       vec_x     = _mm256_setzero_pd();
637:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0);
638:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
639:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
640:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1);
641:       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y);
642:       aval += 4;

644:       vec_vals  = _mm256_loadu_pd(aval);
645:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
646:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
647:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 0);
648:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
649:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
650:       vec_x     = _mm256_insertf128_pd(vec_x, vec_x_tmp, 1);
651:       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x, vec_vals), vec_y2);
652:       aval += 4;
653:     }

655:     _mm256_storeu_pd(z + i * 8, vec_y);
656:     _mm256_storeu_pd(z + i * 8 + 4, vec_y2);
657:   }
658: #else
659:   for (i = 0; i < totalslices; i++) { /* loop over slices */
660:     for (j = 0; j < 8; j++) sum[j] = 0.0;
661:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
662:       sum[0] += aval[j] * x[acolidx[j]];
663:       sum[1] += aval[j + 1] * x[acolidx[j + 1]];
664:       sum[2] += aval[j + 2] * x[acolidx[j + 2]];
665:       sum[3] += aval[j + 3] * x[acolidx[j + 3]];
666:       sum[4] += aval[j + 4] * x[acolidx[j + 4]];
667:       sum[5] += aval[j + 5] * x[acolidx[j + 5]];
668:       sum[6] += aval[j + 6] * x[acolidx[j + 6]];
669:       sum[7] += aval[j + 7] * x[acolidx[j + 7]];
670:     }
671:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) {
672:       for (j = 0; j < (A->rmap->n & 0x07); j++) z[8 * i + j] = y[8 * i + j] + sum[j];
673:     } else {
674:       for (j = 0; j < 8; j++) z[8 * i + j] = y[8 * i + j] + sum[j];
675:     }
676:   }
677: #endif

679:   PetscCall(PetscLogFlops(2.0 * a->nz));
680:   PetscCall(VecRestoreArrayRead(xx, &x));
681:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
682:   PetscFunctionReturn(PETSC_SUCCESS);
683: }

685: PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A, Vec xx, Vec zz, Vec yy)
686: {
687:   Mat_SeqSELL       *a = (Mat_SeqSELL *)A->data;
688:   PetscScalar       *y;
689:   const PetscScalar *x;
690:   const MatScalar   *aval    = a->val;
691:   const PetscInt    *acolidx = a->colidx;
692:   PetscInt           i, j, r, row, nnz_in_row, totalslices = a->totalslices;

694: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
695:   #pragma disjoint(*x, *y, *aval)
696: #endif

698:   PetscFunctionBegin;
699:   if (A->symmetric == PETSC_BOOL3_TRUE) {
700:     PetscCall(MatMultAdd_SeqSELL(A, xx, zz, yy));
701:     PetscFunctionReturn(PETSC_SUCCESS);
702:   }
703:   if (zz != yy) PetscCall(VecCopy(zz, yy));
704:   PetscCall(VecGetArrayRead(xx, &x));
705:   PetscCall(VecGetArray(yy, &y));
706:   for (i = 0; i < a->totalslices; i++) { /* loop over slices */
707:     if (i == totalslices - 1 && (A->rmap->n & 0x07)) {
708:       for (r = 0; r < (A->rmap->n & 0x07); ++r) {
709:         row        = 8 * i + r;
710:         nnz_in_row = a->rlen[row];
711:         for (j = 0; j < nnz_in_row; ++j) y[acolidx[8 * j + r]] += aval[8 * j + r] * x[row];
712:       }
713:       break;
714:     }
715:     for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j += 8) {
716:       y[acolidx[j]] += aval[j] * x[8 * i];
717:       y[acolidx[j + 1]] += aval[j + 1] * x[8 * i + 1];
718:       y[acolidx[j + 2]] += aval[j + 2] * x[8 * i + 2];
719:       y[acolidx[j + 3]] += aval[j + 3] * x[8 * i + 3];
720:       y[acolidx[j + 4]] += aval[j + 4] * x[8 * i + 4];
721:       y[acolidx[j + 5]] += aval[j + 5] * x[8 * i + 5];
722:       y[acolidx[j + 6]] += aval[j + 6] * x[8 * i + 6];
723:       y[acolidx[j + 7]] += aval[j + 7] * x[8 * i + 7];
724:     }
725:   }
726:   PetscCall(PetscLogFlops(2.0 * a->sliidx[a->totalslices]));
727:   PetscCall(VecRestoreArrayRead(xx, &x));
728:   PetscCall(VecRestoreArray(yy, &y));
729:   PetscFunctionReturn(PETSC_SUCCESS);
730: }

732: PetscErrorCode MatMultTranspose_SeqSELL(Mat A, Vec xx, Vec yy)
733: {
734:   PetscFunctionBegin;
735:   if (A->symmetric == PETSC_BOOL3_TRUE) {
736:     PetscCall(MatMult_SeqSELL(A, xx, yy));
737:   } else {
738:     PetscCall(VecSet(yy, 0.0));
739:     PetscCall(MatMultTransposeAdd_SeqSELL(A, xx, yy, yy));
740:   }
741:   PetscFunctionReturn(PETSC_SUCCESS);
742: }

744: /*
745:      Checks for missing diagonals
746: */
747: PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A, PetscBool *missing, PetscInt *d)
748: {
749:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
750:   PetscInt    *diag, i;

752:   PetscFunctionBegin;
753:   *missing = PETSC_FALSE;
754:   if (A->rmap->n > 0 && !(a->colidx)) {
755:     *missing = PETSC_TRUE;
756:     if (d) *d = 0;
757:     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
758:   } else {
759:     diag = a->diag;
760:     for (i = 0; i < A->rmap->n; i++) {
761:       if (diag[i] == -1) {
762:         *missing = PETSC_TRUE;
763:         if (d) *d = i;
764:         PetscCall(PetscInfo(A, "Matrix is missing diagonal number %" PetscInt_FMT "\n", i));
765:         break;
766:       }
767:     }
768:   }
769:   PetscFunctionReturn(PETSC_SUCCESS);
770: }

772: PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
773: {
774:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
775:   PetscInt     i, j, m = A->rmap->n, shift;

777:   PetscFunctionBegin;
778:   if (!a->diag) {
779:     PetscCall(PetscMalloc1(m, &a->diag));
780:     a->free_diag = PETSC_TRUE;
781:   }
782:   for (i = 0; i < m; i++) {                      /* loop over rows */
783:     shift      = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
784:     a->diag[i] = -1;
785:     for (j = 0; j < a->rlen[i]; j++) {
786:       if (a->colidx[shift + j * 8] == i) {
787:         a->diag[i] = shift + j * 8;
788:         break;
789:       }
790:     }
791:   }
792:   PetscFunctionReturn(PETSC_SUCCESS);
793: }

795: /*
796:   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
797: */
798: PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A, PetscScalar omega, PetscScalar fshift)
799: {
800:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
801:   PetscInt     i, *diag, m = A->rmap->n;
802:   MatScalar   *val = a->val;
803:   PetscScalar *idiag, *mdiag;

805:   PetscFunctionBegin;
806:   if (a->idiagvalid) PetscFunctionReturn(PETSC_SUCCESS);
807:   PetscCall(MatMarkDiagonal_SeqSELL(A));
808:   diag = a->diag;
809:   if (!a->idiag) {
810:     PetscCall(PetscMalloc3(m, &a->idiag, m, &a->mdiag, m, &a->ssor_work));
811:     val = a->val;
812:   }
813:   mdiag = a->mdiag;
814:   idiag = a->idiag;

816:   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
817:     for (i = 0; i < m; i++) {
818:       mdiag[i] = val[diag[i]];
819:       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
820:         PetscCheck(PetscRealPart(fshift), PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Zero diagonal on row %" PetscInt_FMT, i);
821:         PetscCall(PetscInfo(A, "Zero diagonal on row %" PetscInt_FMT "\n", i));
822:         A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
823:         A->factorerror_zeropivot_value = 0.0;
824:         A->factorerror_zeropivot_row   = i;
825:       }
826:       idiag[i] = 1.0 / val[diag[i]];
827:     }
828:     PetscCall(PetscLogFlops(m));
829:   } else {
830:     for (i = 0; i < m; i++) {
831:       mdiag[i] = val[diag[i]];
832:       idiag[i] = omega / (fshift + val[diag[i]]);
833:     }
834:     PetscCall(PetscLogFlops(2.0 * m));
835:   }
836:   a->idiagvalid = PETSC_TRUE;
837:   PetscFunctionReturn(PETSC_SUCCESS);
838: }

840: PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
841: {
842:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;

844:   PetscFunctionBegin;
845:   PetscCall(PetscArrayzero(a->val, a->sliidx[a->totalslices]));
846:   PetscCall(MatSeqSELLInvalidateDiagonal(A));
847:   PetscFunctionReturn(PETSC_SUCCESS);
848: }

850: PetscErrorCode MatDestroy_SeqSELL(Mat A)
851: {
852:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;

854:   PetscFunctionBegin;
855: #if defined(PETSC_USE_LOG)
856:   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->n, A->cmap->n, a->nz));
857: #endif
858:   PetscCall(MatSeqXSELLFreeSELL(A, &a->val, &a->colidx));
859:   PetscCall(ISDestroy(&a->row));
860:   PetscCall(ISDestroy(&a->col));
861:   PetscCall(PetscFree(a->diag));
862:   PetscCall(PetscFree(a->rlen));
863:   PetscCall(PetscFree(a->sliidx));
864:   PetscCall(PetscFree3(a->idiag, a->mdiag, a->ssor_work));
865:   PetscCall(PetscFree(a->solve_work));
866:   PetscCall(ISDestroy(&a->icol));
867:   PetscCall(PetscFree(a->saved_values));
868:   PetscCall(PetscFree2(a->getrowcols, a->getrowvals));

870:   PetscCall(PetscFree(A->data));

872:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
873:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
874:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
875:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", NULL));
876:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqSELLGetArray_C", NULL));
877:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqSELLRestoreArray_C", NULL));
878:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqsell_seqaij_C", NULL));
879:   PetscFunctionReturn(PETSC_SUCCESS);
880: }

882: PetscErrorCode MatSetOption_SeqSELL(Mat A, MatOption op, PetscBool flg)
883: {
884:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;

886:   PetscFunctionBegin;
887:   switch (op) {
888:   case MAT_ROW_ORIENTED:
889:     a->roworiented = flg;
890:     break;
891:   case MAT_KEEP_NONZERO_PATTERN:
892:     a->keepnonzeropattern = flg;
893:     break;
894:   case MAT_NEW_NONZERO_LOCATIONS:
895:     a->nonew = (flg ? 0 : 1);
896:     break;
897:   case MAT_NEW_NONZERO_LOCATION_ERR:
898:     a->nonew = (flg ? -1 : 0);
899:     break;
900:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
901:     a->nonew = (flg ? -2 : 0);
902:     break;
903:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
904:     a->nounused = (flg ? -1 : 0);
905:     break;
906:   case MAT_FORCE_DIAGONAL_ENTRIES:
907:   case MAT_IGNORE_OFF_PROC_ENTRIES:
908:   case MAT_USE_HASH_TABLE:
909:   case MAT_SORTED_FULL:
910:     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
911:     break;
912:   case MAT_SPD:
913:   case MAT_SYMMETRIC:
914:   case MAT_STRUCTURALLY_SYMMETRIC:
915:   case MAT_HERMITIAN:
916:   case MAT_SYMMETRY_ETERNAL:
917:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
918:   case MAT_SPD_ETERNAL:
919:     /* These options are handled directly by MatSetOption() */
920:     break;
921:   default:
922:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
923:   }
924:   PetscFunctionReturn(PETSC_SUCCESS);
925: }

927: PetscErrorCode MatGetDiagonal_SeqSELL(Mat A, Vec v)
928: {
929:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
930:   PetscInt     i, j, n, shift;
931:   PetscScalar *x, zero = 0.0;

933:   PetscFunctionBegin;
934:   PetscCall(VecGetLocalSize(v, &n));
935:   PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");

937:   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
938:     PetscInt *diag = a->diag;
939:     PetscCall(VecGetArray(v, &x));
940:     for (i = 0; i < n; i++) x[i] = 1.0 / a->val[diag[i]];
941:     PetscCall(VecRestoreArray(v, &x));
942:     PetscFunctionReturn(PETSC_SUCCESS);
943:   }

945:   PetscCall(VecSet(v, zero));
946:   PetscCall(VecGetArray(v, &x));
947:   for (i = 0; i < n; i++) {                 /* loop over rows */
948:     shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
949:     x[i]  = 0;
950:     for (j = 0; j < a->rlen[i]; j++) {
951:       if (a->colidx[shift + j * 8] == i) {
952:         x[i] = a->val[shift + j * 8];
953:         break;
954:       }
955:     }
956:   }
957:   PetscCall(VecRestoreArray(v, &x));
958:   PetscFunctionReturn(PETSC_SUCCESS);
959: }

961: PetscErrorCode MatDiagonalScale_SeqSELL(Mat A, Vec ll, Vec rr)
962: {
963:   Mat_SeqSELL       *a = (Mat_SeqSELL *)A->data;
964:   const PetscScalar *l, *r;
965:   PetscInt           i, j, m, n, row;

967:   PetscFunctionBegin;
968:   if (ll) {
969:     /* The local size is used so that VecMPI can be passed to this routine
970:        by MatDiagonalScale_MPISELL */
971:     PetscCall(VecGetLocalSize(ll, &m));
972:     PetscCheck(m == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector wrong length");
973:     PetscCall(VecGetArrayRead(ll, &l));
974:     for (i = 0; i < a->totalslices; i++) {                  /* loop over slices */
975:       if (i == a->totalslices - 1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
976:         for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
977:           if (row < (A->rmap->n & 0x07)) a->val[j] *= l[8 * i + row];
978:         }
979:       } else {
980:         for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) a->val[j] *= l[8 * i + row];
981:       }
982:     }
983:     PetscCall(VecRestoreArrayRead(ll, &l));
984:     PetscCall(PetscLogFlops(a->nz));
985:   }
986:   if (rr) {
987:     PetscCall(VecGetLocalSize(rr, &n));
988:     PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vector wrong length");
989:     PetscCall(VecGetArrayRead(rr, &r));
990:     for (i = 0; i < a->totalslices; i++) {                  /* loop over slices */
991:       if (i == a->totalslices - 1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
992:         for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
993:           if (row < (A->rmap->n & 0x07)) a->val[j] *= r[a->colidx[j]];
994:         }
995:       } else {
996:         for (j = a->sliidx[i]; j < a->sliidx[i + 1]; j++) a->val[j] *= r[a->colidx[j]];
997:       }
998:     }
999:     PetscCall(VecRestoreArrayRead(rr, &r));
1000:     PetscCall(PetscLogFlops(a->nz));
1001:   }
1002:   PetscCall(MatSeqSELLInvalidateDiagonal(A));
1003:   PetscFunctionReturn(PETSC_SUCCESS);
1004: }

1006: PetscErrorCode MatGetValues_SeqSELL(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
1007: {
1008:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1009:   PetscInt    *cp, i, k, low, high, t, row, col, l;
1010:   PetscInt     shift;
1011:   MatScalar   *vp;

1013:   PetscFunctionBegin;
1014:   for (k = 0; k < m; k++) { /* loop over requested rows */
1015:     row = im[k];
1016:     if (row < 0) continue;
1017:     PetscCheck(row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->n - 1);
1018:     shift = a->sliidx[row >> 3] + (row & 0x07); /* starting index of the row */
1019:     cp    = a->colidx + shift;                  /* pointer to the row */
1020:     vp    = a->val + shift;                     /* pointer to the row */
1021:     for (l = 0; l < n; l++) {                   /* loop over requested columns */
1022:       col = in[l];
1023:       if (col < 0) continue;
1024:       PetscCheck(col < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: row %" PetscInt_FMT " max %" PetscInt_FMT, col, A->cmap->n - 1);
1025:       high = a->rlen[row];
1026:       low  = 0; /* assume unsorted */
1027:       while (high - low > 5) {
1028:         t = (low + high) / 2;
1029:         if (*(cp + t * 8) > col) high = t;
1030:         else low = t;
1031:       }
1032:       for (i = low; i < high; i++) {
1033:         if (*(cp + 8 * i) > col) break;
1034:         if (*(cp + 8 * i) == col) {
1035:           *v++ = *(vp + 8 * i);
1036:           goto finished;
1037:         }
1038:       }
1039:       *v++ = 0.0;
1040:     finished:;
1041:     }
1042:   }
1043:   PetscFunctionReturn(PETSC_SUCCESS);
1044: }

1046: PetscErrorCode MatView_SeqSELL_ASCII(Mat A, PetscViewer viewer)
1047: {
1048:   Mat_SeqSELL      *a = (Mat_SeqSELL *)A->data;
1049:   PetscInt          i, j, m = A->rmap->n, shift;
1050:   const char       *name;
1051:   PetscViewerFormat format;

1053:   PetscFunctionBegin;
1054:   PetscCall(PetscViewerGetFormat(viewer, &format));
1055:   if (format == PETSC_VIEWER_ASCII_MATLAB) {
1056:     PetscInt nofinalvalue = 0;
1057:     /*
1058:     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1059:       nofinalvalue = 1;
1060:     }
1061:     */
1062:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1063:     PetscCall(PetscViewerASCIIPrintf(viewer, "%% Size = %" PetscInt_FMT " %" PetscInt_FMT " \n", m, A->cmap->n));
1064:     PetscCall(PetscViewerASCIIPrintf(viewer, "%% Nonzeros = %" PetscInt_FMT " \n", a->nz));
1065: #if defined(PETSC_USE_COMPLEX)
1066:     PetscCall(PetscViewerASCIIPrintf(viewer, "zzz = zeros(%" PetscInt_FMT ",4);\n", a->nz + nofinalvalue));
1067: #else
1068:     PetscCall(PetscViewerASCIIPrintf(viewer, "zzz = zeros(%" PetscInt_FMT ",3);\n", a->nz + nofinalvalue));
1069: #endif
1070:     PetscCall(PetscViewerASCIIPrintf(viewer, "zzz = [\n"));

1072:     for (i = 0; i < m; i++) {
1073:       shift = a->sliidx[i >> 3] + (i & 0x07);
1074:       for (j = 0; j < a->rlen[i]; j++) {
1075: #if defined(PETSC_USE_COMPLEX)
1076:         PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e %18.16e\n", i + 1, a->colidx[shift + 8 * j] + 1, (double)PetscRealPart(a->val[shift + 8 * j]), (double)PetscImaginaryPart(a->val[shift + 8 * j])));
1077: #else
1078:         PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n", i + 1, a->colidx[shift + 8 * j] + 1, (double)a->val[shift + 8 * j]));
1079: #endif
1080:       }
1081:     }
1082:     /*
1083:     if (nofinalvalue) {
1084: #if defined(PETSC_USE_COMPLEX)
1085:       PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e %18.16e\n",m,A->cmap->n,0.,0.));
1086: #else
1087:       PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",m,A->cmap->n,0.0));
1088: #endif
1089:     }
1090:     */
1091:     PetscCall(PetscObjectGetName((PetscObject)A, &name));
1092:     PetscCall(PetscViewerASCIIPrintf(viewer, "];\n %s = spconvert(zzz);\n", name));
1093:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1094:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1095:     PetscFunctionReturn(PETSC_SUCCESS);
1096:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1097:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1098:     for (i = 0; i < m; i++) {
1099:       PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i));
1100:       shift = a->sliidx[i >> 3] + (i & 0x07);
1101:       for (j = 0; j < a->rlen[i]; j++) {
1102: #if defined(PETSC_USE_COMPLEX)
1103:         if (PetscImaginaryPart(a->val[shift + 8 * j]) > 0.0 && PetscRealPart(a->val[shift + 8 * j]) != 0.0) {
1104:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j]), (double)PetscImaginaryPart(a->val[shift + 8 * j])));
1105:         } else if (PetscImaginaryPart(a->val[shift + 8 * j]) < 0.0 && PetscRealPart(a->val[shift + 8 * j]) != 0.0) {
1106:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j]), (double)-PetscImaginaryPart(a->val[shift + 8 * j])));
1107:         } else if (PetscRealPart(a->val[shift + 8 * j]) != 0.0) {
1108:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j])));
1109:         }
1110: #else
1111:         if (a->val[shift + 8 * j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + 8 * j], (double)a->val[shift + 8 * j]));
1112: #endif
1113:       }
1114:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1115:     }
1116:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1117:   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1118:     PetscInt    cnt = 0, jcnt;
1119:     PetscScalar value;
1120: #if defined(PETSC_USE_COMPLEX)
1121:     PetscBool realonly = PETSC_TRUE;
1122:     for (i = 0; i < a->sliidx[a->totalslices]; i++) {
1123:       if (PetscImaginaryPart(a->val[i]) != 0.0) {
1124:         realonly = PETSC_FALSE;
1125:         break;
1126:       }
1127:     }
1128: #endif

1130:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1131:     for (i = 0; i < m; i++) {
1132:       jcnt  = 0;
1133:       shift = a->sliidx[i >> 3] + (i & 0x07);
1134:       for (j = 0; j < A->cmap->n; j++) {
1135:         if (jcnt < a->rlen[i] && j == a->colidx[shift + 8 * j]) {
1136:           value = a->val[cnt++];
1137:           jcnt++;
1138:         } else {
1139:           value = 0.0;
1140:         }
1141: #if defined(PETSC_USE_COMPLEX)
1142:         if (realonly) {
1143:           PetscCall(PetscViewerASCIIPrintf(viewer, " %7.5e ", (double)PetscRealPart(value)));
1144:         } else {
1145:           PetscCall(PetscViewerASCIIPrintf(viewer, " %7.5e+%7.5e i ", (double)PetscRealPart(value), (double)PetscImaginaryPart(value)));
1146:         }
1147: #else
1148:         PetscCall(PetscViewerASCIIPrintf(viewer, " %7.5e ", (double)value));
1149: #endif
1150:       }
1151:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1152:     }
1153:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1154:   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1155:     PetscInt fshift = 1;
1156:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1157: #if defined(PETSC_USE_COMPLEX)
1158:     PetscCall(PetscViewerASCIIPrintf(viewer, "%%%%MatrixMarket matrix coordinate complex general\n"));
1159: #else
1160:     PetscCall(PetscViewerASCIIPrintf(viewer, "%%%%MatrixMarket matrix coordinate real general\n"));
1161: #endif
1162:     PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", m, A->cmap->n, a->nz));
1163:     for (i = 0; i < m; i++) {
1164:       shift = a->sliidx[i >> 3] + (i & 0x07);
1165:       for (j = 0; j < a->rlen[i]; j++) {
1166: #if defined(PETSC_USE_COMPLEX)
1167:         PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %g %g\n", i + fshift, a->colidx[shift + 8 * j] + fshift, (double)PetscRealPart(a->val[shift + 8 * j]), (double)PetscImaginaryPart(a->val[shift + 8 * j])));
1168: #else
1169:         PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " %" PetscInt_FMT " %g\n", i + fshift, a->colidx[shift + 8 * j] + fshift, (double)a->val[shift + 8 * j]));
1170: #endif
1171:       }
1172:     }
1173:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1174:   } else if (format == PETSC_VIEWER_NATIVE) {
1175:     for (i = 0; i < a->totalslices; i++) { /* loop over slices */
1176:       PetscInt row;
1177:       PetscCall(PetscViewerASCIIPrintf(viewer, "slice %" PetscInt_FMT ": %" PetscInt_FMT " %" PetscInt_FMT "\n", i, a->sliidx[i], a->sliidx[i + 1]));
1178:       for (j = a->sliidx[i], row = 0; j < a->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
1179: #if defined(PETSC_USE_COMPLEX)
1180:         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1181:           PetscCall(PetscViewerASCIIPrintf(viewer, "  %" PetscInt_FMT " %" PetscInt_FMT " %g + %g i\n", 8 * i + row, a->colidx[j], (double)PetscRealPart(a->val[j]), (double)PetscImaginaryPart(a->val[j])));
1182:         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1183:           PetscCall(PetscViewerASCIIPrintf(viewer, "  %" PetscInt_FMT " %" PetscInt_FMT " %g - %g i\n", 8 * i + row, a->colidx[j], (double)PetscRealPart(a->val[j]), -(double)PetscImaginaryPart(a->val[j])));
1184:         } else {
1185:           PetscCall(PetscViewerASCIIPrintf(viewer, "  %" PetscInt_FMT " %" PetscInt_FMT " %g\n", 8 * i + row, a->colidx[j], (double)PetscRealPart(a->val[j])));
1186:         }
1187: #else
1188:         PetscCall(PetscViewerASCIIPrintf(viewer, "  %" PetscInt_FMT " %" PetscInt_FMT " %g\n", 8 * i + row, a->colidx[j], (double)a->val[j]));
1189: #endif
1190:       }
1191:     }
1192:   } else {
1193:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1194:     if (A->factortype) {
1195:       for (i = 0; i < m; i++) {
1196:         shift = a->sliidx[i >> 3] + (i & 0x07);
1197:         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i));
1198:         /* L part */
1199:         for (j = shift; j < a->diag[i]; j += 8) {
1200: #if defined(PETSC_USE_COMPLEX)
1201:           if (PetscImaginaryPart(a->val[shift + 8 * j]) > 0.0) {
1202:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)PetscImaginaryPart(a->val[j])));
1203:           } else if (PetscImaginaryPart(a->val[shift + 8 * j]) < 0.0) {
1204:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)(-PetscImaginaryPart(a->val[j]))));
1205:           } else {
1206:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)PetscRealPart(a->val[j])));
1207:           }
1208: #else
1209:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)a->val[j]));
1210: #endif
1211:         }
1212:         /* diagonal */
1213:         j = a->diag[i];
1214: #if defined(PETSC_USE_COMPLEX)
1215:         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1216:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[j], (double)PetscRealPart(1.0 / a->val[j]), (double)PetscImaginaryPart(1.0 / a->val[j])));
1217:         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1218:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[j], (double)PetscRealPart(1.0 / a->val[j]), (double)(-PetscImaginaryPart(1.0 / a->val[j]))));
1219:         } else {
1220:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)PetscRealPart(1.0 / a->val[j])));
1221:         }
1222: #else
1223:         PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)(1.0 / a->val[j])));
1224: #endif

1226:         /* U part */
1227:         for (j = a->diag[i] + 1; j < shift + 8 * a->rlen[i]; j += 8) {
1228: #if defined(PETSC_USE_COMPLEX)
1229:           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1230:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)PetscImaginaryPart(a->val[j])));
1231:           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1232:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[j], (double)PetscRealPart(a->val[j]), (double)(-PetscImaginaryPart(a->val[j]))));
1233:           } else {
1234:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)PetscRealPart(a->val[j])));
1235:           }
1236: #else
1237:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[j], (double)a->val[j]));
1238: #endif
1239:         }
1240:         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1241:       }
1242:     } else {
1243:       for (i = 0; i < m; i++) {
1244:         shift = a->sliidx[i >> 3] + (i & 0x07);
1245:         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i));
1246:         for (j = 0; j < a->rlen[i]; j++) {
1247: #if defined(PETSC_USE_COMPLEX)
1248:           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1249:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i)", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j]), (double)PetscImaginaryPart(a->val[shift + 8 * j])));
1250:           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1251:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i)", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j]), (double)-PetscImaginaryPart(a->val[shift + 8 * j])));
1252:           } else {
1253:             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + 8 * j], (double)PetscRealPart(a->val[shift + 8 * j])));
1254:           }
1255: #else
1256:           PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", a->colidx[shift + 8 * j], (double)a->val[shift + 8 * j]));
1257: #endif
1258:         }
1259:         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1260:       }
1261:     }
1262:     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1263:   }
1264:   PetscCall(PetscViewerFlush(viewer));
1265:   PetscFunctionReturn(PETSC_SUCCESS);
1266: }

1268: #include <petscdraw.h>
1269: PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw, void *Aa)
1270: {
1271:   Mat               A = (Mat)Aa;
1272:   Mat_SeqSELL      *a = (Mat_SeqSELL *)A->data;
1273:   PetscInt          i, j, m = A->rmap->n, shift;
1274:   int               color;
1275:   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1276:   PetscViewer       viewer;
1277:   PetscViewerFormat format;

1279:   PetscFunctionBegin;
1280:   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1281:   PetscCall(PetscViewerGetFormat(viewer, &format));
1282:   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));

1284:   /* loop over matrix elements drawing boxes */

1286:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1287:     PetscDrawCollectiveBegin(draw);
1288:     /* Blue for negative, Cyan for zero and  Red for positive */
1289:     color = PETSC_DRAW_BLUE;
1290:     for (i = 0; i < m; i++) {
1291:       shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
1292:       y_l   = m - i - 1.0;
1293:       y_r   = y_l + 1.0;
1294:       for (j = 0; j < a->rlen[i]; j++) {
1295:         x_l = a->colidx[shift + j * 8];
1296:         x_r = x_l + 1.0;
1297:         if (PetscRealPart(a->val[shift + 8 * j]) >= 0.) continue;
1298:         PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1299:       }
1300:     }
1301:     color = PETSC_DRAW_CYAN;
1302:     for (i = 0; i < m; i++) {
1303:       shift = a->sliidx[i >> 3] + (i & 0x07);
1304:       y_l   = m - i - 1.0;
1305:       y_r   = y_l + 1.0;
1306:       for (j = 0; j < a->rlen[i]; j++) {
1307:         x_l = a->colidx[shift + j * 8];
1308:         x_r = x_l + 1.0;
1309:         if (a->val[shift + 8 * j] != 0.) continue;
1310:         PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1311:       }
1312:     }
1313:     color = PETSC_DRAW_RED;
1314:     for (i = 0; i < m; i++) {
1315:       shift = a->sliidx[i >> 3] + (i & 0x07);
1316:       y_l   = m - i - 1.0;
1317:       y_r   = y_l + 1.0;
1318:       for (j = 0; j < a->rlen[i]; j++) {
1319:         x_l = a->colidx[shift + j * 8];
1320:         x_r = x_l + 1.0;
1321:         if (PetscRealPart(a->val[shift + 8 * j]) <= 0.) continue;
1322:         PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1323:       }
1324:     }
1325:     PetscDrawCollectiveEnd(draw);
1326:   } else {
1327:     /* use contour shading to indicate magnitude of values */
1328:     /* first determine max of all nonzero values */
1329:     PetscReal minv = 0.0, maxv = 0.0;
1330:     PetscInt  count = 0;
1331:     PetscDraw popup;
1332:     for (i = 0; i < a->sliidx[a->totalslices]; i++) {
1333:       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1334:     }
1335:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1336:     PetscCall(PetscDrawGetPopup(draw, &popup));
1337:     PetscCall(PetscDrawScalePopup(popup, minv, maxv));

1339:     PetscDrawCollectiveBegin(draw);
1340:     for (i = 0; i < m; i++) {
1341:       shift = a->sliidx[i >> 3] + (i & 0x07);
1342:       y_l   = m - i - 1.0;
1343:       y_r   = y_l + 1.0;
1344:       for (j = 0; j < a->rlen[i]; j++) {
1345:         x_l   = a->colidx[shift + j * 8];
1346:         x_r   = x_l + 1.0;
1347:         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]), minv, maxv);
1348:         PetscCall(PetscDrawRectangle(draw, x_l, y_l, x_r, y_r, color, color, color, color));
1349:         count++;
1350:       }
1351:     }
1352:     PetscDrawCollectiveEnd(draw);
1353:   }
1354:   PetscFunctionReturn(PETSC_SUCCESS);
1355: }

1357: #include <petscdraw.h>
1358: PetscErrorCode MatView_SeqSELL_Draw(Mat A, PetscViewer viewer)
1359: {
1360:   PetscDraw draw;
1361:   PetscReal xr, yr, xl, yl, h, w;
1362:   PetscBool isnull;

1364:   PetscFunctionBegin;
1365:   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1366:   PetscCall(PetscDrawIsNull(draw, &isnull));
1367:   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);

1369:   xr = A->cmap->n;
1370:   yr = A->rmap->n;
1371:   h  = yr / 10.0;
1372:   w  = xr / 10.0;
1373:   xr += w;
1374:   yr += h;
1375:   xl = -w;
1376:   yl = -h;
1377:   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
1378:   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
1379:   PetscCall(PetscDrawZoom(draw, MatView_SeqSELL_Draw_Zoom, A));
1380:   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
1381:   PetscCall(PetscDrawSave(draw));
1382:   PetscFunctionReturn(PETSC_SUCCESS);
1383: }

1385: PetscErrorCode MatView_SeqSELL(Mat A, PetscViewer viewer)
1386: {
1387:   PetscBool iascii, isbinary, isdraw;

1389:   PetscFunctionBegin;
1390:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1391:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1392:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1393:   if (iascii) {
1394:     PetscCall(MatView_SeqSELL_ASCII(A, viewer));
1395:   } else if (isbinary) {
1396:     /* PetscCall(MatView_SeqSELL_Binary(A,viewer)); */
1397:   } else if (isdraw) PetscCall(MatView_SeqSELL_Draw(A, viewer));
1398:   PetscFunctionReturn(PETSC_SUCCESS);
1399: }

1401: PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A, MatAssemblyType mode)
1402: {
1403:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1404:   PetscInt     i, shift, row_in_slice, row, nrow, *cp, lastcol, j, k;
1405:   MatScalar   *vp;

1407:   PetscFunctionBegin;
1408:   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
1409:   /* To do: compress out the unused elements */
1410:   PetscCall(MatMarkDiagonal_SeqSELL(A));
1411:   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " allocated %" PetscInt_FMT " used (%" PetscInt_FMT " nonzeros+%" PetscInt_FMT " paddedzeros)\n", A->rmap->n, A->cmap->n, a->maxallocmat, a->sliidx[a->totalslices], a->nz, a->sliidx[a->totalslices] - a->nz));
1412:   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
1413:   PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", a->rlenmax));
1414:   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1415:   for (i = 0; i < a->totalslices; ++i) {
1416:     shift = a->sliidx[i];                                      /* starting index of the slice */
1417:     cp    = a->colidx + shift;                                 /* pointer to the column indices of the slice */
1418:     vp    = a->val + shift;                                    /* pointer to the nonzero values of the slice */
1419:     for (row_in_slice = 0; row_in_slice < 8; ++row_in_slice) { /* loop over rows in the slice */
1420:       row  = 8 * i + row_in_slice;
1421:       nrow = a->rlen[row]; /* number of nonzeros in row */
1422:       /*
1423:         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1424:         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1425:       */
1426:       lastcol = 0;
1427:       if (nrow > 0) {                                /* nonempty row */
1428:         lastcol = cp[8 * (nrow - 1) + row_in_slice]; /* use the index from the last nonzero at current row */
1429:       } else if (!row_in_slice) {                    /* first row of the currect slice is empty */
1430:         for (j = 1; j < 8; j++) {
1431:           if (a->rlen[8 * i + j]) {
1432:             lastcol = cp[j];
1433:             break;
1434:           }
1435:         }
1436:       } else {
1437:         if (a->sliidx[i + 1] != shift) lastcol = cp[row_in_slice - 1]; /* use the index from the previous row */
1438:       }

1440:       for (k = nrow; k < (a->sliidx[i + 1] - shift) / 8; ++k) {
1441:         cp[8 * k + row_in_slice] = lastcol;
1442:         vp[8 * k + row_in_slice] = (MatScalar)0;
1443:       }
1444:     }
1445:   }

1447:   A->info.mallocs += a->reallocs;
1448:   a->reallocs = 0;

1450:   PetscCall(MatSeqSELLInvalidateDiagonal(A));
1451:   PetscFunctionReturn(PETSC_SUCCESS);
1452: }

1454: PetscErrorCode MatGetInfo_SeqSELL(Mat A, MatInfoType flag, MatInfo *info)
1455: {
1456:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;

1458:   PetscFunctionBegin;
1459:   info->block_size   = 1.0;
1460:   info->nz_allocated = a->maxallocmat;
1461:   info->nz_used      = a->sliidx[a->totalslices]; /* include padding zeros */
1462:   info->nz_unneeded  = (a->maxallocmat - a->sliidx[a->totalslices]);
1463:   info->assemblies   = A->num_ass;
1464:   info->mallocs      = A->info.mallocs;
1465:   info->memory       = 0; /* REVIEW ME */
1466:   if (A->factortype) {
1467:     info->fill_ratio_given  = A->info.fill_ratio_given;
1468:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1469:     info->factor_mallocs    = A->info.factor_mallocs;
1470:   } else {
1471:     info->fill_ratio_given  = 0;
1472:     info->fill_ratio_needed = 0;
1473:     info->factor_mallocs    = 0;
1474:   }
1475:   PetscFunctionReturn(PETSC_SUCCESS);
1476: }

1478: PetscErrorCode MatSetValues_SeqSELL(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
1479: {
1480:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1481:   PetscInt     shift, i, k, l, low, high, t, ii, row, col, nrow;
1482:   PetscInt    *cp, nonew = a->nonew, lastcol = -1;
1483:   MatScalar   *vp, value;

1485:   PetscFunctionBegin;
1486:   for (k = 0; k < m; k++) { /* loop over added rows */
1487:     row = im[k];
1488:     if (row < 0) continue;
1489:     PetscCheck(row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->n - 1);
1490:     shift = a->sliidx[row >> 3] + (row & 0x07); /* starting index of the row */
1491:     cp    = a->colidx + shift;                  /* pointer to the row */
1492:     vp    = a->val + shift;                     /* pointer to the row */
1493:     nrow  = a->rlen[row];
1494:     low   = 0;
1495:     high  = nrow;

1497:     for (l = 0; l < n; l++) { /* loop over added columns */
1498:       col = in[l];
1499:       if (col < 0) continue;
1500:       PetscCheck(col < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Col too large: row %" PetscInt_FMT " max %" PetscInt_FMT, col, A->cmap->n - 1);
1501:       if (a->roworiented) {
1502:         value = v[l + k * n];
1503:       } else {
1504:         value = v[k + l * m];
1505:       }
1506:       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;

1508:       /* search in this row for the specified column, i indicates the column to be set */
1509:       if (col <= lastcol) low = 0;
1510:       else high = nrow;
1511:       lastcol = col;
1512:       while (high - low > 5) {
1513:         t = (low + high) / 2;
1514:         if (*(cp + t * 8) > col) high = t;
1515:         else low = t;
1516:       }
1517:       for (i = low; i < high; i++) {
1518:         if (*(cp + i * 8) > col) break;
1519:         if (*(cp + i * 8) == col) {
1520:           if (is == ADD_VALUES) *(vp + i * 8) += value;
1521:           else *(vp + i * 8) = value;
1522:           low = i + 1;
1523:           goto noinsert;
1524:         }
1525:       }
1526:       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1527:       if (nonew == 1) goto noinsert;
1528:       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
1529:       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1530:       MatSeqXSELLReallocateSELL(A, A->rmap->n, 1, nrow, a->sliidx, row / 8, row, col, a->colidx, a->val, cp, vp, nonew, MatScalar);
1531:       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1532:       for (ii = nrow - 1; ii >= i; ii--) {
1533:         *(cp + (ii + 1) * 8) = *(cp + ii * 8);
1534:         *(vp + (ii + 1) * 8) = *(vp + ii * 8);
1535:       }
1536:       a->rlen[row]++;
1537:       *(cp + i * 8) = col;
1538:       *(vp + i * 8) = value;
1539:       a->nz++;
1540:       A->nonzerostate++;
1541:       low = i + 1;
1542:       high++;
1543:       nrow++;
1544:     noinsert:;
1545:     }
1546:     a->rlen[row] = nrow;
1547:   }
1548:   PetscFunctionReturn(PETSC_SUCCESS);
1549: }

1551: PetscErrorCode MatCopy_SeqSELL(Mat A, Mat B, MatStructure str)
1552: {
1553:   PetscFunctionBegin;
1554:   /* If the two matrices have the same copy implementation, use fast copy. */
1555:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1556:     Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1557:     Mat_SeqSELL *b = (Mat_SeqSELL *)B->data;

1559:     PetscCheck(a->sliidx[a->totalslices] == b->sliidx[b->totalslices], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzeros in two matrices are different");
1560:     PetscCall(PetscArraycpy(b->val, a->val, a->sliidx[a->totalslices]));
1561:   } else {
1562:     PetscCall(MatCopy_Basic(A, B, str));
1563:   }
1564:   PetscFunctionReturn(PETSC_SUCCESS);
1565: }

1567: PetscErrorCode MatSetUp_SeqSELL(Mat A)
1568: {
1569:   PetscFunctionBegin;
1570:   PetscCall(MatSeqSELLSetPreallocation(A, PETSC_DEFAULT, NULL));
1571:   PetscFunctionReturn(PETSC_SUCCESS);
1572: }

1574: PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A, PetscScalar *array[])
1575: {
1576:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;

1578:   PetscFunctionBegin;
1579:   *array = a->val;
1580:   PetscFunctionReturn(PETSC_SUCCESS);
1581: }

1583: PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A, PetscScalar *array[])
1584: {
1585:   PetscFunctionBegin;
1586:   PetscFunctionReturn(PETSC_SUCCESS);
1587: }

1589: PetscErrorCode MatRealPart_SeqSELL(Mat A)
1590: {
1591:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1592:   PetscInt     i;
1593:   MatScalar   *aval = a->val;

1595:   PetscFunctionBegin;
1596:   for (i = 0; i < a->sliidx[a->totalslices]; i++) aval[i] = PetscRealPart(aval[i]);
1597:   PetscFunctionReturn(PETSC_SUCCESS);
1598: }

1600: PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1601: {
1602:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
1603:   PetscInt     i;
1604:   MatScalar   *aval = a->val;

1606:   PetscFunctionBegin;
1607:   for (i = 0; i < a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1608:   PetscCall(MatSeqSELLInvalidateDiagonal(A));
1609:   PetscFunctionReturn(PETSC_SUCCESS);
1610: }

1612: PetscErrorCode MatScale_SeqSELL(Mat inA, PetscScalar alpha)
1613: {
1614:   Mat_SeqSELL *a      = (Mat_SeqSELL *)inA->data;
1615:   MatScalar   *aval   = a->val;
1616:   PetscScalar  oalpha = alpha;
1617:   PetscBLASInt one    = 1, size;

1619:   PetscFunctionBegin;
1620:   PetscCall(PetscBLASIntCast(a->sliidx[a->totalslices], &size));
1621:   PetscCallBLAS("BLASscal", BLASscal_(&size, &oalpha, aval, &one));
1622:   PetscCall(PetscLogFlops(a->nz));
1623:   PetscCall(MatSeqSELLInvalidateDiagonal(inA));
1624:   PetscFunctionReturn(PETSC_SUCCESS);
1625: }

1627: PetscErrorCode MatShift_SeqSELL(Mat Y, PetscScalar a)
1628: {
1629:   Mat_SeqSELL *y = (Mat_SeqSELL *)Y->data;

1631:   PetscFunctionBegin;
1632:   if (!Y->preallocated || !y->nz) PetscCall(MatSeqSELLSetPreallocation(Y, 1, NULL));
1633:   PetscCall(MatShift_Basic(Y, a));
1634:   PetscFunctionReturn(PETSC_SUCCESS);
1635: }

/*
  MatSOR_SeqSELL - Runs SOR sweeps on a SeqSELL matrix, updating xx in place.

  Input Parameters:
    A      - the matrix
    bb     - right-hand side (read only)
    omega  - relaxation factor; together with fshift it is handed to MatInvertDiagonal_SeqSELL()
    flag   - sweep selection (forward / backward, local variants, zero initial guess)
    fshift - diagonal shift, see omega
    its    - number of iterations; the sweep count actually used is its * lits
    lits   - number of local iterations
    xx     - on entry the initial guess (not read on the first sweep when
             SOR_ZERO_INITIAL_GUESS is set), on exit the updated iterate

  Notes:
    SOR_APPLY_UPPER, SOR_APPLY_LOWER and SOR_EISENSTAT raise PETSC_ERR_SUP.
    The entries of row i start at a->sliidx[i >> 3] + (i & 0x07) and are strided by 8.
    diag[i] is used as the storage index of the diagonal entry of row i: entries before it
    are treated as the lower-triangular part, entries after it as the upper-triangular part.
*/
PetscErrorCode MatSOR_SeqSELL(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_SeqSELL       *a = (Mat_SeqSELL *)A->data;
  PetscScalar       *x, sum, *t;
  const MatScalar   *idiag = NULL, *mdiag;
  const PetscScalar *b, *xb;
  PetscInt           n, m = A->rmap->n, i, j, shift;
  const PetscInt    *diag;

  PetscFunctionBegin;
  its = its * lits;

  if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
  if (!a->idiagvalid) PetscCall(MatInvertDiagonal_SeqSELL(A, omega, fshift));
  /* remember the parameters the cached diagonal data was computed for */
  a->fshift = fshift;
  a->omega  = omega;

  diag  = a->diag;
  t     = a->ssor_work; /* work array holding b minus the lower-triangular product */
  idiag = a->idiag;
  mdiag = a->mdiag;

  PetscCall(VecGetArray(xx, &x));
  PetscCall(VecGetArrayRead(bb, &b));
  /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
  PetscCheck(flag != SOR_APPLY_UPPER, PETSC_COMM_SELF, PETSC_ERR_SUP, "SOR_APPLY_UPPER is not implemented");
  PetscCheck(flag != SOR_APPLY_LOWER, PETSC_COMM_SELF, PETSC_ERR_SUP, "SOR_APPLY_LOWER is not implemented");
  PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");

  /* first sweep with a zero initial guess: the forward sweep needs only the lower-triangular part */
  if (flag & SOR_ZERO_INITIAL_GUESS) {
    if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
      for (i = 0; i < m; i++) {
        shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
        sum   = b[i];
        n     = (diag[i] - shift) / 8; /* number of entries stored before the diagonal */
        for (j = 0; j < n; j++) sum -= a->val[shift + j * 8] * x[a->colidx[shift + j * 8]];
        t[i] = sum;
        x[i] = sum * idiag[i];
      }
      xb = t; /* the backward sweep can start from the saved lower-triangular application */
      PetscCall(PetscLogFlops(a->nz));
    } else xb = b;
    if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
      for (i = m - 1; i >= 0; i--) {
        shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
        sum   = xb[i];
        n     = a->rlen[i] - (diag[i] - shift) / 8 - 1; /* number of entries stored after the diagonal */
        for (j = 1; j <= n; j++) sum -= a->val[diag[i] + j * 8] * x[a->colidx[diag[i] + j * 8]];
        if (xb == b) {
          /* no forward sweep was done: x[i] still counts as zero */
          x[i] = sum * idiag[i];
        } else {
          x[i] = (1. - omega) * x[i] + sum * idiag[i]; /* omega in idiag */
        }
      }
      PetscCall(PetscLogFlops(a->nz)); /* assumes 1/2 in upper */
    }
    its--;
  }
  /* remaining sweeps start from a general iterate */
  while (its--) {
    if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
      for (i = 0; i < m; i++) {
        /* lower */
        shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
        sum   = b[i];
        n     = (diag[i] - shift) / 8;
        for (j = 0; j < n; j++) sum -= a->val[shift + j * 8] * x[a->colidx[shift + j * 8]];
        t[i] = sum; /* save application of the lower-triangular part */
        /* upper */
        n = a->rlen[i] - (diag[i] - shift) / 8 - 1;
        for (j = 1; j <= n; j++) sum -= a->val[diag[i] + j * 8] * x[a->colidx[diag[i] + j * 8]];
        x[i] = (1. - omega) * x[i] + sum * idiag[i]; /* omega in idiag */
      }
      xb = t;
      PetscCall(PetscLogFlops(2.0 * a->nz));
    } else xb = b;
    if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
      for (i = m - 1; i >= 0; i--) {
        shift = a->sliidx[i >> 3] + (i & 0x07); /* starting index of the row i */
        sum   = xb[i];
        if (xb == b) {
          /* whole matrix (no checkpointing available) */
          n = a->rlen[i];
          for (j = 0; j < n; j++) sum -= a->val[shift + j * 8] * x[a->colidx[shift + j * 8]];
          /* the loop above also subtracted the diagonal term; mdiag[i] * x[i] adds it back */
          x[i] = (1. - omega) * x[i] + (sum + mdiag[i] * x[i]) * idiag[i];
        } else { /* lower-triangular part has been saved, so only apply upper-triangular */
          n = a->rlen[i] - (diag[i] - shift) / 8 - 1;
          for (j = 1; j <= n; j++) sum -= a->val[diag[i] + j * 8] * x[a->colidx[diag[i] + j * 8]];
          x[i] = (1. - omega) * x[i] + sum * idiag[i]; /* omega in idiag */
        }
      }
      if (xb == b) {
        PetscCall(PetscLogFlops(2.0 * a->nz));
      } else {
        PetscCall(PetscLogFlops(a->nz)); /* assumes 1/2 in upper */
      }
    }
  }
  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Operation table for the SeqSELL matrix type; MatCreate_SeqSELL() copies it into B->ops.

  The initializer is positional: each entry fills the next slot of struct _MatOps, so the
  order must not be changed. A NULL entry means this table supplies no routine for that slot.
  The numeric comments appear to be running slot indices kept as a visual aid -- confirm
  against the struct _MatOps declaration before inserting or moving an entry.
*/
static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
                                       MatGetRow_SeqSELL,
                                       MatRestoreRow_SeqSELL,
                                       MatMult_SeqSELL,
                                       /* 4*/ MatMultAdd_SeqSELL,
                                       MatMultTranspose_SeqSELL,
                                       MatMultTransposeAdd_SeqSELL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_SeqSELL,
                                       NULL,
                                       /* 15*/ MatGetInfo_SeqSELL,
                                       MatEqual_SeqSELL,
                                       MatGetDiagonal_SeqSELL,
                                       MatDiagonalScale_SeqSELL,
                                       NULL,
                                       /* 20*/ NULL,
                                       MatAssemblyEnd_SeqSELL,
                                       MatSetOption_SeqSELL,
                                       MatZeroEntries_SeqSELL,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_SeqSELL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 34*/ MatDuplicate_SeqSELL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetValues_SeqSELL,
                                       MatCopy_SeqSELL,
                                       /* 44*/ NULL,
                                       MatScale_SeqSELL,
                                       MatShift_SeqSELL,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ MatFDColoringCreate_SeqXAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 59*/ NULL,
                                       MatDestroy_SeqSELL,
                                       MatView_SeqSELL,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_SeqSELL,
                                       NULL,
                                       /*104*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_SeqSELL,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_SeqXAIJ,
                                       NULL,
                                       /*144*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};

1892: PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1893: {
1894:   Mat_SeqSELL *a = (Mat_SeqSELL *)mat->data;

1896:   PetscFunctionBegin;
1897:   PetscCheck(a->nonew, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

1899:   /* allocate space for values if not already there */
1900:   if (!a->saved_values) PetscCall(PetscMalloc1(a->sliidx[a->totalslices] + 1, &a->saved_values));

1902:   /* copy values over */
1903:   PetscCall(PetscArraycpy(a->saved_values, a->val, a->sliidx[a->totalslices]));
1904:   PetscFunctionReturn(PETSC_SUCCESS);
1905: }

1907: PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1908: {
1909:   Mat_SeqSELL *a = (Mat_SeqSELL *)mat->data;

1911:   PetscFunctionBegin;
1912:   PetscCheck(a->nonew, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1913:   PetscCheck(a->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
1914:   PetscCall(PetscArraycpy(a->val, a->saved_values, a->sliidx[a->totalslices]));
1915:   PetscFunctionReturn(PETSC_SUCCESS);
1916: }

1918: /*@C
1919:  MatSeqSELLRestoreArray - returns access to the array where the data for a `MATSEQSELL` matrix is stored obtained by `MatSeqSELLGetArray()`

1921:  Not Collective

1923:  Input Parameters:
1924:  .  mat - a `MATSEQSELL` matrix
1925:  .  array - pointer to the data

1927:  Level: intermediate

1929:  .seealso: `Mat`, `MATSEQSELL`, `MatSeqSELLGetArray()`, `MatSeqSELLRestoreArrayF90()`
1930:  @*/
1931: PetscErrorCode MatSeqSELLRestoreArray(Mat A, PetscScalar **array)
1932: {
1933:   PetscFunctionBegin;
1934:   PetscUseMethod(A, "MatSeqSELLRestoreArray_C", (Mat, PetscScalar **), (A, array));
1935:   PetscFunctionReturn(PETSC_SUCCESS);
1936: }

1938: PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1939: {
1940:   Mat_SeqSELL *b;
1941:   PetscMPIInt  size;

1943:   PetscFunctionBegin;
1944:   PetscCall(PetscCitationsRegister(citation, &cited));
1945:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1946:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Comm must be of size 1");

1948:   PetscCall(PetscNew(&b));

1950:   B->data = (void *)b;

1952:   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));

1954:   b->row                = NULL;
1955:   b->col                = NULL;
1956:   b->icol               = NULL;
1957:   b->reallocs           = 0;
1958:   b->ignorezeroentries  = PETSC_FALSE;
1959:   b->roworiented        = PETSC_TRUE;
1960:   b->nonew              = 0;
1961:   b->diag               = NULL;
1962:   b->solve_work         = NULL;
1963:   B->spptr              = NULL;
1964:   b->saved_values       = NULL;
1965:   b->idiag              = NULL;
1966:   b->mdiag              = NULL;
1967:   b->ssor_work          = NULL;
1968:   b->omega              = 1.0;
1969:   b->fshift             = 0.0;
1970:   b->idiagvalid         = PETSC_FALSE;
1971:   b->keepnonzeropattern = PETSC_FALSE;

1973:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQSELL));
1974:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqSELLGetArray_C", MatSeqSELLGetArray_SeqSELL));
1975:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqSELLRestoreArray_C", MatSeqSELLRestoreArray_SeqSELL));
1976:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqSELL));
1977:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqSELL));
1978:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqSELLSetPreallocation_C", MatSeqSELLSetPreallocation_SeqSELL));
1979:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqsell_seqaij_C", MatConvert_SeqSELL_SeqAIJ));
1980:   PetscFunctionReturn(PETSC_SUCCESS);
1981: }

1983: /*
1984:  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1985:  */
1986: PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
1987: {
1988:   Mat_SeqSELL *c = (Mat_SeqSELL *)C->data, *a = (Mat_SeqSELL *)A->data;
1989:   PetscInt     i, m                           = A->rmap->n;
1990:   PetscInt     totalslices = a->totalslices;

1992:   PetscFunctionBegin;
1993:   C->factortype = A->factortype;
1994:   c->row        = NULL;
1995:   c->col        = NULL;
1996:   c->icol       = NULL;
1997:   c->reallocs   = 0;
1998:   C->assembled  = PETSC_TRUE;

2000:   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
2001:   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));

2003:   PetscCall(PetscMalloc1(8 * totalslices, &c->rlen));
2004:   PetscCall(PetscMalloc1(totalslices + 1, &c->sliidx));

2006:   for (i = 0; i < m; i++) c->rlen[i] = a->rlen[i];
2007:   for (i = 0; i < totalslices + 1; i++) c->sliidx[i] = a->sliidx[i];

2009:   /* allocate the matrix space */
2010:   if (mallocmatspace) {
2011:     PetscCall(PetscMalloc2(a->maxallocmat, &c->val, a->maxallocmat, &c->colidx));

2013:     c->singlemalloc = PETSC_TRUE;

2015:     if (m > 0) {
2016:       PetscCall(PetscArraycpy(c->colidx, a->colidx, a->maxallocmat));
2017:       if (cpvalues == MAT_COPY_VALUES) {
2018:         PetscCall(PetscArraycpy(c->val, a->val, a->maxallocmat));
2019:       } else {
2020:         PetscCall(PetscArrayzero(c->val, a->maxallocmat));
2021:       }
2022:     }
2023:   }

2025:   c->ignorezeroentries = a->ignorezeroentries;
2026:   c->roworiented       = a->roworiented;
2027:   c->nonew             = a->nonew;
2028:   if (a->diag) {
2029:     PetscCall(PetscMalloc1(m, &c->diag));
2030:     for (i = 0; i < m; i++) c->diag[i] = a->diag[i];
2031:   } else c->diag = NULL;

2033:   c->solve_work         = NULL;
2034:   c->saved_values       = NULL;
2035:   c->idiag              = NULL;
2036:   c->ssor_work          = NULL;
2037:   c->keepnonzeropattern = a->keepnonzeropattern;
2038:   c->free_val           = PETSC_TRUE;
2039:   c->free_colidx        = PETSC_TRUE;

2041:   c->maxallocmat  = a->maxallocmat;
2042:   c->maxallocrow  = a->maxallocrow;
2043:   c->rlenmax      = a->rlenmax;
2044:   c->nz           = a->nz;
2045:   C->preallocated = PETSC_TRUE;

2047:   c->nonzerorowcnt = a->nonzerorowcnt;
2048:   C->nonzerostate  = A->nonzerostate;

2050:   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
2051:   PetscFunctionReturn(PETSC_SUCCESS);
2052: }

2054: PetscErrorCode MatDuplicate_SeqSELL(Mat A, MatDuplicateOption cpvalues, Mat *B)
2055: {
2056:   PetscFunctionBegin;
2057:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
2058:   PetscCall(MatSetSizes(*B, A->rmap->n, A->cmap->n, A->rmap->n, A->cmap->n));
2059:   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) PetscCall(MatSetBlockSizesFromMats(*B, A, A));
2060:   PetscCall(MatSetType(*B, ((PetscObject)A)->type_name));
2061:   PetscCall(MatDuplicateNoCreate_SeqSELL(*B, A, cpvalues, PETSC_TRUE));
2062:   PetscFunctionReturn(PETSC_SUCCESS);
2063: }

2065: /*MC
2066:    MATSEQSELL - MATSEQSELL = "seqsell" - A matrix type to be used for sequential sparse matrices,
2067:    based on the sliced Ellpack format

2069:    Options Database Keys:
. -mat_type seqsell - sets the matrix type to `MATSEQSELL` during a call to `MatSetFromOptions()`

2072:    Level: beginner

.seealso: `Mat`, `MatCreateSeqSELL()`, `MATSELL`, `MATMPISELL`, `MATSEQAIJ`, `MATAIJ`, `MATMPIAIJ`
2075: M*/

2077: /*MC
2078:    MATSELL - MATSELL = "sell" - A matrix type to be used for sparse matrices.

2080:    This matrix type is identical to `MATSEQSELL` when constructed with a single process communicator,
2081:    and `MATMPISELL` otherwise.  As a result, for single process communicators,
2082:   `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
2083:   for communicators controlling multiple processes.  It is recommended that you call both of
2084:   the above preallocation routines for simplicity.

2086:    Options Database Keys:
2087: . -mat_type sell - sets the matrix type to "sell" during a call to MatSetFromOptions()

2089:   Level: beginner

2091:   Notes:
2092:    This format is only supported for real scalars, double precision, and 32 bit indices (the defaults).

   It can provide better performance on Intel and AMD processors with AVX2 or AVX512 support for matrices that have a similar number of
2095:    non-zeros in contiguous groups of rows. However if the computation is memory bandwidth limited it may not provide much improvement.

2097:   Developer Notes:
2098:    On Intel (and AMD) systems some of the matrix operations use SIMD (AVX) instructions to achieve higher performance.

2100:    The sparse matrix format is as follows. For simplicity we assume a slice size of 2, it is actually 8
2101: .vb
2102:                             (2 0  3 4)
2103:    Consider the matrix A =  (5 0  6 0)
2104:                             (0 0  7 8)
2105:                             (0 0  9 9)

2107:    symbolically the Ellpack format can be written as

2109:         (2 3 4 |)           (0 2 3 |)
2110:    v =  (5 6 0 |)  colidx = (0 2 2 |)
2111:         --------            ---------
2112:         (7 8 |)             (2 3 |)
2113:         (9 9 |)             (2 3 |)

2115:     The data for 2 contiguous rows of the matrix are stored together (in column-major format) (with any left-over rows handled as a special case).
    Any rows in a slice with fewer columns than the rest of the slice (row 1 above) are padded with a previous valid column index in their "extra" colidx[] locations and
2117:     zeros in their "extra" v locations so that the matrix operations do not need special code to handle different length rows within the 2 rows in a slice.

2119:     The one-dimensional representation of v used in the code is (2 5 3 6 4 0 7 9 8 9)  and for colidx is (0 0 2 2 3 2 2 2 3 3)

2121: .ve

2123:       See MatMult_SeqSELL() for how this format is used with the SIMD operations to achieve high performance.

2125:  References:
2126: . * - Hong Zhang, Richard T. Mills, Karl Rupp, and Barry F. Smith, Vectorized Parallel Sparse Matrix-Vector Multiplication in {PETSc} Using {AVX-512},
2127:    Proceedings of the 47th International Conference on Parallel Processing, 2018.

2129: .seealso: `Mat`, `MatCreateSeqSELL()`, `MatCreateSeqAIJ()`, `MatCreateSELL()`, `MATSEQSELL`, `MATMPISELL`, `MATSEQAIJ`, `MATMPIAIJ`, `MATAIJ`
2130: M*/

2132: /*@C
2133:        MatCreateSeqSELL - Creates a sparse matrix in `MATSEQSELL` format.

2135:  Collective

2137:  Input Parameters:
2138: +  comm - MPI communicator, set to `PETSC_COMM_SELF`
2139: .  m - number of rows
2140: .  n - number of columns
2141: .  rlenmax - maximum number of nonzeros in a row
2142: -  rlen - array containing the number of nonzeros in the various rows
2143:  (possibly different for each row) or NULL

2145:  Output Parameter:
2146: .  A - the matrix

2148:  It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2149:  MatXXXXSetPreallocation() paradigm instead of this routine directly.
2150:  [MatXXXXSetPreallocation() is, for example, `MatSeqSELLSetPreallocation()`]

2152:  Notes:
2153:  If nnz is given then nz is ignored

2155:  Specify the preallocated storage with either rlenmax or rlen (not both).
2156:  Set rlenmax = `PETSC_DEFAULT` and rlen = NULL for PETSc to control dynamic memory
2157:  allocation.  For large problems you MUST preallocate memory or you
2158:  will get TERRIBLE performance, see the users' manual chapter on matrices.

2160:  Level: intermediate

2162:  .seealso: `Mat`, `MATSEQSELL`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MatSeqSELLSetPreallocation()`, `MATSELL`, `MATSEQSELL`, `MATMPISELL`
2163:  @*/
2164: PetscErrorCode MatCreateSeqSELL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt maxallocrow, const PetscInt rlen[], Mat *A)
2165: {
2166:   PetscFunctionBegin;
2167:   PetscCall(MatCreate(comm, A));
2168:   PetscCall(MatSetSizes(*A, m, n, m, n));
2169:   PetscCall(MatSetType(*A, MATSEQSELL));
2170:   PetscCall(MatSeqSELLSetPreallocation_SeqSELL(*A, maxallocrow, rlen));
2171:   PetscFunctionReturn(PETSC_SUCCESS);
2172: }

2174: PetscErrorCode MatEqual_SeqSELL(Mat A, Mat B, PetscBool *flg)
2175: {
2176:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data, *b = (Mat_SeqSELL *)B->data;
2177:   PetscInt     totalslices = a->totalslices;

2179:   PetscFunctionBegin;
2180:   /* If the  matrix dimensions are not equal,or no of nonzeros */
2181:   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) || (a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2182:     *flg = PETSC_FALSE;
2183:     PetscFunctionReturn(PETSC_SUCCESS);
2184:   }
2185:   /* if the a->colidx are the same */
2186:   PetscCall(PetscArraycmp(a->colidx, b->colidx, a->sliidx[totalslices], flg));
2187:   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
2188:   /* if a->val are the same */
2189:   PetscCall(PetscArraycmp(a->val, b->val, a->sliidx[totalslices], flg));
2190:   PetscFunctionReturn(PETSC_SUCCESS);
2191: }

2193: PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2194: {
2195:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;

2197:   PetscFunctionBegin;
2198:   a->idiagvalid = PETSC_FALSE;
2199:   PetscFunctionReturn(PETSC_SUCCESS);
2200: }

2202: PetscErrorCode MatConjugate_SeqSELL(Mat A)
2203: {
2204: #if defined(PETSC_USE_COMPLEX)
2205:   Mat_SeqSELL *a = (Mat_SeqSELL *)A->data;
2206:   PetscInt     i;
2207:   PetscScalar *val = a->val;

2209:   PetscFunctionBegin;
2210:   for (i = 0; i < a->sliidx[a->totalslices]; i++) val[i] = PetscConj(val[i]);
2211: #else
2212:   PetscFunctionBegin;
2213: #endif
2214:   PetscFunctionReturn(PETSC_SUCCESS);
2215: }