Actual source code: sbaij2.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/baij/seq/baij.h
4: #include src/inline/spops.h
5: #include src/inline/ilu.h
6: #include petscbt.h
7: #include src/mat/impls/sbaij/seq/sbaij.h
11: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
12: {
13: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
15: PetscInt brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
16: PetscBT table,table0;
19: if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20: mbs = a->mbs;
21: ai = a->i;
22: aj = a->j;
23: bs = A->rmap.bs;
24: PetscBTCreate(mbs,table);
25: PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
26: PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);
27: PetscBTCreate(mbs,table0);
29: for (i=0; i<is_max; i++) { /* for each is */
30: isz = 0;
31: PetscBTMemzero(mbs,table);
32:
33: /* Extract the indices, assume there can be duplicate entries */
34: ISGetIndices(is[i],&idx);
35: ISGetLocalSize(is[i],&n);
37: /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
38: bcol_max = 0;
39: for (j=0; j<n ; ++j){
40: brow = idx[j]/bs; /* convert the indices into block indices */
41: if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42: if(!PetscBTLookupSet(table,brow)) {
43: nidx[isz++] = brow;
44: if (bcol_max < brow) bcol_max = brow;
45: }
46: }
47: ISRestoreIndices(is[i],&idx);
48: ISDestroy(is[i]);
49:
50: k = 0;
51: for (j=0; j<ov; j++){ /* for each overlap */
52: /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53: PetscBTMemzero(mbs,table0);
54: for (l=k; l<isz; l++) { PetscBTSet(table0,nidx[l]); }
56: n = isz; /* length of the updated is[i] */
57: for (brow=0; brow<mbs; brow++){
58: start = ai[brow]; end = ai[brow+1];
59: if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
60: for (l = start; l<end ; l++){
61: bcol = aj[l];
62: if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
63: }
64: k++;
65: if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
66: } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
67: for (l = start; l<end ; l++){
68: bcol = aj[l];
69: if (bcol > bcol_max) break;
70: if (PetscBTLookup(table0,bcol)){
71: if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
72: break; /* for l = start; l<end ; l++) */
73: }
74: }
75: }
76: }
77: } /* for each overlap */
79: /* expand the Index Set */
80: for (j=0; j<isz; j++) {
81: for (k=0; k<bs; k++)
82: nidx2[j*bs+k] = nidx[j]*bs+k;
83: }
84: ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
85: }
86: PetscBTDestroy(table);
87: PetscFree(nidx);
88: PetscFree(nidx2);
89: PetscBTDestroy(table0);
90: return(0);
91: }
95: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
96: {
97: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c;
99: PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
100: PetscInt row,mat_i,*mat_j,tcol,*mat_ilen;
101: PetscInt *irow,nrows,*ssmap,bs=A->rmap.bs,bs2=a->bs2;
102: PetscInt *aj = a->j,*ai = a->i;
103: MatScalar *mat_a;
104: Mat C;
105: PetscTruth flag;
108: if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
109: ISSorted(iscol,(PetscTruth*)&i);
110: if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
112: ISGetIndices(isrow,&irow);
113: ISGetSize(isrow,&nrows);
114:
115: PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);
116: ssmap = smap;
117: PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
118: PetscMemzero(smap,oldcols*sizeof(PetscInt));
119: for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
120: /* determine lens of each row */
121: for (i=0; i<nrows; i++) {
122: kstart = ai[irow[i]];
123: kend = kstart + a->ilen[irow[i]];
124: lens[i] = 0;
125: for (k=kstart; k<kend; k++) {
126: if (ssmap[aj[k]]) {
127: lens[i]++;
128: }
129: }
130: }
131: /* Create and fill new matrix */
132: if (scall == MAT_REUSE_MATRIX) {
133: c = (Mat_SeqSBAIJ *)((*B)->data);
135: if (c->mbs!=nrows || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
136: PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
137: if (!flag) {
138: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
139: }
140: PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
141: C = *B;
142: } else {
143: MatCreate(((PetscObject)A)->comm,&C);
144: MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
145: MatSetType(C,((PetscObject)A)->type_name);
146: MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
147: }
148: c = (Mat_SeqSBAIJ *)(C->data);
149: for (i=0; i<nrows; i++) {
150: row = irow[i];
151: kstart = ai[row];
152: kend = kstart + a->ilen[row];
153: mat_i = c->i[i];
154: mat_j = c->j + mat_i;
155: mat_a = c->a + mat_i*bs2;
156: mat_ilen = c->ilen + i;
157: for (k=kstart; k<kend; k++) {
158: if ((tcol=ssmap[a->j[k]])) {
159: *mat_j++ = tcol - 1;
160: PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
161: mat_a += bs2;
162: (*mat_ilen)++;
163: }
164: }
165: }
166:
167: /* Free work space */
168: PetscFree(smap);
169: PetscFree(lens);
170: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
171: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
172:
173: ISRestoreIndices(isrow,&irow);
174: *B = C;
175: return(0);
176: }
180: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
181: {
182: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
183: IS is1;
185: PetscInt *vary,*iary,*irow,nrows,i,bs=A->rmap.bs,count;
188: if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
189:
190: ISGetIndices(isrow,&irow);
191: ISGetSize(isrow,&nrows);
192:
193: /* Verify if the indices corespond to each element in a block
194: and form the IS with compressed IS */
195: PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);
196: iary = vary + a->mbs;
197: PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
198: for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
199:
200: count = 0;
201: for (i=0; i<a->mbs; i++) {
202: if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
203: if (vary[i]==bs) iary[count++] = i;
204: }
205: ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
206:
207: ISRestoreIndices(isrow,&irow);
208: PetscFree(vary);
210: MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
211: ISDestroy(is1);
212: return(0);
213: }
217: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
218: {
220: PetscInt i;
223: if (scall == MAT_INITIAL_MATRIX) {
224: PetscMalloc((n+1)*sizeof(Mat),B);
225: }
227: for (i=0; i<n; i++) {
228: MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
229: }
230: return(0);
231: }
233: /* -------------------------------------------------------*/
234: /* Should check that shapes of vectors and matrices match */
235: /* -------------------------------------------------------*/
236: #include petscblaslapack.h
240: PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
241: {
242: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
243: PetscScalar *x,*z,*xb,x1,zero=0.0;
244: MatScalar *v;
246: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
247: PetscInt nonzerorow=0;
250: VecSet(zz,zero);
251: VecGetArray(xx,&x);
252: VecGetArray(zz,&z);
254: v = a->a;
255: xb = x;
256:
257: for (i=0; i<mbs; i++) {
258: n = ai[1] - ai[0]; /* length of i_th row of A */
259: x1 = *xb++;
260: ib = aj + *ai++;
261: jmin = 0;
262: nonzerorow += (n>0);
263: /* if we ALWAYS required a diagonal entry then could remove this if test */
264: /* should we use a tmp to hold the accumulated z[i] */
265: if (*ib == i) { /* (diag of A)*x */
266: z[i] += *v++ * x[*ib++];
267: jmin++;
268: }
269: for (j=jmin; j<n; j++) {
270: cval = *ib;
271: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
272: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
273: }
274: }
276: VecRestoreArray(xx,&x);
277: VecRestoreArray(zz,&z);
278: PetscLogFlops(2*(a->nz*2 - nonzerorow) - nonzerorow); /* nz = (nz+m)/2 */
279: return(0);
280: }
284: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
285: {
286: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
287: PetscScalar *x,*z,*xb,x1,x2,zero=0.0;
288: MatScalar *v;
290: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
291: PetscInt nonzerorow=0;
294: VecSet(zz,zero);
295: VecGetArray(xx,&x);
296: VecGetArray(zz,&z);
297:
298: v = a->a;
299: xb = x;
301: for (i=0; i<mbs; i++) {
302: n = ai[1] - ai[0]; /* length of i_th block row of A */
303: x1 = xb[0]; x2 = xb[1];
304: ib = aj + *ai;
305: jmin = 0;
306: nonzerorow += (n>0);
307: if (*ib == i){ /* (diag of A)*x */
308: z[2*i] += v[0]*x1 + v[2]*x2;
309: z[2*i+1] += v[2]*x1 + v[3]*x2;
310: v += 4; jmin++;
311: }
312: for (j=jmin; j<n; j++) {
313: /* (strict lower triangular part of A)*x */
314: cval = ib[j]*2;
315: z[cval] += v[0]*x1 + v[1]*x2;
316: z[cval+1] += v[2]*x1 + v[3]*x2;
317: /* (strict upper triangular part of A)*x */
318: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
319: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
320: v += 4;
321: }
322: xb +=2; ai++;
323: }
325: VecRestoreArray(xx,&x);
326: VecRestoreArray(zz,&z);
327: PetscLogFlops(8*(a->nz*2 - nonzerorow) - nonzerorow);
328: return(0);
329: }
333: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
334: {
335: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
336: PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0;
337: MatScalar *v;
339: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
340: PetscInt nonzerorow=0;
343: VecSet(zz,zero);
344: VecGetArray(xx,&x);
345: VecGetArray(zz,&z);
346:
347: v = a->a;
348: xb = x;
350: for (i=0; i<mbs; i++) {
351: n = ai[1] - ai[0]; /* length of i_th block row of A */
352: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
353: ib = aj + *ai;
354: jmin = 0;
355: nonzerorow += (n>0);
356: if (*ib == i){ /* (diag of A)*x */
357: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
358: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
359: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
360: v += 9; jmin++;
361: }
362: for (j=jmin; j<n; j++) {
363: /* (strict lower triangular part of A)*x */
364: cval = ib[j]*3;
365: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
366: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
367: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
368: /* (strict upper triangular part of A)*x */
369: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
370: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
371: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
372: v += 9;
373: }
374: xb +=3; ai++;
375: }
377: VecRestoreArray(xx,&x);
378: VecRestoreArray(zz,&z);
379: PetscLogFlops(18*(a->nz*2 - nonzerorow) - nonzerorow);
380: return(0);
381: }
385: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
386: {
387: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
388: PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
389: MatScalar *v;
391: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
392: PetscInt nonzerorow=0;
395: VecSet(zz,zero);
396: VecGetArray(xx,&x);
397: VecGetArray(zz,&z);
398:
399: v = a->a;
400: xb = x;
402: for (i=0; i<mbs; i++) {
403: n = ai[1] - ai[0]; /* length of i_th block row of A */
404: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
405: ib = aj + *ai;
406: jmin = 0;
407: nonzerorow += (n>0);
408: if (*ib == i){ /* (diag of A)*x */
409: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
410: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
411: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
412: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
413: v += 16; jmin++;
414: }
415: for (j=jmin; j<n; j++) {
416: /* (strict lower triangular part of A)*x */
417: cval = ib[j]*4;
418: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
419: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
420: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
421: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
422: /* (strict upper triangular part of A)*x */
423: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
424: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
425: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
426: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
427: v += 16;
428: }
429: xb +=4; ai++;
430: }
432: VecRestoreArray(xx,&x);
433: VecRestoreArray(zz,&z);
434: PetscLogFlops(32*(a->nz*2 - nonzerorow) - nonzerorow);
435: return(0);
436: }
440: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
441: {
442: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
443: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
444: MatScalar *v;
446: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
447: PetscInt nonzerorow=0;
450: VecSet(zz,zero);
451: VecGetArray(xx,&x);
452: VecGetArray(zz,&z);
453:
454: v = a->a;
455: xb = x;
457: for (i=0; i<mbs; i++) {
458: n = ai[1] - ai[0]; /* length of i_th block row of A */
459: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
460: ib = aj + *ai;
461: jmin = 0;
462: nonzerorow += (n>0);
463: if (*ib == i){ /* (diag of A)*x */
464: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
465: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
466: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
467: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
468: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
469: v += 25; jmin++;
470: }
471: for (j=jmin; j<n; j++) {
472: /* (strict lower triangular part of A)*x */
473: cval = ib[j]*5;
474: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
475: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
476: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
477: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
478: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
479: /* (strict upper triangular part of A)*x */
480: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
481: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
482: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
483: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
484: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
485: v += 25;
486: }
487: xb +=5; ai++;
488: }
490: VecRestoreArray(xx,&x);
491: VecRestoreArray(zz,&z);
492: PetscLogFlops(50*(a->nz*2 - nonzerorow) - nonzerorow);
493: return(0);
494: }
499: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
500: {
501: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
502: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
503: MatScalar *v;
505: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
506: PetscInt nonzerorow=0;
509: VecSet(zz,zero);
510: VecGetArray(xx,&x);
511: VecGetArray(zz,&z);
512:
513: v = a->a;
514: xb = x;
516: for (i=0; i<mbs; i++) {
517: n = ai[1] - ai[0]; /* length of i_th block row of A */
518: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
519: ib = aj + *ai;
520: jmin = 0;
521: nonzerorow += (n>0);
522: if (*ib == i){ /* (diag of A)*x */
523: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
524: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
525: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
526: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
527: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
528: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
529: v += 36; jmin++;
530: }
531: for (j=jmin; j<n; j++) {
532: /* (strict lower triangular part of A)*x */
533: cval = ib[j]*6;
534: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
535: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
536: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
537: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
538: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
539: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
540: /* (strict upper triangular part of A)*x */
541: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
542: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
543: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
544: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
545: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
546: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
547: v += 36;
548: }
549: xb +=6; ai++;
550: }
552: VecRestoreArray(xx,&x);
553: VecRestoreArray(zz,&z);
554: PetscLogFlops(72*(a->nz*2 - nonzerorow) - nonzerorow);
555: return(0);
556: }
559: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
560: {
561: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
562: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
563: MatScalar *v;
565: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
566: PetscInt nonzerorow=0;
569: VecSet(zz,zero);
570: VecGetArray(xx,&x);
571: VecGetArray(zz,&z);
572:
573: v = a->a;
574: xb = x;
576: for (i=0; i<mbs; i++) {
577: n = ai[1] - ai[0]; /* length of i_th block row of A */
578: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
579: ib = aj + *ai;
580: jmin = 0;
581: nonzerorow += (n>0);
582: if (*ib == i){ /* (diag of A)*x */
583: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
584: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
585: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
586: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
587: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
588: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
589: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
590: v += 49; jmin++;
591: }
592: for (j=jmin; j<n; j++) {
593: /* (strict lower triangular part of A)*x */
594: cval = ib[j]*7;
595: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
596: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
597: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
598: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
599: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
600: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
601: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
602: /* (strict upper triangular part of A)*x */
603: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
604: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
605: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
606: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
607: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
608: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
609: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
610: v += 49;
611: }
612: xb +=7; ai++;
613: }
614: VecRestoreArray(xx,&x);
615: VecRestoreArray(zz,&z);
616: PetscLogFlops(98*(a->nz*2 - nonzerorow) - nonzerorow);
617: return(0);
618: }
620: /*
621: This will not work with MatScalar == float because it calls the BLAS
622: */
625: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
626: {
627: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
628: PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
629: MatScalar *v;
631: PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k;
632: PetscInt nonzerorow=0;
635: VecSet(zz,zero);
636: VecGetArray(xx,&x); x_ptr=x;
637: VecGetArray(zz,&z); z_ptr=z;
639: aj = a->j;
640: v = a->a;
641: ii = a->i;
643: if (!a->mult_work) {
644: PetscMalloc((A->rmap.N+1)*sizeof(PetscScalar),&a->mult_work);
645: }
646: work = a->mult_work;
647:
648: for (i=0; i<mbs; i++) {
649: n = ii[1] - ii[0]; ncols = n*bs;
650: workt = work; idx=aj+ii[0];
651: nonzerorow += (n>0);
653: /* upper triangular part */
654: for (j=0; j<n; j++) {
655: xb = x_ptr + bs*(*idx++);
656: for (k=0; k<bs; k++) workt[k] = xb[k];
657: workt += bs;
658: }
659: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
660: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
661:
662: /* strict lower triangular part */
663: idx = aj+ii[0];
664: if (*idx == i){
665: ncols -= bs; v += bs2; idx++; n--;
666: }
667:
668: if (ncols > 0){
669: workt = work;
670: PetscMemzero(workt,ncols*sizeof(PetscScalar));
671: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
672: for (j=0; j<n; j++) {
673: zb = z_ptr + bs*(*idx++);
674: for (k=0; k<bs; k++) zb[k] += workt[k] ;
675: workt += bs;
676: }
677: }
678: x += bs; v += n*bs2; z += bs; ii++;
679: }
680:
681: VecRestoreArray(xx,&x);
682: VecRestoreArray(zz,&z);
683: PetscLogFlops(2*(a->nz*2 - nonzerorow)*bs2 - nonzerorow);
684: return(0);
685: }
690: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
691: {
692: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
693: PetscScalar *x,*z,*xb,x1;
694: MatScalar *v;
696: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
697: PetscInt nonzerorow=0;
700: VecCopy_Seq(yy,zz);
701: VecGetArray(xx,&x);
702: VecGetArray(zz,&z);
703: v = a->a;
704: xb = x;
706: for (i=0; i<mbs; i++) {
707: n = ai[1] - ai[0]; /* length of i_th row of A */
708: x1 = xb[0];
709: ib = aj + *ai;
710: jmin = 0;
711: nonzerorow += (n>0);
712: if (*ib == i) { /* (diag of A)*x */
713: z[i] += *v++ * x[*ib++]; jmin++;
714: }
715: for (j=jmin; j<n; j++) {
716: cval = *ib;
717: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
718: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
719: }
720: xb++; ai++;
721: }
723: VecRestoreArray(xx,&x);
724: VecRestoreArray(zz,&z);
725:
726: PetscLogFlops(2*(a->nz*2 - nonzerorow));
727: return(0);
728: }
732: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
733: {
734: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
735: PetscScalar *x,*z,*xb,x1,x2;
736: MatScalar *v;
738: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
739: PetscInt nonzerorow=0;
742: VecCopy_Seq(yy,zz);
743: VecGetArray(xx,&x);
744: VecGetArray(zz,&z);
746: v = a->a;
747: xb = x;
749: for (i=0; i<mbs; i++) {
750: n = ai[1] - ai[0]; /* length of i_th block row of A */
751: x1 = xb[0]; x2 = xb[1];
752: ib = aj + *ai;
753: jmin = 0;
754: nonzerorow += (n>0);
755: if (*ib == i){ /* (diag of A)*x */
756: z[2*i] += v[0]*x1 + v[2]*x2;
757: z[2*i+1] += v[2]*x1 + v[3]*x2;
758: v += 4; jmin++;
759: }
760: for (j=jmin; j<n; j++) {
761: /* (strict lower triangular part of A)*x */
762: cval = ib[j]*2;
763: z[cval] += v[0]*x1 + v[1]*x2;
764: z[cval+1] += v[2]*x1 + v[3]*x2;
765: /* (strict upper triangular part of A)*x */
766: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
767: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
768: v += 4;
769: }
770: xb +=2; ai++;
771: }
772: VecRestoreArray(xx,&x);
773: VecRestoreArray(zz,&z);
775: PetscLogFlops(4*(a->nz*2 - nonzerorow));
776: return(0);
777: }
781: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
782: {
783: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
784: PetscScalar *x,*z,*xb,x1,x2,x3;
785: MatScalar *v;
787: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
788: PetscInt nonzerorow=0;
791: VecCopy_Seq(yy,zz);
792: VecGetArray(xx,&x);
793: VecGetArray(zz,&z);
795: v = a->a;
796: xb = x;
798: for (i=0; i<mbs; i++) {
799: n = ai[1] - ai[0]; /* length of i_th block row of A */
800: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
801: ib = aj + *ai;
802: jmin = 0;
803: nonzerorow += (n>0);
804: if (*ib == i){ /* (diag of A)*x */
805: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
806: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
807: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
808: v += 9; jmin++;
809: }
810: for (j=jmin; j<n; j++) {
811: /* (strict lower triangular part of A)*x */
812: cval = ib[j]*3;
813: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
814: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
815: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
816: /* (strict upper triangular part of A)*x */
817: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
818: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
819: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
820: v += 9;
821: }
822: xb +=3; ai++;
823: }
825: VecRestoreArray(xx,&x);
826: VecRestoreArray(zz,&z);
828: PetscLogFlops(18*(a->nz*2 - nonzerorow));
829: return(0);
830: }
834: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
835: {
836: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
837: PetscScalar *x,*z,*xb,x1,x2,x3,x4;
838: MatScalar *v;
840: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
841: PetscInt nonzerorow=0;
844: VecCopy_Seq(yy,zz);
845: VecGetArray(xx,&x);
846: VecGetArray(zz,&z);
848: v = a->a;
849: xb = x;
851: for (i=0; i<mbs; i++) {
852: n = ai[1] - ai[0]; /* length of i_th block row of A */
853: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
854: ib = aj + *ai;
855: jmin = 0;
856: nonzerorow += (n>0);
857: if (*ib == i){ /* (diag of A)*x */
858: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
859: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
860: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
861: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
862: v += 16; jmin++;
863: }
864: for (j=jmin; j<n; j++) {
865: /* (strict lower triangular part of A)*x */
866: cval = ib[j]*4;
867: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
868: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
869: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
870: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
871: /* (strict upper triangular part of A)*x */
872: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
873: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
874: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
875: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
876: v += 16;
877: }
878: xb +=4; ai++;
879: }
881: VecRestoreArray(xx,&x);
882: VecRestoreArray(zz,&z);
884: PetscLogFlops(32*(a->nz*2 - nonzerorow));
885: return(0);
886: }
890: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
891: {
892: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
893: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5;
894: MatScalar *v;
896: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
897: PetscInt nonzerorow=0;
900: VecCopy_Seq(yy,zz);
901: VecGetArray(xx,&x);
902: VecGetArray(zz,&z);
904: v = a->a;
905: xb = x;
907: for (i=0; i<mbs; i++) {
908: n = ai[1] - ai[0]; /* length of i_th block row of A */
909: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
910: ib = aj + *ai;
911: jmin = 0;
912: nonzerorow += (n>0);
913: if (*ib == i){ /* (diag of A)*x */
914: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
915: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
916: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
917: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
918: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
919: v += 25; jmin++;
920: }
921: for (j=jmin; j<n; j++) {
922: /* (strict lower triangular part of A)*x */
923: cval = ib[j]*5;
924: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
925: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
926: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
927: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
928: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
929: /* (strict upper triangular part of A)*x */
930: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
931: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
932: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
933: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
934: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
935: v += 25;
936: }
937: xb +=5; ai++;
938: }
940: VecRestoreArray(xx,&x);
941: VecRestoreArray(zz,&z);
943: PetscLogFlops(50*(a->nz*2 - nonzerorow));
944: return(0);
945: }
948: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
949: {
950: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
951: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6;
952: MatScalar *v;
954: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
955: PetscInt nonzerorow=0;
958: VecCopy_Seq(yy,zz);
959: VecGetArray(xx,&x);
960: VecGetArray(zz,&z);
962: v = a->a;
963: xb = x;
965: for (i=0; i<mbs; i++) {
966: n = ai[1] - ai[0]; /* length of i_th block row of A */
967: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
968: ib = aj + *ai;
969: jmin = 0;
970: nonzerorow += (n>0);
971: if (*ib == i){ /* (diag of A)*x */
972: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
973: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
974: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
975: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
976: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
977: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
978: v += 36; jmin++;
979: }
980: for (j=jmin; j<n; j++) {
981: /* (strict lower triangular part of A)*x */
982: cval = ib[j]*6;
983: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
984: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
985: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
986: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
987: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
988: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
989: /* (strict upper triangular part of A)*x */
990: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
991: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
992: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
993: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
994: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
995: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
996: v += 36;
997: }
998: xb +=6; ai++;
999: }
1001: VecRestoreArray(xx,&x);
1002: VecRestoreArray(zz,&z);
1004: PetscLogFlops(72*(a->nz*2 - nonzerorow));
1005: return(0);
1006: }
1010: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1011: {
1012: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1013: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
1014: MatScalar *v;
1016: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
1017: PetscInt nonzerorow=0;
1020: VecCopy_Seq(yy,zz);
1021: VecGetArray(xx,&x);
1022: VecGetArray(zz,&z);
1024: v = a->a;
1025: xb = x;
1027: for (i=0; i<mbs; i++) {
1028: n = ai[1] - ai[0]; /* length of i_th block row of A */
1029: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1030: ib = aj + *ai;
1031: jmin = 0;
1032: nonzerorow += (n>0);
1033: if (*ib == i){ /* (diag of A)*x */
1034: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1035: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1036: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1037: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1038: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1039: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1040: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1041: v += 49; jmin++;
1042: }
1043: for (j=jmin; j<n; j++) {
1044: /* (strict lower triangular part of A)*x */
1045: cval = ib[j]*7;
1046: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1047: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1048: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1049: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1050: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1051: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1052: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1053: /* (strict upper triangular part of A)*x */
1054: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1055: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1056: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1057: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1058: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1059: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1060: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1061: v += 49;
1062: }
1063: xb +=7; ai++;
1064: }
1066: VecRestoreArray(xx,&x);
1067: VecRestoreArray(zz,&z);
1069: PetscLogFlops(98*(a->nz*2 - nonzerorow));
1070: return(0);
1071: }
1075: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1076: {
1077: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1078: PetscScalar *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1079: MatScalar *v;
1081: PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k;
1082: PetscInt nonzerorow=0;
1085: VecCopy_Seq(yy,zz);
1086: VecGetArray(xx,&x); x_ptr=x;
1087: VecGetArray(zz,&z); z_ptr=z;
1089: aj = a->j;
1090: v = a->a;
1091: ii = a->i;
1093: if (!a->mult_work) {
1094: PetscMalloc((A->rmap.n+1)*sizeof(PetscScalar),&a->mult_work);
1095: }
1096: work = a->mult_work;
1097:
1098:
1099: for (i=0; i<mbs; i++) {
1100: n = ii[1] - ii[0]; ncols = n*bs;
1101: workt = work; idx=aj+ii[0];
1102: nonzerorow += (n>0);
1104: /* upper triangular part */
1105: for (j=0; j<n; j++) {
1106: xb = x_ptr + bs*(*idx++);
1107: for (k=0; k<bs; k++) workt[k] = xb[k];
1108: workt += bs;
1109: }
1110: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1111: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1113: /* strict lower triangular part */
1114: idx = aj+ii[0];
1115: if (*idx == i){
1116: ncols -= bs; v += bs2; idx++; n--;
1117: }
1118: if (ncols > 0){
1119: workt = work;
1120: PetscMemzero(workt,ncols*sizeof(PetscScalar));
1121: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1122: for (j=0; j<n; j++) {
1123: zb = z_ptr + bs*(*idx++);
1124: for (k=0; k<bs; k++) zb[k] += workt[k] ;
1125: workt += bs;
1126: }
1127: }
1129: x += bs; v += n*bs2; z += bs; ii++;
1130: }
1132: VecRestoreArray(xx,&x);
1133: VecRestoreArray(zz,&z);
1135: PetscLogFlops(2*(a->nz*2 - nonzerorow));
1136: return(0);
1137: }
1141: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1142: {
1143: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1144: PetscScalar oalpha = alpha;
1145: PetscBLASInt one = 1,totalnz = (PetscBLASInt)a->bs2*a->nz;
1149: BLASscal_(&totalnz,&oalpha,a->a,&one);
1150: PetscLogFlops(totalnz);
1151: return(0);
1152: }
1156: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1157: {
1158: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1159: MatScalar *v = a->a;
1160: PetscReal sum_diag = 0.0, sum_off = 0.0, *sum;
1161: PetscInt i,j,k,bs = A->rmap.bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1163: PetscInt *jl,*il,jmin,jmax,nexti,ik,*col;
1164:
1166: if (type == NORM_FROBENIUS) {
1167: for (k=0; k<mbs; k++){
1168: jmin = a->i[k]; jmax = a->i[k+1];
1169: col = aj + jmin;
1170: if (*col == k){ /* diagonal block */
1171: for (i=0; i<bs2; i++){
1172: #if defined(PETSC_USE_COMPLEX)
1173: sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1174: #else
1175: sum_diag += (*v)*(*v); v++;
1176: #endif
1177: }
1178: jmin++;
1179: }
1180: for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */
1181: for (i=0; i<bs2; i++){
1182: #if defined(PETSC_USE_COMPLEX)
1183: sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1184: #else
1185: sum_off += (*v)*(*v); v++;
1186: #endif
1187: }
1188: }
1189: }
1190: *norm = sqrt(sum_diag + 2*sum_off);
1191: } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1192: PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);
1193: jl = il + mbs;
1194: sum = (PetscReal*)(jl + mbs);
1195: for (i=0; i<mbs; i++) jl[i] = mbs;
1196: il[0] = 0;
1198: *norm = 0.0;
1199: for (k=0; k<mbs; k++) { /* k_th block row */
1200: for (j=0; j<bs; j++) sum[j]=0.0;
1201: /*-- col sum --*/
1202: i = jl[k]; /* first |A(i,k)| to be added */
1203: /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1204: at step k */
1205: while (i<mbs){
1206: nexti = jl[i]; /* next block row to be added */
1207: ik = il[i]; /* block index of A(i,k) in the array a */
1208: for (j=0; j<bs; j++){
1209: v = a->a + ik*bs2 + j*bs;
1210: for (k1=0; k1<bs; k1++) {
1211: sum[j] += PetscAbsScalar(*v); v++;
1212: }
1213: }
1214: /* update il, jl */
1215: jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1216: jmax = a->i[i+1];
1217: if (jmin < jmax){
1218: il[i] = jmin;
1219: j = a->j[jmin];
1220: jl[i] = jl[j]; jl[j]=i;
1221: }
1222: i = nexti;
1223: }
1224: /*-- row sum --*/
1225: jmin = a->i[k]; jmax = a->i[k+1];
1226: for (i=jmin; i<jmax; i++) {
1227: for (j=0; j<bs; j++){
1228: v = a->a + i*bs2 + j;
1229: for (k1=0; k1<bs; k1++){
1230: sum[j] += PetscAbsScalar(*v); v += bs;
1231: }
1232: }
1233: }
1234: /* add k_th block row to il, jl */
1235: col = aj+jmin;
1236: if (*col == k) jmin++;
1237: if (jmin < jmax){
1238: il[k] = jmin;
1239: j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1240: }
1241: for (j=0; j<bs; j++){
1242: if (sum[j] > *norm) *norm = sum[j];
1243: }
1244: }
1245: PetscFree(il);
1246: } else {
1247: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1248: }
1249: return(0);
1250: }
1254: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1255: {
1256: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1261: /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1262: if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) {
1263: *flg = PETSC_FALSE;
1264: return(0);
1265: }
1266:
1267: /* if the a->i are the same */
1268: PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1269: if (!*flg) {
1270: return(0);
1271: }
1272:
1273: /* if a->j are the same */
1274: PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1275: if (!*flg) {
1276: return(0);
1277: }
1278: /* if a->a are the same */
1279: PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(A->rmap.bs)*sizeof(PetscScalar),flg);
1280: return(0);
1281: }
1285: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1286: {
1287: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1289: PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1290: PetscScalar *x,zero = 0.0;
1291: MatScalar *aa,*aa_j;
1294: bs = A->rmap.bs;
1295: if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1296:
1297: aa = a->a;
1298: ai = a->i;
1299: aj = a->j;
1300: ambs = a->mbs;
1301: bs2 = a->bs2;
1303: VecSet(v,zero);
1304: VecGetArray(v,&x);
1305: VecGetLocalSize(v,&n);
1306: if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1307: for (i=0; i<ambs; i++) {
1308: j=ai[i];
1309: if (aj[j] == i) { /* if this is a diagonal element */
1310: row = i*bs;
1311: aa_j = aa + j*bs2;
1312: if (A->factor && bs==1){
1313: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1314: } else {
1315: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1316: }
1317: }
1318: }
1319:
1320: VecRestoreArray(v,&x);
1321: return(0);
1322: }
1326: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1327: {
1328: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1329: PetscScalar *l,x,*li,*ri;
1330: MatScalar *aa,*v;
1332: PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1333: PetscTruth flg;
1336: if (ll != rr){
1337: VecEqual(ll,rr,&flg);
1338: if (!flg)
1339: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1340: }
1341: if (!ll) return(0);
1342: ai = a->i;
1343: aj = a->j;
1344: aa = a->a;
1345: m = A->rmap.N;
1346: bs = A->rmap.bs;
1347: mbs = a->mbs;
1348: bs2 = a->bs2;
1350: VecGetArray(ll,&l);
1351: VecGetLocalSize(ll,&lm);
1352: if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1353: for (i=0; i<mbs; i++) { /* for each block row */
1354: M = ai[i+1] - ai[i];
1355: li = l + i*bs;
1356: v = aa + bs2*ai[i];
1357: for (j=0; j<M; j++) { /* for each block */
1358: ri = l + bs*aj[ai[i]+j];
1359: for (k=0; k<bs; k++) {
1360: x = ri[k];
1361: for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1362: }
1363: }
1364: }
1365: VecRestoreArray(ll,&l);
1366: PetscLogFlops(2*a->nz);
1367: return(0);
1368: }
1372: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1373: {
1374: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1377: info->rows_global = (double)A->rmap.N;
1378: info->columns_global = (double)A->rmap.N;
1379: info->rows_local = (double)A->rmap.N;
1380: info->columns_local = (double)A->rmap.N;
1381: info->block_size = a->bs2;
1382: info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */
1383: info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1384: info->nz_unneeded = (double)(info->nz_allocated - info->nz_used);
1385: info->assemblies = A->num_ass;
1386: info->mallocs = a->reallocs;
1387: info->memory = ((PetscObject)A)->mem;
1388: if (A->factor) {
1389: info->fill_ratio_given = A->info.fill_ratio_given;
1390: info->fill_ratio_needed = A->info.fill_ratio_needed;
1391: info->factor_mallocs = A->info.factor_mallocs;
1392: } else {
1393: info->fill_ratio_given = 0;
1394: info->fill_ratio_needed = 0;
1395: info->factor_mallocs = 0;
1396: }
1397: return(0);
1398: }
1403: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1404: {
1405: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1409: PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1410: return(0);
1411: }
1415: /*
1416: This code does not work since it only checks the upper triangular part of
1417: the matrix. Hence it is not listed in the function table.
1418: */
1419: PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1420: {
1421: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1423: PetscInt i,j,n,row,col,bs,*ai,*aj,mbs;
1424: PetscReal atmp;
1425: MatScalar *aa;
1426: PetscScalar *x;
1427: PetscInt ncols,brow,bcol,krow,kcol;
1430: if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1431: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1432: bs = A->rmap.bs;
1433: aa = a->a;
1434: ai = a->i;
1435: aj = a->j;
1436: mbs = a->mbs;
1438: VecSet(v,0.0);
1439: VecGetArray(v,&x);
1440: VecGetLocalSize(v,&n);
1441: if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1442: for (i=0; i<mbs; i++) {
1443: ncols = ai[1] - ai[0]; ai++;
1444: brow = bs*i;
1445: for (j=0; j<ncols; j++){
1446: bcol = bs*(*aj);
1447: for (kcol=0; kcol<bs; kcol++){
1448: col = bcol + kcol; /* col index */
1449: for (krow=0; krow<bs; krow++){
1450: atmp = PetscAbsScalar(*aa); aa++;
1451: row = brow + krow; /* row index */
1452: if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1453: if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1454: }
1455: }
1456: aj++;
1457: }
1458: }
1459: VecRestoreArray(v,&x);
1460: return(0);
1461: }