Actual source code: sbaij.c
2: /*
3: Defines the basic matrix operations for the SBAIJ (compressed row)
4: matrix storage format.
5: */
6: #include src/mat/impls/baij/seq/baij.h
7: #include src/inline/spops.h
8: #include src/mat/impls/sbaij/seq/sbaij.h
10: #define CHUNKSIZE 10
12: /*
13: Checks for missing diagonals
14: */
17: PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A)
18: {
19: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
21: PetscInt *diag,*jj = a->j,i;
24: MatMarkDiagonal_SeqSBAIJ(A);
25: diag = a->diag;
26: for (i=0; i<a->mbs; i++) {
27: if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i);
28: }
29: return(0);
30: }
34: PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A)
35: {
36: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
38: PetscInt i,mbs = a->mbs;
41: if (a->diag) return(0);
43: PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);
44: PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));
45: for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
46: return(0);
47: }
51: static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
52: {
53: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
54: PetscInt n = a->mbs,i;
57: *nn = n;
58: if (!ia) return(0);
60: if (oshift == 1) {
61: /* temporarily add 1 to i and j indices */
62: PetscInt nz = a->i[n];
63: for (i=0; i<nz; i++) a->j[i]++;
64: for (i=0; i<n+1; i++) a->i[i]++;
65: *ia = a->i; *ja = a->j;
66: } else {
67: *ia = a->i; *ja = a->j;
68: }
69: return(0);
70: }
74: static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
75: {
76: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
77: PetscInt i,n = a->mbs;
80: if (!ia) return(0);
82: if (oshift == 1) {
83: PetscInt nz = a->i[n]-1;
84: for (i=0; i<nz; i++) a->j[i]--;
85: for (i=0; i<n+1; i++) a->i[i]--;
86: }
87: return(0);
88: }
92: PetscErrorCode MatDestroy_SeqSBAIJ(Mat A)
93: {
94: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
98: PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz);
99: PetscFree(a->a);
100: if (!a->singlemalloc) {
101: PetscFree(a->i);
102: PetscFree(a->j);
103: }
104: if (a->row) {
105: ISDestroy(a->row);
106: }
107: if (a->diag) {PetscFree(a->diag);}
108: if (a->ilen) {PetscFree(a->ilen);}
109: if (a->imax) {PetscFree(a->imax);}
110: if (a->icol) {ISDestroy(a->icol);}
111: if (a->solve_work) {PetscFree(a->solve_work);}
112: if (a->solves_work) {PetscFree(a->solves_work);}
113: if (a->mult_work) {PetscFree(a->mult_work);}
114: if (a->saved_values) {PetscFree(a->saved_values);}
115: if (a->xtoy) {PetscFree(a->xtoy);}
117: if (a->inew){
118: PetscFree(a->inew);
119: a->inew = 0;
120: }
121: PetscFree(a);
123: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
124: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
125: PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);
126: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);
127: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);
128: PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);
129: return(0);
130: }
134: PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op)
135: {
136: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
139: switch (op) {
140: case MAT_ROW_ORIENTED:
141: a->roworiented = PETSC_TRUE;
142: break;
143: case MAT_COLUMN_ORIENTED:
144: a->roworiented = PETSC_FALSE;
145: break;
146: case MAT_COLUMNS_SORTED:
147: a->sorted = PETSC_TRUE;
148: break;
149: case MAT_COLUMNS_UNSORTED:
150: a->sorted = PETSC_FALSE;
151: break;
152: case MAT_KEEP_ZEROED_ROWS:
153: a->keepzeroedrows = PETSC_TRUE;
154: break;
155: case MAT_NO_NEW_NONZERO_LOCATIONS:
156: a->nonew = 1;
157: break;
158: case MAT_NEW_NONZERO_LOCATION_ERR:
159: a->nonew = -1;
160: break;
161: case MAT_NEW_NONZERO_ALLOCATION_ERR:
162: a->nonew = -2;
163: break;
164: case MAT_YES_NEW_NONZERO_LOCATIONS:
165: a->nonew = 0;
166: break;
167: case MAT_ROWS_SORTED:
168: case MAT_ROWS_UNSORTED:
169: case MAT_YES_NEW_DIAGONALS:
170: case MAT_IGNORE_OFF_PROC_ENTRIES:
171: case MAT_USE_HASH_TABLE:
172: PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n");
173: break;
174: case MAT_NO_NEW_DIAGONALS:
175: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
176: case MAT_NOT_SYMMETRIC:
177: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
178: case MAT_HERMITIAN:
179: SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
180: case MAT_SYMMETRIC:
181: case MAT_STRUCTURALLY_SYMMETRIC:
182: case MAT_NOT_HERMITIAN:
183: case MAT_SYMMETRY_ETERNAL:
184: case MAT_NOT_SYMMETRY_ETERNAL:
185: break;
186: default:
187: SETERRQ(PETSC_ERR_SUP,"unknown option");
188: }
189: return(0);
190: }
194: PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v)
195: {
196: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
198: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2;
199: MatScalar *aa,*aa_i;
200: PetscScalar *v_i;
203: bs = A->bs;
204: ai = a->i;
205: aj = a->j;
206: aa = a->a;
207: bs2 = a->bs2;
208:
209: if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row);
210:
211: bn = row/bs; /* Block number */
212: bp = row % bs; /* Block position */
213: M = ai[bn+1] - ai[bn];
214: *ncols = bs*M;
215:
216: if (v) {
217: *v = 0;
218: if (*ncols) {
219: PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);
220: for (i=0; i<M; i++) { /* for each block in the block row */
221: v_i = *v + i*bs;
222: aa_i = aa + bs2*(ai[bn] + i);
223: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
224: }
225: }
226: }
228: if (cols) {
229: *cols = 0;
230: if (*ncols) {
231: PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);
232: for (i=0; i<M; i++) { /* for each block in the block row */
233: cols_i = *cols + i*bs;
234: itmp = bs*aj[ai[bn] + i];
235: for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
236: }
237: }
238: }
240: /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
241: /* this segment is currently removed, so only entries in the upper triangle are obtained */
242: #ifdef column_search
243: v_i = *v + M*bs;
244: cols_i = *cols + M*bs;
245: for (i=0; i<bn; i++){ /* for each block row */
246: M = ai[i+1] - ai[i];
247: for (j=0; j<M; j++){
248: itmp = aj[ai[i] + j]; /* block column value */
249: if (itmp == bn){
250: aa_i = aa + bs2*(ai[i] + j) + bs*bp;
251: for (k=0; k<bs; k++) {
252: *cols_i++ = i*bs+k;
253: *v_i++ = aa_i[k];
254: }
255: *ncols += bs;
256: break;
257: }
258: }
259: }
260: #endif
262: return(0);
263: }
267: PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
268: {
272: if (idx) {if (*idx) {PetscFree(*idx);}}
273: if (v) {if (*v) {PetscFree(*v);}}
274: return(0);
275: }
279: PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B)
280: {
283: MatDuplicate(A,MAT_COPY_VALUES,B);
284: return(0);
285: }
289: static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
290: {
291: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
292: PetscErrorCode ierr;
293: PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2;
294: char *name;
295: PetscViewerFormat format;
298: PetscObjectGetName((PetscObject)A,&name);
299: PetscViewerGetFormat(viewer,&format);
300: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
301: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
302: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
303: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
304: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
305: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
306: for (i=0; i<a->mbs; i++) {
307: for (j=0; j<bs; j++) {
308: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
309: for (k=a->i[i]; k<a->i[i+1]; k++) {
310: for (l=0; l<bs; l++) {
311: #if defined(PETSC_USE_COMPLEX)
312: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
313: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
314: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
315: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
316: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
317: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
318: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
319: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
320: }
321: #else
322: if (a->a[bs2*k + l*bs + j] != 0.0) {
323: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
324: }
325: #endif
326: }
327: }
328: PetscViewerASCIIPrintf(viewer,"\n");
329: }
330: }
331: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
332: } else {
333: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
334: for (i=0; i<a->mbs; i++) {
335: for (j=0; j<bs; j++) {
336: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
337: for (k=a->i[i]; k<a->i[i+1]; k++) {
338: for (l=0; l<bs; l++) {
339: #if defined(PETSC_USE_COMPLEX)
340: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
341: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
342: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
343: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
344: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
345: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
346: } else {
347: PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
348: }
349: #else
350: PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
351: #endif
352: }
353: }
354: PetscViewerASCIIPrintf(viewer,"\n");
355: }
356: }
357: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
358: }
359: PetscViewerFlush(viewer);
360: return(0);
361: }
365: static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
366: {
367: Mat A = (Mat) Aa;
368: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
370: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
371: PetscMPIInt rank;
372: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
373: MatScalar *aa;
374: MPI_Comm comm;
375: PetscViewer viewer;
378: /*
379: This is nasty. If this is called from an originally parallel matrix
380: then all processes call this,but only the first has the matrix so the
381: rest should return immediately.
382: */
383: PetscObjectGetComm((PetscObject)draw,&comm);
384: MPI_Comm_rank(comm,&rank);
385: if (rank) return(0);
387: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
389: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
390: PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
392: /* loop over matrix elements drawing boxes */
393: color = PETSC_DRAW_BLUE;
394: for (i=0,row=0; i<mbs; i++,row+=bs) {
395: for (j=a->i[i]; j<a->i[i+1]; j++) {
396: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
397: x_l = a->j[j]*bs; x_r = x_l + 1.0;
398: aa = a->a + j*bs2;
399: for (k=0; k<bs; k++) {
400: for (l=0; l<bs; l++) {
401: if (PetscRealPart(*aa++) >= 0.) continue;
402: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
403: }
404: }
405: }
406: }
407: color = PETSC_DRAW_CYAN;
408: for (i=0,row=0; i<mbs; i++,row+=bs) {
409: for (j=a->i[i]; j<a->i[i+1]; j++) {
410: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
411: x_l = a->j[j]*bs; x_r = x_l + 1.0;
412: aa = a->a + j*bs2;
413: for (k=0; k<bs; k++) {
414: for (l=0; l<bs; l++) {
415: if (PetscRealPart(*aa++) != 0.) continue;
416: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
417: }
418: }
419: }
420: }
422: color = PETSC_DRAW_RED;
423: for (i=0,row=0; i<mbs; i++,row+=bs) {
424: for (j=a->i[i]; j<a->i[i+1]; j++) {
425: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
426: x_l = a->j[j]*bs; x_r = x_l + 1.0;
427: aa = a->a + j*bs2;
428: for (k=0; k<bs; k++) {
429: for (l=0; l<bs; l++) {
430: if (PetscRealPart(*aa++) <= 0.) continue;
431: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
432: }
433: }
434: }
435: }
436: return(0);
437: }
441: static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
442: {
444: PetscReal xl,yl,xr,yr,w,h;
445: PetscDraw draw;
446: PetscTruth isnull;
450: PetscViewerDrawGetDraw(viewer,0,&draw);
451: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
453: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
454: xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
455: xr += w; yr += h; xl = -w; yl = -h;
456: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
457: PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);
458: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
459: return(0);
460: }
464: PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
465: {
467: PetscTruth iascii,isdraw;
470: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
471: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
472: if (iascii){
473: MatView_SeqSBAIJ_ASCII(A,viewer);
474: } else if (isdraw) {
475: MatView_SeqSBAIJ_Draw(A,viewer);
476: } else {
477: Mat B;
478: MatConvert(A,MATSEQAIJ,&B);
479: MatView(B,viewer);
480: MatDestroy(B);
481: }
482: return(0);
483: }
488: PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
489: {
490: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
491: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
492: PetscInt *ai = a->i,*ailen = a->ilen;
493: PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
494: MatScalar *ap,*aa = a->a,zero = 0.0;
497: for (k=0; k<m; k++) { /* loop over rows */
498: row = im[k]; brow = row/bs;
499: if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
500: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
501: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
502: nrow = ailen[brow];
503: for (l=0; l<n; l++) { /* loop over columns */
504: if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]);
505: if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
506: col = in[l] ;
507: bcol = col/bs;
508: cidx = col%bs;
509: ridx = row%bs;
510: high = nrow;
511: low = 0; /* assume unsorted */
512: while (high-low > 5) {
513: t = (low+high)/2;
514: if (rp[t] > bcol) high = t;
515: else low = t;
516: }
517: for (i=low; i<high; i++) {
518: if (rp[i] > bcol) break;
519: if (rp[i] == bcol) {
520: *v++ = ap[bs2*i+bs*cidx+ridx];
521: goto finished;
522: }
523: }
524: *v++ = zero;
525: finished:;
526: }
527: }
528: return(0);
529: }
534: PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
535: {
536: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
537: PetscErrorCode ierr;
538: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
539: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
540: PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
541: PetscTruth roworiented=a->roworiented;
542: const MatScalar *value = v;
543: MatScalar *ap,*aa = a->a,*bap;
544:
546: if (roworiented) {
547: stepval = (n-1)*bs;
548: } else {
549: stepval = (m-1)*bs;
550: }
551: for (k=0; k<m; k++) { /* loop over added rows */
552: row = im[k];
553: if (row < 0) continue;
554: #if defined(PETSC_USE_BOPT_g)
555: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
556: #endif
557: rp = aj + ai[row];
558: ap = aa + bs2*ai[row];
559: rmax = imax[row];
560: nrow = ailen[row];
561: low = 0;
562: for (l=0; l<n; l++) { /* loop over added columns */
563: if (in[l] < 0) continue;
564: col = in[l];
565: #if defined(PETSC_USE_BOPT_g)
566: if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1);
567: #endif
568: if (col < row) continue; /* ignore lower triangular block */
569: if (roworiented) {
570: value = v + k*(stepval+bs)*bs + l*bs;
571: } else {
572: value = v + l*(stepval+bs)*bs + k*bs;
573: }
574: if (!sorted) low = 0; high = nrow;
575: while (high-low > 7) {
576: t = (low+high)/2;
577: if (rp[t] > col) high = t;
578: else low = t;
579: }
580: for (i=low; i<high; i++) {
581: if (rp[i] > col) break;
582: if (rp[i] == col) {
583: bap = ap + bs2*i;
584: if (roworiented) {
585: if (is == ADD_VALUES) {
586: for (ii=0; ii<bs; ii++,value+=stepval) {
587: for (jj=ii; jj<bs2; jj+=bs) {
588: bap[jj] += *value++;
589: }
590: }
591: } else {
592: for (ii=0; ii<bs; ii++,value+=stepval) {
593: for (jj=ii; jj<bs2; jj+=bs) {
594: bap[jj] = *value++;
595: }
596: }
597: }
598: } else {
599: if (is == ADD_VALUES) {
600: for (ii=0; ii<bs; ii++,value+=stepval) {
601: for (jj=0; jj<bs; jj++) {
602: *bap++ += *value++;
603: }
604: }
605: } else {
606: for (ii=0; ii<bs; ii++,value+=stepval) {
607: for (jj=0; jj<bs; jj++) {
608: *bap++ = *value++;
609: }
610: }
611: }
612: }
613: goto noinsert2;
614: }
615: }
616: if (nonew == 1) goto noinsert2;
617: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
618: if (nrow >= rmax) {
619: /* there is no extra room in row, therefore enlarge */
620: PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
621: MatScalar *new_a;
623: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
625: /* malloc new storage space */
626: len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
627: PetscMalloc(len,&new_a);
628: new_j = (PetscInt*)(new_a + bs2*new_nz);
629: new_i = new_j + new_nz;
631: /* copy over old data into new slots */
632: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
633: for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
634: PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));
635: len = (new_nz - CHUNKSIZE - ai[row] - nrow);
636: PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));
637: PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
638: PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
639: PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
640: /* free up old matrix storage */
641: PetscFree(a->a);
642: if (!a->singlemalloc) {
643: PetscFree(a->i);
644: PetscFree(a->j);
645: }
646: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
647: a->singlemalloc = PETSC_TRUE;
649: rp = aj + ai[row]; ap = aa + bs2*ai[row];
650: rmax = imax[row] = imax[row] + CHUNKSIZE;
651: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
652: a->maxnz += bs2*CHUNKSIZE;
653: a->reallocs++;
654: a->nz++;
655: }
656: N = nrow++ - 1;
657: /* shift up all the later entries in this row */
658: for (ii=N; ii>=i; ii--) {
659: rp[ii+1] = rp[ii];
660: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
661: }
662: if (N >= i) {
663: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
664: }
665: rp[i] = col;
666: bap = ap + bs2*i;
667: if (roworiented) {
668: for (ii=0; ii<bs; ii++,value+=stepval) {
669: for (jj=ii; jj<bs2; jj+=bs) {
670: bap[jj] = *value++;
671: }
672: }
673: } else {
674: for (ii=0; ii<bs; ii++,value+=stepval) {
675: for (jj=0; jj<bs; jj++) {
676: *bap++ = *value++;
677: }
678: }
679: }
680: noinsert2:;
681: low = i;
682: }
683: ailen[row] = nrow;
684: }
685: return(0);
686: }
690: PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
691: {
692: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
694: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
695: PetscInt m = A->m,*ip,N,*ailen = a->ilen;
696: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
697: MatScalar *aa = a->a,*ap;
700: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
702: if (m) rmax = ailen[0];
703: for (i=1; i<mbs; i++) {
704: /* move each row back by the amount of empty slots (fshift) before it*/
705: fshift += imax[i-1] - ailen[i-1];
706: rmax = PetscMax(rmax,ailen[i]);
707: if (fshift) {
708: ip = aj + ai[i]; ap = aa + bs2*ai[i];
709: N = ailen[i];
710: for (j=0; j<N; j++) {
711: ip[j-fshift] = ip[j];
712: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
713: }
714: }
715: ai[i] = ai[i-1] + ailen[i-1];
716: }
717: if (mbs) {
718: fshift += imax[mbs-1] - ailen[mbs-1];
719: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
720: }
721: /* reset ilen and imax for each row */
722: for (i=0; i<mbs; i++) {
723: ailen[i] = imax[i] = ai[i+1] - ai[i];
724: }
725: a->nz = ai[mbs];
727: /* diagonals may have moved, reset it */
728: if (a->diag) {
729: PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));
730: }
731: PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",
732: m,A->m,A->bs,fshift*bs2,a->nz*bs2);
733: PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",
734: a->reallocs);
735: PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax);
736: a->reallocs = 0;
737: A->info.nz_unneeded = (PetscReal)fshift*bs2;
738:
739: return(0);
740: }
742: /*
743: This function returns an array of flags which indicate the locations of contiguous
744: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
745: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
746: Assume: sizes should be long enough to hold all the values.
747: */
750: PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
751: {
752: PetscInt i,j,k,row;
753: PetscTruth flg;
756: for (i=0,j=0; i<n; j++) {
757: row = idx[i];
758: if (row%bs!=0) { /* Not the begining of a block */
759: sizes[j] = 1;
760: i++;
761: } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
762: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
763: i++;
764: } else { /* Begining of the block, so check if the complete block exists */
765: flg = PETSC_TRUE;
766: for (k=1; k<bs; k++) {
767: if (row+k != idx[i+k]) { /* break in the block */
768: flg = PETSC_FALSE;
769: break;
770: }
771: }
772: if (flg == PETSC_TRUE) { /* No break in the bs */
773: sizes[j] = bs;
774: i+= bs;
775: } else {
776: sizes[j] = 1;
777: i++;
778: }
779: }
780: }
781: *bs_max = j;
782: return(0);
783: }
784:
787: PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag)
788: {
790: SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
791: }
793: /* Only add/insert a(i,j) with i<=j (blocks).
794: Any a(i,j) with i>j input by user is ingored.
795: */
799: PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
800: {
801: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
803: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
804: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
805: PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
806: PetscInt ridx,cidx,bs2=a->bs2;
807: MatScalar *ap,value,*aa=a->a,*bap;
811: for (k=0; k<m; k++) { /* loop over added rows */
812: row = im[k]; /* row number */
813: brow = row/bs; /* block row number */
814: if (row < 0) continue;
815: #if defined(PETSC_USE_BOPT_g)
816: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
817: #endif
818: rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
819: ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
820: rmax = imax[brow]; /* maximum space allocated for this row */
821: nrow = ailen[brow]; /* actual length of this row */
822: low = 0;
824: for (l=0; l<n; l++) { /* loop over added columns */
825: if (in[l] < 0) continue;
826: #if defined(PETSC_USE_BOPT_g)
827: if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1);
828: #endif
829: col = in[l];
830: bcol = col/bs; /* block col number */
832: if (brow <= bcol){
833: ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
834: if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
835: /* element value a(k,l) */
836: if (roworiented) {
837: value = v[l + k*n];
838: } else {
839: value = v[k + l*m];
840: }
842: /* move pointer bap to a(k,l) quickly and add/insert value */
843: if (!sorted) low = 0; high = nrow;
844: while (high-low > 7) {
845: t = (low+high)/2;
846: if (rp[t] > bcol) high = t;
847: else low = t;
848: }
849: for (i=low; i<high; i++) {
850: /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */
851: if (rp[i] > bcol) break;
852: if (rp[i] == bcol) {
853: bap = ap + bs2*i + bs*cidx + ridx;
854: if (is == ADD_VALUES) *bap += value;
855: else *bap = value;
856: /* for diag block, add/insert its symmetric element a(cidx,ridx) */
857: if (brow == bcol && ridx < cidx){
858: bap = ap + bs2*i + bs*ridx + cidx;
859: if (is == ADD_VALUES) *bap += value;
860: else *bap = value;
861: }
862: goto noinsert1;
863: }
864: }
865:
866: if (nonew == 1) goto noinsert1;
867: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
868: if (nrow >= rmax) {
869: /* there is no extra room in row, therefore enlarge */
870: PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
871: MatScalar *new_a;
873: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
875: /* Malloc new storage space */
876: len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt);
877: PetscMalloc(len,&new_a);
878: new_j = (PetscInt*)(new_a + bs2*new_nz);
879: new_i = new_j + new_nz;
881: /* copy over old data into new slots */
882: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
883: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
884: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));
885: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
886: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));
887: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
888: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
889: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
890: /* free up old matrix storage */
891: PetscFree(a->a);
892: if (!a->singlemalloc) {
893: PetscFree(a->i);
894: PetscFree(a->j);
895: }
896: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
897: a->singlemalloc = PETSC_TRUE;
899: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
900: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
901: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));
902: a->maxnz += bs2*CHUNKSIZE;
903: a->reallocs++;
904: a->nz++;
905: }
907: N = nrow++ - 1;
908: /* shift up all the later entries in this row */
909: for (ii=N; ii>=i; ii--) {
910: rp[ii+1] = rp[ii];
911: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
912: }
913: if (N>=i) {
914: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
915: }
916: rp[i] = bcol;
917: ap[bs2*i + bs*cidx + ridx] = value;
918: noinsert1:;
919: low = i;
920: /* } */
921: }
922: } /* end of if .. if.. */
923: } /* end of loop over added columns */
924: ailen[brow] = nrow;
925: } /* end of loop over added rows */
927: return(0);
928: }
930: EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*);
931: EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*);
932: EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt);
933: EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*);
934: EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
935: EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat);
936: EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
937: EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
938: EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec);
939: EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
940: EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
941: EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat);
942: EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec);
944: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
945: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
946: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
947: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
948: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
949: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
950: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
951: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
953: EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs);
955: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
956: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
957: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
958: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
959: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
960: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
961: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
962: EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
964: EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
965: EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
966: EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
967: EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
968: EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
969: EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
970: EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
971: EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
972: EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*);
974: EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
975: EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
976: EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
977: EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
978: EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
979: EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
980: EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
981: EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
983: EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
984: EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
985: EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
986: EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
987: EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
988: EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
989: EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
990: EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
992: #ifdef HAVE_MatICCFactor
993: /* modified from MatILUFactor_SeqSBAIJ, needs further work! */
996: PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info)
997: {
998: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
999: Mat outA;
1001: PetscTruth row_identity,col_identity;
1004: outA = inA;
1005: inA->factor = FACTOR_CHOLESKY;
1007: if (!a->diag) {
1008: MatMarkDiagonal_SeqSBAIJ(inA);
1009: }
1010: /*
1011: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1012: for ILU(0) factorization with natural ordering
1013: */
1014: switch (a->bs) {
1015: case 1:
1016: inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1017: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1018: inA->ops->solves = MatSolves_SeqSBAIJ_1;
1019: PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n");
1020: case 2:
1021: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1022: inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1023: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1024: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1025: break;
1026: case 3:
1027: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1028: inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1029: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1030: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1031: break;
1032: case 4:
1033: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1034: inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1035: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1036: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1037: break;
1038: case 5:
1039: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1040: inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1041: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1042: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1043: break;
1044: case 6:
1045: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1046: inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1047: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1048: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1049: break;
1050: case 7:
1051: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1052: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1053: inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1054: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1055: break;
1056: default:
1057: a->row = row;
1058: a->icol = col;
1059: PetscObjectReference((PetscObject)row);
1060: PetscObjectReference((PetscObject)col);
1062: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1063: ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));
1064: PetscLogObjectParent(inA,a->icol);
1066: if (!a->solve_work) {
1067: PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1068: PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1069: }
1070: }
1072: MatCholeskyFactorNumeric(inA,&outA);
1074: return(0);
1075: }
1076: #endif
1080: PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A)
1081: {
1082: static PetscTruth called = PETSC_FALSE;
1083: MPI_Comm comm = A->comm;
1084: PetscErrorCode ierr;
1087: if (called) {return(0);} else called = PETSC_TRUE;
1088: (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");
1089: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");
1090: return(0);
1091: }
1096: PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices)
1097: {
1098: Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1099: PetscInt i,nz,n;
1102: nz = baij->maxnz;
1103: n = mat->n;
1104: for (i=0; i<nz; i++) {
1105: baij->j[i] = indices[i];
1106: }
1107: baij->nz = nz;
1108: for (i=0; i<n; i++) {
1109: baij->ilen[i] = baij->imax[i];
1110: }
1112: return(0);
1113: }
1118: /*@
1119: MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1120: in the matrix.
1122: Input Parameters:
1123: + mat - the SeqSBAIJ matrix
1124: - indices - the column indices
1126: Level: advanced
1128: Notes:
1129: This can be called if you have precomputed the nonzero structure of the
1130: matrix and want to provide it to the matrix object to improve the performance
1131: of the MatSetValues() operation.
1133: You MUST have set the correct numbers of nonzeros per row in the call to
1134: MatCreateSeqSBAIJ().
1136: MUST be called before any calls to MatSetValues()
1138: .seealso: MatCreateSeqSBAIJ
1139: @*/
1140: PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1141: {
1142: PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1147: PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);
1148: if (f) {
1149: (*f)(mat,indices);
1150: } else {
1151: SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices");
1152: }
1153: return(0);
1154: }
1158: PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A)
1159: {
1163: MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1164: return(0);
1165: }
1169: PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1170: {
1171: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1173: *array = a->a;
1174: return(0);
1175: }
1179: PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[])
1180: {
1182: return(0);
1183: }
1185: #include petscblaslapack.h
1188: PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1189: {
1190: Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data;
1192: PetscInt i,bs=Y->bs,bs2,j;
1193: PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1;
1196: if (str == SAME_NONZERO_PATTERN) {
1197: BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1198: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1199: if (y->xtoy && y->XtoY != X) {
1200: PetscFree(y->xtoy);
1201: MatDestroy(y->XtoY);
1202: }
1203: if (!y->xtoy) { /* get xtoy */
1204: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1205: y->XtoY = X;
1206: }
1207: bs2 = bs*bs;
1208: for (i=0; i<x->nz; i++) {
1209: j = 0;
1210: while (j < bs2){
1211: y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1212: j++;
1213: }
1214: }
1215: PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1216: } else {
1217: MatAXPY_Basic(a,X,Y,str);
1218: }
1219: return(0);
1220: }
1224: PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg)
1225: {
1227: *flg = PETSC_TRUE;
1228: return(0);
1229: }
1233: PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg)
1234: {
1236: *flg = PETSC_TRUE;
1237: return(0);
1238: }
1242: PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg)
1243: {
1245: *flg = PETSC_FALSE;
1246: return(0);
1247: }
1249: /* -------------------------------------------------------------------*/
1250: static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1251: MatGetRow_SeqSBAIJ,
1252: MatRestoreRow_SeqSBAIJ,
1253: MatMult_SeqSBAIJ_N,
1254: /* 4*/ MatMultAdd_SeqSBAIJ_N,
1255: MatMult_SeqSBAIJ_N,
1256: MatMultAdd_SeqSBAIJ_N,
1257: MatSolve_SeqSBAIJ_N,
1258: 0,
1259: 0,
1260: /*10*/ 0,
1261: 0,
1262: MatCholeskyFactor_SeqSBAIJ,
1263: MatRelax_SeqSBAIJ,
1264: MatTranspose_SeqSBAIJ,
1265: /*15*/ MatGetInfo_SeqSBAIJ,
1266: MatEqual_SeqSBAIJ,
1267: MatGetDiagonal_SeqSBAIJ,
1268: MatDiagonalScale_SeqSBAIJ,
1269: MatNorm_SeqSBAIJ,
1270: /*20*/ 0,
1271: MatAssemblyEnd_SeqSBAIJ,
1272: 0,
1273: MatSetOption_SeqSBAIJ,
1274: MatZeroEntries_SeqSBAIJ,
1275: /*25*/ MatZeroRows_SeqSBAIJ,
1276: 0,
1277: 0,
1278: MatCholeskyFactorSymbolic_SeqSBAIJ,
1279: MatCholeskyFactorNumeric_SeqSBAIJ_N,
1280: /*30*/ MatSetUpPreallocation_SeqSBAIJ,
1281: 0,
1282: MatICCFactorSymbolic_SeqSBAIJ,
1283: MatGetArray_SeqSBAIJ,
1284: MatRestoreArray_SeqSBAIJ,
1285: /*35*/ MatDuplicate_SeqSBAIJ,
1286: 0,
1287: 0,
1288: 0,
1289: 0,
1290: /*40*/ MatAXPY_SeqSBAIJ,
1291: MatGetSubMatrices_SeqSBAIJ,
1292: MatIncreaseOverlap_SeqSBAIJ,
1293: MatGetValues_SeqSBAIJ,
1294: 0,
1295: /*45*/ MatPrintHelp_SeqSBAIJ,
1296: MatScale_SeqSBAIJ,
1297: 0,
1298: 0,
1299: 0,
1300: /*50*/ 0,
1301: MatGetRowIJ_SeqSBAIJ,
1302: MatRestoreRowIJ_SeqSBAIJ,
1303: 0,
1304: 0,
1305: /*55*/ 0,
1306: 0,
1307: 0,
1308: 0,
1309: MatSetValuesBlocked_SeqSBAIJ,
1310: /*60*/ MatGetSubMatrix_SeqSBAIJ,
1311: 0,
1312: 0,
1313: MatGetPetscMaps_Petsc,
1314: 0,
1315: /*65*/ 0,
1316: 0,
1317: 0,
1318: 0,
1319: 0,
1320: /*70*/ MatGetRowMax_SeqSBAIJ,
1321: 0,
1322: 0,
1323: 0,
1324: 0,
1325: /*75*/ 0,
1326: 0,
1327: 0,
1328: 0,
1329: 0,
1330: /*80*/ 0,
1331: 0,
1332: 0,
1333: #if !defined(PETSC_USE_COMPLEX)
1334: MatGetInertia_SeqSBAIJ,
1335: #else
1336: 0,
1337: #endif
1338: MatLoad_SeqSBAIJ,
1339: /*85*/ MatIsSymmetric_SeqSBAIJ,
1340: MatIsHermitian_SeqSBAIJ,
1341: MatIsStructurallySymmetric_SeqSBAIJ,
1342: 0,
1343: 0,
1344: /*90*/ 0,
1345: 0,
1346: 0,
1347: 0,
1348: 0,
1349: /*95*/ 0,
1350: 0,
1351: 0,
1352: 0};
1357: PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat)
1358: {
1359: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1360: PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2;
1364: if (aij->nonew != 1) {
1365: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1366: }
1368: /* allocate space for values if not already there */
1369: if (!aij->saved_values) {
1370: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1371: }
1373: /* copy values over */
1374: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1375: return(0);
1376: }
1382: PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat)
1383: {
1384: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1386: PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2;
1389: if (aij->nonew != 1) {
1390: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1391: }
1392: if (!aij->saved_values) {
1393: SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1394: }
1396: /* copy values over */
1397: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1398: return(0);
1399: }
1405: PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
1406: {
1407: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1409: PetscInt i,len,mbs,bs2;
1410: PetscTruth flg;
1413: B->preallocated = PETSC_TRUE;
1414: PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);
1415: mbs = B->m/bs;
1416: bs2 = bs*bs;
1418: if (mbs*bs != B->m) {
1419: SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1420: }
1422: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1423: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
1424: if (nnz) {
1425: for (i=0; i<mbs; i++) {
1426: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
1427: if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs);
1428: }
1429: }
1431: PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);
1432: if (!flg) {
1433: switch (bs) {
1434: case 1:
1435: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1436: B->ops->solve = MatSolve_SeqSBAIJ_1;
1437: B->ops->solves = MatSolves_SeqSBAIJ_1;
1438: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1439: B->ops->mult = MatMult_SeqSBAIJ_1;
1440: B->ops->multadd = MatMultAdd_SeqSBAIJ_1;
1441: break;
1442: case 2:
1443: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1444: B->ops->solve = MatSolve_SeqSBAIJ_2;
1445: B->ops->solvetranspose = MatSolve_SeqSBAIJ_2;
1446: B->ops->mult = MatMult_SeqSBAIJ_2;
1447: B->ops->multadd = MatMultAdd_SeqSBAIJ_2;
1448: break;
1449: case 3:
1450: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1451: B->ops->solve = MatSolve_SeqSBAIJ_3;
1452: B->ops->solvetranspose = MatSolve_SeqSBAIJ_3;
1453: B->ops->mult = MatMult_SeqSBAIJ_3;
1454: B->ops->multadd = MatMultAdd_SeqSBAIJ_3;
1455: break;
1456: case 4:
1457: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1458: B->ops->solve = MatSolve_SeqSBAIJ_4;
1459: B->ops->solvetranspose = MatSolve_SeqSBAIJ_4;
1460: B->ops->mult = MatMult_SeqSBAIJ_4;
1461: B->ops->multadd = MatMultAdd_SeqSBAIJ_4;
1462: break;
1463: case 5:
1464: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1465: B->ops->solve = MatSolve_SeqSBAIJ_5;
1466: B->ops->solvetranspose = MatSolve_SeqSBAIJ_5;
1467: B->ops->mult = MatMult_SeqSBAIJ_5;
1468: B->ops->multadd = MatMultAdd_SeqSBAIJ_5;
1469: break;
1470: case 6:
1471: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1472: B->ops->solve = MatSolve_SeqSBAIJ_6;
1473: B->ops->solvetranspose = MatSolve_SeqSBAIJ_6;
1474: B->ops->mult = MatMult_SeqSBAIJ_6;
1475: B->ops->multadd = MatMultAdd_SeqSBAIJ_6;
1476: break;
1477: case 7:
1478: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1479: B->ops->solve = MatSolve_SeqSBAIJ_7;
1480: B->ops->solvetranspose = MatSolve_SeqSBAIJ_7;
1481: B->ops->mult = MatMult_SeqSBAIJ_7;
1482: B->ops->multadd = MatMultAdd_SeqSBAIJ_7;
1483: break;
1484: }
1485: }
1487: b->mbs = mbs;
1488: b->nbs = mbs;
1489: PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);
1490: if (!nnz) {
1491: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1492: else if (nz <= 0) nz = 1;
1493: for (i=0; i<mbs; i++) {
1494: b->imax[i] = nz;
1495: }
1496: nz = nz*mbs; /* total nz */
1497: } else {
1498: nz = 0;
1499: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1500: }
1501: /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1503: /* allocate the matrix space */
1504: len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt);
1505: PetscMalloc(len,&b->a);
1506: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1507: b->j = (PetscInt*)(b->a + nz*bs2);
1508: PetscMemzero(b->j,nz*sizeof(PetscInt));
1509: b->i = b->j + nz;
1510: b->singlemalloc = PETSC_TRUE;
1512: /* pointer to beginning of each row */
1513: b->i[0] = 0;
1514: for (i=1; i<mbs+1; i++) {
1515: b->i[i] = b->i[i-1] + b->imax[i-1];
1516: }
1518: /* b->ilen will count nonzeros in each block row so far. */
1519: PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);
1520: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1521: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1522:
1523: B->bs = bs;
1524: b->bs2 = bs2;
1525: b->nz = 0;
1526: b->maxnz = nz*bs2;
1527:
1528: b->inew = 0;
1529: b->jnew = 0;
1530: b->anew = 0;
1531: b->a2anew = 0;
1532: b->permute = PETSC_FALSE;
1533: return(0);
1534: }
1538: EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1539: EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*);
1542: /*MC
1543: MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices,
1544: based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored.
1546: Options Database Keys:
1547: . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions()
1549: Level: beginner
1551: .seealso: MatCreateSeqSBAIJ
1552: M*/
1557: PetscErrorCode MatCreate_SeqSBAIJ(Mat B)
1558: {
1559: Mat_SeqSBAIJ *b;
1561: PetscMPIInt size;
1564: MPI_Comm_size(B->comm,&size);
1565: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1566: B->m = B->M = PetscMax(B->m,B->M);
1567: B->n = B->N = PetscMax(B->n,B->N);
1569: PetscNew(Mat_SeqSBAIJ,&b);
1570: B->data = (void*)b;
1571: PetscMemzero(b,sizeof(Mat_SeqSBAIJ));
1572: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1573: B->ops->destroy = MatDestroy_SeqSBAIJ;
1574: B->ops->view = MatView_SeqSBAIJ;
1575: B->factor = 0;
1576: B->lupivotthreshold = 1.0;
1577: B->mapping = 0;
1578: b->row = 0;
1579: b->icol = 0;
1580: b->reallocs = 0;
1581: b->saved_values = 0;
1582:
1583: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1584: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
1586: b->sorted = PETSC_FALSE;
1587: b->roworiented = PETSC_TRUE;
1588: b->nonew = 0;
1589: b->diag = 0;
1590: b->solve_work = 0;
1591: b->mult_work = 0;
1592: B->spptr = 0;
1593: b->keepzeroedrows = PETSC_FALSE;
1594: b->xtoy = 0;
1595: b->XtoY = 0;
1596:
1597: b->inew = 0;
1598: b->jnew = 0;
1599: b->anew = 0;
1600: b->a2anew = 0;
1601: b->permute = PETSC_FALSE;
1603: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1604: "MatStoreValues_SeqSBAIJ",
1605: MatStoreValues_SeqSBAIJ);
1606: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1607: "MatRetrieveValues_SeqSBAIJ",
1608: (void*)MatRetrieveValues_SeqSBAIJ);
1609: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1610: "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1611: MatSeqSBAIJSetColumnIndices_SeqSBAIJ);
1612: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C",
1613: "MatConvert_SeqSBAIJ_SeqAIJ",
1614: MatConvert_SeqSBAIJ_SeqAIJ);
1615: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C",
1616: "MatConvert_SeqSBAIJ_SeqBAIJ",
1617: MatConvert_SeqSBAIJ_SeqBAIJ);
1618: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",
1619: "MatSeqSBAIJSetPreallocation_SeqSBAIJ",
1620: MatSeqSBAIJSetPreallocation_SeqSBAIJ);
1622: B->symmetric = PETSC_TRUE;
1623: B->structurally_symmetric = PETSC_TRUE;
1624: B->symmetric_set = PETSC_TRUE;
1625: B->structurally_symmetric_set = PETSC_TRUE;
1626: return(0);
1627: }
1632: /*@C
1633: MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1634: compressed row) format. For good matrix assembly performance the
1635: user should preallocate the matrix storage by setting the parameter nz
1636: (or the array nnz). By setting these parameters accurately, performance
1637: during matrix assembly can be increased by more than a factor of 50.
1639: Collective on Mat
1641: Input Parameters:
1642: + A - the symmetric matrix
1643: . bs - size of block
1644: . nz - number of block nonzeros per block row (same for all rows)
1645: - nnz - array containing the number of block nonzeros in the upper triangular plus
1646: diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1648: Options Database Keys:
1649: . -mat_no_unroll - uses code that does not unroll the loops in the
1650: block calculations (much slower)
1651: . -mat_block_size - size of the blocks to use
1653: Level: intermediate
1655: Notes:
1656: Specify the preallocated storage with either nz or nnz (not both).
1657: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1658: allocation. For additional details, see the users manual chapter on
1659: matrices.
1661: If the nnz parameter is given then the nz parameter is ignored
1664: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1665: @*/
1666: PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
1667: {
1668: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
1671: PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);
1672: if (f) {
1673: (*f)(B,bs,nz,nnz);
1674: }
1675: return(0);
1676: }
1680: /*@C
1681: MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1682: compressed row) format. For good matrix assembly performance the
1683: user should preallocate the matrix storage by setting the parameter nz
1684: (or the array nnz). By setting these parameters accurately, performance
1685: during matrix assembly can be increased by more than a factor of 50.
1687: Collective on MPI_Comm
1689: Input Parameters:
1690: + comm - MPI communicator, set to PETSC_COMM_SELF
1691: . bs - size of block
1692: . m - number of rows, or number of columns
1693: . nz - number of block nonzeros per block row (same for all rows)
1694: - nnz - array containing the number of block nonzeros in the upper triangular plus
1695: diagonal portion of each block (possibly different for each block row) or PETSC_NULL
1697: Output Parameter:
1698: . A - the symmetric matrix
1700: Options Database Keys:
1701: . -mat_no_unroll - uses code that does not unroll the loops in the
1702: block calculations (much slower)
1703: . -mat_block_size - size of the blocks to use
1705: Level: intermediate
1707: Notes:
1709: Specify the preallocated storage with either nz or nnz (not both).
1710: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1711: allocation. For additional details, see the users manual chapter on
1712: matrices.
1714: If the nnz parameter is given then the nz parameter is ignored
1716: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1717: @*/
1718: PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1719: {
1721:
1723: MatCreate(comm,m,n,m,n,A);
1724: MatSetType(*A,MATSEQSBAIJ);
1725: MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);
1726: return(0);
1727: }
1731: PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1732: {
1733: Mat C;
1734: Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1736: PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1739: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1741: *B = 0;
1742: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1743: MatSetType(C,A->type_name);
1744: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1745: c = (Mat_SeqSBAIJ*)C->data;
1747: C->preallocated = PETSC_TRUE;
1748: C->factor = A->factor;
1749: c->row = 0;
1750: c->icol = 0;
1751: c->saved_values = 0;
1752: c->keepzeroedrows = a->keepzeroedrows;
1753: C->assembled = PETSC_TRUE;
1755: C->M = A->M;
1756: C->N = A->N;
1757: C->bs = A->bs;
1758: c->bs2 = a->bs2;
1759: c->mbs = a->mbs;
1760: c->nbs = a->nbs;
1762: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);
1763: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);
1764: for (i=0; i<mbs; i++) {
1765: c->imax[i] = a->imax[i];
1766: c->ilen[i] = a->ilen[i];
1767: }
1769: /* allocate the matrix space */
1770: c->singlemalloc = PETSC_TRUE;
1771: len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt));
1772: PetscMalloc(len,&c->a);
1773: c->j = (PetscInt*)(c->a + nz*bs2);
1774: c->i = c->j + nz;
1775: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
1776: if (mbs > 0) {
1777: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
1778: if (cpvalues == MAT_COPY_VALUES) {
1779: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1780: } else {
1781: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1782: }
1783: }
1785: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1786: c->sorted = a->sorted;
1787: c->roworiented = a->roworiented;
1788: c->nonew = a->nonew;
1790: if (a->diag) {
1791: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
1792: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
1793: for (i=0; i<mbs; i++) {
1794: c->diag[i] = a->diag[i];
1795: }
1796: } else c->diag = 0;
1797: c->nz = a->nz;
1798: c->maxnz = a->maxnz;
1799: c->solve_work = 0;
1800: c->mult_work = 0;
1801: *B = C;
1802: PetscFListDuplicate(A->qlist,&C->qlist);
1803: return(0);
1804: }
1808: PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1809: {
1810: Mat_SeqSBAIJ *a;
1811: Mat B;
1813: int fd;
1814: PetscMPIInt size;
1815: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
1816: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1817: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows;
1818: PetscInt *masked,nmask,tmp,bs2,ishift;
1819: PetscScalar *aa;
1820: MPI_Comm comm = ((PetscObject)viewer)->comm;
1823: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1824: bs2 = bs*bs;
1826: MPI_Comm_size(comm,&size);
1827: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1828: PetscViewerBinaryGetDescriptor(viewer,&fd);
1829: PetscBinaryRead(fd,header,4,PETSC_INT);
1830: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1831: M = header[1]; N = header[2]; nz = header[3];
1833: if (header[3] < 0) {
1834: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1835: }
1837: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1839: /*
1840: This code adds extra rows to make sure the number of rows is
1841: divisible by the blocksize
1842: */
1843: mbs = M/bs;
1844: extra_rows = bs - M + bs*(mbs);
1845: if (extra_rows == bs) extra_rows = 0;
1846: else mbs++;
1847: if (extra_rows) {
1848: PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n");
1849: }
1851: /* read in row lengths */
1852: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
1853: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1854: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1856: /* read in column indices */
1857: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
1858: PetscBinaryRead(fd,jj,nz,PETSC_INT);
1859: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1861: /* loop over row lengths determining block row lengths */
1862: PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);
1863: PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));
1864: PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
1865: PetscMemzero(mask,mbs*sizeof(PetscInt));
1866: masked = mask + mbs;
1867: rowcount = 0; nzcount = 0;
1868: for (i=0; i<mbs; i++) {
1869: nmask = 0;
1870: for (j=0; j<bs; j++) {
1871: kmax = rowlengths[rowcount];
1872: for (k=0; k<kmax; k++) {
1873: tmp = jj[nzcount++]/bs; /* block col. index */
1874: if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1875: }
1876: rowcount++;
1877: }
1878: s_browlengths[i] += nmask;
1879:
1880: /* zero out the mask elements we set */
1881: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1882: }
1884: /* create our matrix */
1885: MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);
1886: MatSetType(B,type);
1887: MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);
1888: a = (Mat_SeqSBAIJ*)B->data;
1890: /* set matrix "i" values */
1891: a->i[0] = 0;
1892: for (i=1; i<= mbs; i++) {
1893: a->i[i] = a->i[i-1] + s_browlengths[i-1];
1894: a->ilen[i-1] = s_browlengths[i-1];
1895: }
1896: a->nz = a->i[mbs];
1898: /* read in nonzero values */
1899: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1900: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1901: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1903: /* set "a" and "j" values into matrix */
1904: nzcount = 0; jcount = 0;
1905: for (i=0; i<mbs; i++) {
1906: nzcountb = nzcount;
1907: nmask = 0;
1908: for (j=0; j<bs; j++) {
1909: kmax = rowlengths[i*bs+j];
1910: for (k=0; k<kmax; k++) {
1911: tmp = jj[nzcount++]/bs; /* block col. index */
1912: if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1913: }
1914: }
1915: /* sort the masked values */
1916: PetscSortInt(nmask,masked);
1918: /* set "j" values into matrix */
1919: maskcount = 1;
1920: for (j=0; j<nmask; j++) {
1921: a->j[jcount++] = masked[j];
1922: mask[masked[j]] = maskcount++;
1923: }
1925: /* set "a" values into matrix */
1926: ishift = bs2*a->i[i];
1927: for (j=0; j<bs; j++) {
1928: kmax = rowlengths[i*bs+j];
1929: for (k=0; k<kmax; k++) {
1930: tmp = jj[nzcountb]/bs ; /* block col. index */
1931: if (tmp >= i){
1932: block = mask[tmp] - 1;
1933: point = jj[nzcountb] - bs*tmp;
1934: idx = ishift + bs2*block + j + bs*point;
1935: a->a[idx] = aa[nzcountb];
1936: }
1937: nzcountb++;
1938: }
1939: }
1940: /* zero out the mask elements we set */
1941: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1942: }
1943: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1945: PetscFree(rowlengths);
1946: PetscFree(s_browlengths);
1947: PetscFree(aa);
1948: PetscFree(jj);
1949: PetscFree(mask);
1951: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1952: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1953: MatView_Private(B);
1954: *A = B;
1955: return(0);
1956: }
1960: PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1961: {
1962: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1963: MatScalar *aa=a->a,*v,*v1;
1964: PetscScalar *x,*b,*t,sum,d;
1966: PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j;
1967: PetscInt nz,nz1,*vj,*vj1,i;
1970: its = its*lits;
1971: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1973: if (bs > 1)
1974: SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1976: VecGetArray(xx,&x);
1977: if (xx != bb) {
1978: VecGetArray(bb,&b);
1979: } else {
1980: b = x;
1981: }
1983: PetscMalloc(m*sizeof(PetscScalar),&t);
1984:
1985: if (flag & SOR_ZERO_INITIAL_GUESS) {
1986: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1987: for (i=0; i<m; i++)
1988: t[i] = b[i];
1990: for (i=0; i<m; i++){
1991: d = *(aa + ai[i]); /* diag[i] */
1992: v = aa + ai[i] + 1;
1993: vj = aj + ai[i] + 1;
1994: nz = ai[i+1] - ai[i] - 1;
1995: x[i] = omega*t[i]/d;
1996: while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1997: PetscLogFlops(2*nz-1);
1998: }
1999: }
2001: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2002: for (i=0; i<m; i++)
2003: t[i] = b[i];
2004:
2005: for (i=0; i<m-1; i++){ /* update rhs */
2006: v = aa + ai[i] + 1;
2007: vj = aj + ai[i] + 1;
2008: nz = ai[i+1] - ai[i] - 1;
2009: while (nz--) t[*vj++] -= x[i]*(*v++);
2010: PetscLogFlops(2*nz-1);
2011: }
2012: for (i=m-1; i>=0; i--){
2013: d = *(aa + ai[i]);
2014: v = aa + ai[i] + 1;
2015: vj = aj + ai[i] + 1;
2016: nz = ai[i+1] - ai[i] - 1;
2017: sum = t[i];
2018: while (nz--) sum -= x[*vj++]*(*v++);
2019: PetscLogFlops(2*nz-1);
2020: x[i] = (1-omega)*x[i] + omega*sum/d;
2021: }
2022: }
2023: its--;
2024: }
2026: while (its--) {
2027: /*
2028: forward sweep:
2029: for i=0,...,m-1:
2030: sum[i] = (b[i] - U(i,:)x )/d[i];
2031: x[i] = (1-omega)x[i] + omega*sum[i];
2032: b = b - x[i]*U^T(i,:);
2033:
2034: */
2035: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
2036: for (i=0; i<m; i++)
2037: t[i] = b[i];
2039: for (i=0; i<m; i++){
2040: d = *(aa + ai[i]); /* diag[i] */
2041: v = aa + ai[i] + 1; v1=v;
2042: vj = aj + ai[i] + 1; vj1=vj;
2043: nz = ai[i+1] - ai[i] - 1; nz1=nz;
2044: sum = t[i];
2045: while (nz1--) sum -= (*v1++)*x[*vj1++];
2046: x[i] = (1-omega)*x[i] + omega*sum/d;
2047: while (nz--) t[*vj++] -= x[i]*(*v++);
2048: PetscLogFlops(4*nz-2);
2049: }
2050: }
2051:
2052: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
2053: /*
2054: backward sweep:
2055: b = b - x[i]*U^T(i,:), i=0,...,n-2
2056: for i=m-1,...,0:
2057: sum[i] = (b[i] - U(i,:)x )/d[i];
2058: x[i] = (1-omega)x[i] + omega*sum[i];
2059: */
2060: for (i=0; i<m; i++)
2061: t[i] = b[i];
2062:
2063: for (i=0; i<m-1; i++){ /* update rhs */
2064: v = aa + ai[i] + 1;
2065: vj = aj + ai[i] + 1;
2066: nz = ai[i+1] - ai[i] - 1;
2067: while (nz--) t[*vj++] -= x[i]*(*v++);
2068: PetscLogFlops(2*nz-1);
2069: }
2070: for (i=m-1; i>=0; i--){
2071: d = *(aa + ai[i]);
2072: v = aa + ai[i] + 1;
2073: vj = aj + ai[i] + 1;
2074: nz = ai[i+1] - ai[i] - 1;
2075: sum = t[i];
2076: while (nz--) sum -= x[*vj++]*(*v++);
2077: PetscLogFlops(2*nz-1);
2078: x[i] = (1-omega)*x[i] + omega*sum/d;
2079: }
2080: }
2081: }
2083: PetscFree(t);
2084: VecRestoreArray(xx,&x);
2085: if (bb != xx) {
2086: VecRestoreArray(bb,&b);
2087: }
2089: return(0);
2090: }