Actual source code: sbaij.c
1: /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/vec/vecimpl.h
9: #include src/inline/spops.h
10: #include src/mat/impls/sbaij/seq/sbaij.h
12: #define CHUNKSIZE 10
14: /*
15: Checks for missing diagonals
16: */
17: int MatMissingDiagonal_SeqSBAIJ(Mat A)
18: {
19: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
20: int *diag,*jj = a->j,i,ierr;
23: MatMarkDiagonal_SeqSBAIJ(A);
24: diag = a->diag;
25: for (i=0; i<a->mbs; i++) {
26: if (jj[diag[i]] != i) {
27: SETERRQ1(1,"Matrix is missing diagonal number %d",i);
28: }
29: }
30: return(0);
31: }
33: int MatMarkDiagonal_SeqSBAIJ(Mat A)
34: {
35: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
36: int i,j,*diag,m = a->mbs,ierr;
39: if (a->diag) return(0);
41: PetscMalloc((m+1)*sizeof(int),&diag);
42: PetscLogObjectMemory(A,(m+1)*sizeof(int));
43: for (i=0; i<m; i++) {
44: diag[i] = a->i[i+1];
45: for (j=a->i[i]; j<a->i[i+1]; j++) {
46: if (a->j[j] == i) {
47: diag[i] = j;
48: break;
49: }
50: }
51: }
52: a->diag = diag;
53: return(0);
54: }
57: extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
59: static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
60: {
61: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
64: if (ia) {
65: SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
66: }
67: *nn = a->mbs;
68: return(0);
69: }
71: static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
72: {
74: if (!ia) return(0);
75: SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
76: /* return(0); */
77: }
79: int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
80: {
81: Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
84: *bs = sbaij->bs;
85: return(0);
86: }
88: int MatDestroy_SeqSBAIJ(Mat A)
89: {
90: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
91: int ierr;
94: #if defined(PETSC_USE_LOG)
95: PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
96: #endif
97: PetscFree(a->a);
98: if (!a->singlemalloc) {
99: PetscFree(a->i);
100: PetscFree(a->j);
101: }
102: if (a->row) {
103: ISDestroy(a->row);
104: }
105: if (a->diag) {PetscFree(a->diag);}
106: if (a->ilen) {PetscFree(a->ilen);}
107: if (a->imax) {PetscFree(a->imax);}
108: if (a->solve_work) {PetscFree(a->solve_work);}
109: if (a->mult_work) {PetscFree(a->mult_work);}
110: if (a->icol) {ISDestroy(a->icol);}
111: if (a->saved_values) {PetscFree(a->saved_values);}
113: if (a->inew){
114: PetscFree(a->inew);
115: a->inew = 0;
116: }
117: PetscFree(a);
118: return(0);
119: }
121: int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
122: {
123: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
126: switch (op) {
127: case MAT_ROW_ORIENTED:
128: a->roworiented = PETSC_TRUE;
129: break;
130: case MAT_COLUMN_ORIENTED:
131: a->roworiented = PETSC_FALSE;
132: break;
133: case MAT_COLUMNS_SORTED:
134: a->sorted = PETSC_TRUE;
135: break;
136: case MAT_COLUMNS_UNSORTED:
137: a->sorted = PETSC_FALSE;
138: break;
139: case MAT_KEEP_ZEROED_ROWS:
140: a->keepzeroedrows = PETSC_TRUE;
141: break;
142: case MAT_NO_NEW_NONZERO_LOCATIONS:
143: a->nonew = 1;
144: break;
145: case MAT_NEW_NONZERO_LOCATION_ERR:
146: a->nonew = -1;
147: break;
148: case MAT_NEW_NONZERO_ALLOCATION_ERR:
149: a->nonew = -2;
150: break;
151: case MAT_YES_NEW_NONZERO_LOCATIONS:
152: a->nonew = 0;
153: break;
154: case MAT_ROWS_SORTED:
155: case MAT_ROWS_UNSORTED:
156: case MAT_YES_NEW_DIAGONALS:
157: case MAT_IGNORE_OFF_PROC_ENTRIES:
158: case MAT_USE_HASH_TABLE:
159: case MAT_USE_SINGLE_PRECISION_SOLVES:
160: PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignoredn");
161: break;
162: case MAT_NO_NEW_DIAGONALS:
163: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
164: default:
165: SETERRQ(PETSC_ERR_SUP,"unknown option");
166: }
167: return(0);
168: }
170: int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
171: {
172: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
173: int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
174: MatScalar *aa,*aa_i;
175: PetscScalar *v_i;
178: bs = a->bs;
179: ai = a->i;
180: aj = a->j;
181: aa = a->a;
182: bs2 = a->bs2;
183:
184: if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
185:
186: bn = row/bs; /* Block number */
187: bp = row % bs; /* Block position */
188: M = ai[bn+1] - ai[bn];
189: *ncols = bs*M;
190:
191: if (v) {
192: *v = 0;
193: if (*ncols) {
194: PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);
195: for (i=0; i<M; i++) { /* for each block in the block row */
196: v_i = *v + i*bs;
197: aa_i = aa + bs2*(ai[bn] + i);
198: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
199: }
200: }
201: }
203: if (cols) {
204: *cols = 0;
205: if (*ncols) {
206: PetscMalloc((*ncols+row)*sizeof(int),cols);
207: for (i=0; i<M; i++) { /* for each block in the block row */
208: cols_i = *cols + i*bs;
209: itmp = bs*aj[ai[bn] + i];
210: for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
211: }
212: }
213: }
215: /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
216: /* this segment is currently removed, so only entries in the upper triangle are obtained */
217: #ifdef column_search
218: v_i = *v + M*bs;
219: cols_i = *cols + M*bs;
220: for (i=0; i<bn; i++){ /* for each block row */
221: M = ai[i+1] - ai[i];
222: for (j=0; j<M; j++){
223: itmp = aj[ai[i] + j]; /* block column value */
224: if (itmp == bn){
225: aa_i = aa + bs2*(ai[i] + j) + bs*bp;
226: for (k=0; k<bs; k++) {
227: *cols_i++ = i*bs+k;
228: *v_i++ = aa_i[k];
229: }
230: *ncols += bs;
231: break;
232: }
233: }
234: }
235: #endif
237: return(0);
238: }
240: int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
241: {
245: if (idx) {if (*idx) {PetscFree(*idx);}}
246: if (v) {if (*v) {PetscFree(*v);}}
247: return(0);
248: }
250: int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
251: {
253: SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
254: /* return(0); */
255: }
257: static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
258: {
259: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
260: int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
261: PetscScalar *aa;
262: FILE *file;
265: ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);
266: ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
267: col_lens[0] = MAT_FILE_COOKIE;
269: col_lens[1] = A->m;
270: col_lens[2] = A->m;
271: col_lens[3] = a->s_nz*bs2;
273: /* store lengths of each row and write (including header) to file */
274: count = 0;
275: for (i=0; i<a->mbs; i++) {
276: for (j=0; j<bs; j++) {
277: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
278: }
279: }
280: PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
281: PetscFree(col_lens);
283: /* store column indices (zero start index) */
284: ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);
285: count = 0;
286: for (i=0; i<a->mbs; i++) {
287: for (j=0; j<bs; j++) {
288: for (k=a->i[i]; k<a->i[i+1]; k++) {
289: for (l=0; l<bs; l++) {
290: jj[count++] = bs*a->j[k] + l;
291: }
292: }
293: }
294: }
295: PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);
296: PetscFree(jj);
298: /* store nonzero values */
299: ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);
300: count = 0;
301: for (i=0; i<a->mbs; i++) {
302: for (j=0; j<bs; j++) {
303: for (k=a->i[i]; k<a->i[i+1]; k++) {
304: for (l=0; l<bs; l++) {
305: aa[count++] = a->a[bs2*k + l*bs + j];
306: }
307: }
308: }
309: }
310: PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);
311: PetscFree(aa);
313: PetscViewerBinaryGetInfoPointer(viewer,&file);
314: if (file) {
315: fprintf(file,"-matload_block_size %dn",a->bs);
316: }
317: return(0);
318: }
320: static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
321: {
322: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
323: int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
324: char *name;
325: PetscViewerFormat format;
328: PetscObjectGetName((PetscObject)A,&name);
329: PetscViewerGetFormat(viewer,&format);
330: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
331: PetscViewerASCIIPrintf(viewer," block size is %dn",bs);
332: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
333: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
334: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
335: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
336: for (i=0; i<a->mbs; i++) {
337: for (j=0; j<bs; j++) {
338: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
339: for (k=a->i[i]; k<a->i[i+1]; k++) {
340: for (l=0; l<bs; l++) {
341: #if defined(PETSC_USE_COMPLEX)
342: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
343: PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
344: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
345: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
346: PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
347: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
348: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
349: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
350: }
351: #else
352: if (a->a[bs2*k + l*bs + j] != 0.0) {
353: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
354: }
355: #endif
356: }
357: }
358: PetscViewerASCIIPrintf(viewer,"n");
359: }
360: }
361: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
362: } else {
363: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
364: for (i=0; i<a->mbs; i++) {
365: for (j=0; j<bs; j++) {
366: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
367: for (k=a->i[i]; k<a->i[i+1]; k++) {
368: for (l=0; l<bs; l++) {
369: #if defined(PETSC_USE_COMPLEX)
370: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
371: PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
372: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
373: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
374: PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
375: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
376: } else {
377: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
378: }
379: #else
380: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
381: #endif
382: }
383: }
384: PetscViewerASCIIPrintf(viewer,"n");
385: }
386: }
387: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
388: }
389: PetscViewerFlush(viewer);
390: return(0);
391: }
393: static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
394: {
395: Mat A = (Mat) Aa;
396: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
397: int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
398: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
399: MatScalar *aa;
400: MPI_Comm comm;
401: PetscViewer viewer;
404: /*
405: This is nasty. If this is called from an originally parallel matrix
406: then all processes call this,but only the first has the matrix so the
407: rest should return immediately.
408: */
409: PetscObjectGetComm((PetscObject)draw,&comm);
410: MPI_Comm_rank(comm,&rank);
411: if (rank) return(0);
413: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
415: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
416: PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
418: /* loop over matrix elements drawing boxes */
419: color = PETSC_DRAW_BLUE;
420: for (i=0,row=0; i<mbs; i++,row+=bs) {
421: for (j=a->i[i]; j<a->i[i+1]; j++) {
422: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
423: x_l = a->j[j]*bs; x_r = x_l + 1.0;
424: aa = a->a + j*bs2;
425: for (k=0; k<bs; k++) {
426: for (l=0; l<bs; l++) {
427: if (PetscRealPart(*aa++) >= 0.) continue;
428: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
429: }
430: }
431: }
432: }
433: color = PETSC_DRAW_CYAN;
434: for (i=0,row=0; i<mbs; i++,row+=bs) {
435: for (j=a->i[i]; j<a->i[i+1]; j++) {
436: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
437: x_l = a->j[j]*bs; x_r = x_l + 1.0;
438: aa = a->a + j*bs2;
439: for (k=0; k<bs; k++) {
440: for (l=0; l<bs; l++) {
441: if (PetscRealPart(*aa++) != 0.) continue;
442: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
443: }
444: }
445: }
446: }
448: color = PETSC_DRAW_RED;
449: for (i=0,row=0; i<mbs; i++,row+=bs) {
450: for (j=a->i[i]; j<a->i[i+1]; j++) {
451: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
452: x_l = a->j[j]*bs; x_r = x_l + 1.0;
453: aa = a->a + j*bs2;
454: for (k=0; k<bs; k++) {
455: for (l=0; l<bs; l++) {
456: if (PetscRealPart(*aa++) <= 0.) continue;
457: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
458: }
459: }
460: }
461: }
462: return(0);
463: }
465: static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
466: {
467: int ierr;
468: PetscReal xl,yl,xr,yr,w,h;
469: PetscDraw draw;
470: PetscTruth isnull;
474: PetscViewerDrawGetDraw(viewer,0,&draw);
475: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
477: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
478: xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
479: xr += w; yr += h; xl = -w; yl = -h;
480: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
481: PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);
482: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
483: return(0);
484: }
486: int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
487: {
488: int ierr;
489: PetscTruth issocket,isascii,isbinary,isdraw;
492: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
493: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
494: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
495: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
496: if (issocket) {
497: SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
498: } else if (isascii){
499: MatView_SeqSBAIJ_ASCII(A,viewer);
500: } else if (isbinary) {
501: MatView_SeqSBAIJ_Binary(A,viewer);
502: } else if (isdraw) {
503: MatView_SeqSBAIJ_Draw(A,viewer);
504: } else {
505: SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
506: }
507: return(0);
508: }
511: int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
512: {
513: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
514: int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
515: int *ai = a->i,*ailen = a->ilen;
516: int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
517: MatScalar *ap,*aa = a->a,zero = 0.0;
520: for (k=0; k<m; k++) { /* loop over rows */
521: row = im[k]; brow = row/bs;
522: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
523: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
524: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
525: nrow = ailen[brow];
526: for (l=0; l<n; l++) { /* loop over columns */
527: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
528: if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
529: col = in[l] ;
530: bcol = col/bs;
531: cidx = col%bs;
532: ridx = row%bs;
533: high = nrow;
534: low = 0; /* assume unsorted */
535: while (high-low > 5) {
536: t = (low+high)/2;
537: if (rp[t] > bcol) high = t;
538: else low = t;
539: }
540: for (i=low; i<high; i++) {
541: if (rp[i] > bcol) break;
542: if (rp[i] == bcol) {
543: *v++ = ap[bs2*i+bs*cidx+ridx];
544: goto finished;
545: }
546: }
547: *v++ = zero;
548: finished:;
549: }
550: }
551: return(0);
552: }
555: int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
556: {
558: SETERRQ(1,"Function not yet written for SBAIJ format");
559: /* return(0); */
560: }
562: int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
563: {
564: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
565: int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
566: int m = A->m,*ip,N,*ailen = a->ilen;
567: int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
568: MatScalar *aa = a->a,*ap;
571: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
573: if (m) rmax = ailen[0];
574: for (i=1; i<mbs; i++) {
575: /* move each row back by the amount of empty slots (fshift) before it*/
576: fshift += imax[i-1] - ailen[i-1];
577: rmax = PetscMax(rmax,ailen[i]);
578: if (fshift) {
579: ip = aj + ai[i]; ap = aa + bs2*ai[i];
580: N = ailen[i];
581: for (j=0; j<N; j++) {
582: ip[j-fshift] = ip[j];
583: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
584: }
585: }
586: ai[i] = ai[i-1] + ailen[i-1];
587: }
588: if (mbs) {
589: fshift += imax[mbs-1] - ailen[mbs-1];
590: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
591: }
592: /* reset ilen and imax for each row */
593: for (i=0; i<mbs; i++) {
594: ailen[i] = imax[i] = ai[i+1] - ai[i];
595: }
596: a->s_nz = ai[mbs];
598: /* diagonals may have moved, so kill the diagonal pointers */
599: if (fshift && a->diag) {
600: PetscFree(a->diag);
601: PetscLogObjectMemory(A,-(m+1)*sizeof(int));
602: a->diag = 0;
603: }
604: PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",
605: m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
606: PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %dn",
607: a->reallocs);
608: PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %dn",rmax);
609: a->reallocs = 0;
610: A->info.nz_unneeded = (PetscReal)fshift*bs2;
612: return(0);
613: }
615: /*
616: This function returns an array of flags which indicate the locations of contiguous
617: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
618: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
619: Assume: sizes should be long enough to hold all the values.
620: */
621: static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
622: {
623: int i,j,k,row;
624: PetscTruth flg;
627: for (i=0,j=0; i<n; j++) {
628: row = idx[i];
629: if (row%bs!=0) { /* Not the begining of a block */
630: sizes[j] = 1;
631: i++;
632: } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
633: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
634: i++;
635: } else { /* Begining of the block, so check if the complete block exists */
636: flg = PETSC_TRUE;
637: for (k=1; k<bs; k++) {
638: if (row+k != idx[i+k]) { /* break in the block */
639: flg = PETSC_FALSE;
640: break;
641: }
642: }
643: if (flg == PETSC_TRUE) { /* No break in the bs */
644: sizes[j] = bs;
645: i+= bs;
646: } else {
647: sizes[j] = 1;
648: i++;
649: }
650: }
651: }
652: *bs_max = j;
653: return(0);
654: }
655:
656: int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
657: {
658: Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data;
659: int ierr,i,j,k,count,is_n,*is_idx,*rows;
660: int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max;
661: PetscScalar zero = 0.0;
662: MatScalar *aa;
665: /* Make a copy of the IS and sort it */
666: ISGetSize(is,&is_n);
667: ISGetIndices(is,&is_idx);
669: /* allocate memory for rows,sizes */
670: PetscMalloc((3*is_n+1)*sizeof(int),&rows);
671: sizes = rows + is_n;
673: /* initialize copy IS values to rows, and sort them */
674: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
675: PetscSortInt(is_n,rows);
676: if (sbaij->keepzeroedrows) { /* do not change nonzero structure */
677: for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */
678: bs_max = is_n; /* bs_max: num. of contiguous block row in the row */
679: } else {
680: MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
681: }
682: ISRestoreIndices(is,&is_idx);
684: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
685: row = rows[j]; /* row to be zeroed */
686: if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
687: count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */
688: aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs);
689: if (sizes[i] == bs && !sbaij->keepzeroedrows) {
690: if (diag) {
691: if (sbaij->ilen[row/bs] > 0) {
692: sbaij->ilen[row/bs] = 1;
693: sbaij->j[sbaij->i[row/bs]] = row/bs;
694: PetscMemzero(aa,count*bs*sizeof(MatScalar));
695: }
696: /* Now insert all the diagoanl values for this bs */
697: for (k=0; k<bs; k++) {
698: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
699: }
700: } else { /* (!diag) */
701: sbaij->ilen[row/bs] = 0;
702: } /* end (!diag) */
703: } else { /* (sizes[i] != bs), broken block */
704: #if defined (PETSC_USE_DEBUG)
705: if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
706: #endif
707: for (k=0; k<count; k++) {
708: aa[0] = zero;
709: aa+=bs;
710: }
711: if (diag) {
712: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
713: }
714: }
715: }
717: PetscFree(rows);
718: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
719: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
720: return(0);
721: }
723: /* Only add/insert a(i,j) with i<=j (blocks).
724: Any a(i,j) with i>j input by user is ingored.
725: */
727: int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
728: {
729: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
730: int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
731: int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
732: int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
733: int ridx,cidx,bs2=a->bs2,ierr;
734: MatScalar *ap,value,*aa=a->a,*bap;
738: for (k=0; k<m; k++) { /* loop over added rows */
739: row = im[k]; /* row number */
740: brow = row/bs; /* block row number */
741: if (row < 0) continue;
742: #if defined(PETSC_USE_BOPT_g)
743: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
744: #endif
745: rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
746: ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
747: rmax = imax[brow]; /* maximum space allocated for this row */
748: nrow = ailen[brow]; /* actual length of this row */
749: low = 0;
751: for (l=0; l<n; l++) { /* loop over added columns */
752: if (in[l] < 0) continue;
753: #if defined(PETSC_USE_BOPT_g)
754: if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
755: #endif
756: col = in[l];
757: bcol = col/bs; /* block col number */
759: if (brow <= bcol){
760: ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
761: if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
762: /* element value a(k,l) */
763: if (roworiented) {
764: value = v[l + k*n];
765: } else {
766: value = v[k + l*m];
767: }
769: /* move pointer bap to a(k,l) quickly and add/insert value */
770: if (!sorted) low = 0; high = nrow;
771: while (high-low > 7) {
772: t = (low+high)/2;
773: if (rp[t] > bcol) high = t;
774: else low = t;
775: }
776: for (i=low; i<high; i++) {
777: /* printf("The loop of i=low.., rp[%d]=%dn",i,rp[i]); */
778: if (rp[i] > bcol) break;
779: if (rp[i] == bcol) {
780: bap = ap + bs2*i + bs*cidx + ridx;
781: if (is == ADD_VALUES) *bap += value;
782: else *bap = value;
783: /* for diag block, add/insert its symmetric element a(cidx,ridx) */
784: if (brow == bcol && ridx < cidx){
785: bap = ap + bs2*i + bs*ridx + cidx;
786: if (is == ADD_VALUES) *bap += value;
787: else *bap = value;
788: }
789: goto noinsert1;
790: }
791: }
792:
793: if (nonew == 1) goto noinsert1;
794: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
795: if (nrow >= rmax) {
796: /* there is no extra room in row, therefore enlarge */
797: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
798: MatScalar *new_a;
800: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
802: /* Malloc new storage space */
803: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
804: ierr = PetscMalloc(len,&new_a);
805: new_j = (int*)(new_a + bs2*new_nz);
806: new_i = new_j + new_nz;
808: /* copy over old data into new slots */
809: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
810: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
811: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
812: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
813: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
814: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
815: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
816: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
817: /* free up old matrix storage */
818: PetscFree(a->a);
819: if (!a->singlemalloc) {
820: PetscFree(a->i);
821: PetscFree(a->j);
822: }
823: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
824: a->singlemalloc = PETSC_TRUE;
826: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
827: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
828: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
829: a->s_maxnz += bs2*CHUNKSIZE;
830: a->reallocs++;
831: a->s_nz++;
832: }
834: N = nrow++ - 1;
835: /* shift up all the later entries in this row */
836: for (ii=N; ii>=i; ii--) {
837: rp[ii+1] = rp[ii];
838: ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
839: }
840: if (N>=i) {
841: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
842: }
843: rp[i] = bcol;
844: ap[bs2*i + bs*cidx + ridx] = value;
845: noinsert1:;
846: low = i;
847: /* } */
848: }
849: } /* end of if .. if.. */
850: } /* end of loop over added columns */
851: ailen[brow] = nrow;
852: } /* end of loop over added rows */
854: return(0);
855: }
857: extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
858: extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
859: extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
860: extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
861: extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
862: extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
863: extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
864: extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
865: extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
866: extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
867: extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
868: extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
869: extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
870: extern int MatZeroEntries_SeqSBAIJ(Mat);
871: extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
873: extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
874: extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
875: extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
876: extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
877: extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
878: extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
879: extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
880: extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
881: extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
882: extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
883: extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
884: extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
885: extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
886: extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
887: extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
889: extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
890: extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
891: extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
892: extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
893: extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
894: extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
895: extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
896: extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
898: extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
899: extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
900: extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
901: extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
902: extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
903: extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
904: extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
905: extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
907: extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
908: extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
909: extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
910: extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
911: extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
912: extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
913: extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
914: extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
916: extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
917: extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
918: extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
919: extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
920: extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
921: extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
922: extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
923: extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
925: #ifdef HAVE_MatICCFactor
926: /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */
927: int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
928: {
929: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
930: Mat outA;
931: int ierr;
932: PetscTruth row_identity,col_identity;
935: /*
936: if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
937: ISIdentity(row,&row_identity);
938: ISIdentity(col,&col_identity);
939: if (!row_identity || !col_identity) {
940: SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
941: }
942: */
944: outA = inA;
945: inA->factor = FACTOR_CHOLESKY;
947: if (!a->diag) {
948: MatMarkDiagonal_SeqSBAIJ(inA);
949: }
950: /*
951: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
952: for ILU(0) factorization with natural ordering
953: */
954: switch (a->bs) {
955: case 1:
956: inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
957: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
958: PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1n");
959: case 2:
960: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
961: inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
962: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering;
963: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2n");
964: break;
965: case 3:
966: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
967: inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
968: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering;
969: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3n");
970: break;
971: case 4:
972: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
973: inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
974: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering;
975: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4n");
976: break;
977: case 5:
978: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
979: inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
980: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering;
981: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5n");
982: break;
983: case 6:
984: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
985: inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
986: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering;
987: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6n");
988: break;
989: case 7:
990: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
991: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering;
992: inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
993: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7n");
994: break;
995: default:
996: a->row = row;
997: a->icol = col;
998: ierr = PetscObjectReference((PetscObject)row);
999: ierr = PetscObjectReference((PetscObject)col);
1001: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1002: ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));
1003: PetscLogObjectParent(inA,a->icol);
1005: if (!a->solve_work) {
1006: PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1007: PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1008: }
1009: }
1011: MatCholeskyFactorNumeric(inA,&outA);
1013: return(0);
1014: }
1015: #endif
1017: int MatPrintHelp_SeqSBAIJ(Mat A)
1018: {
1019: static PetscTruth called = PETSC_FALSE;
1020: MPI_Comm comm = A->comm;
1021: int ierr;
1024: if (called) {return(0);} else called = PETSC_TRUE;
1025: (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):n");
1026: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>n");
1027: return(0);
1028: }
1030: EXTERN_C_BEGIN
1031: int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1032: {
1033: Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1034: int i,nz,n;
1037: nz = baij->s_maxnz;
1038: n = mat->n;
1039: for (i=0; i<nz; i++) {
1040: baij->j[i] = indices[i];
1041: }
1042: baij->s_nz = nz;
1043: for (i=0; i<n; i++) {
1044: baij->ilen[i] = baij->imax[i];
1045: }
1047: return(0);
1048: }
1049: EXTERN_C_END
1051: /*@
1052: MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1053: in the matrix.
1055: Input Parameters:
1056: + mat - the SeqSBAIJ matrix
1057: - indices - the column indices
1059: Level: advanced
1061: Notes:
1062: This can be called if you have precomputed the nonzero structure of the
1063: matrix and want to provide it to the matrix object to improve the performance
1064: of the MatSetValues() operation.
1066: You MUST have set the correct numbers of nonzeros per row in the call to
1067: MatCreateSeqSBAIJ().
1069: MUST be called before any calls to MatSetValues()
1071: .seealso: MatCreateSeqSBAIJ
1072: @*/
1073: int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1074: {
1075: int ierr,(*f)(Mat,int *);
1079: PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);
1080: if (f) {
1081: (*f)(mat,indices);
1082: } else {
1083: SETERRQ(1,"Wrong type of matrix to set column indices");
1084: }
1085: return(0);
1086: }
1088: int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1089: {
1090: int ierr;
1093: MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1094: return(0);
1095: }
1097: /* -------------------------------------------------------------------*/
1098: static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1099: MatGetRow_SeqSBAIJ,
1100: MatRestoreRow_SeqSBAIJ,
1101: MatMult_SeqSBAIJ_N,
1102: MatMultAdd_SeqSBAIJ_N,
1103: MatMultTranspose_SeqSBAIJ,
1104: MatMultTransposeAdd_SeqSBAIJ,
1105: MatSolve_SeqSBAIJ_N,
1106: 0,
1107: 0,
1108: 0,
1109: 0,
1110: MatCholeskyFactor_SeqSBAIJ,
1111: MatRelax_SeqSBAIJ,
1112: MatTranspose_SeqSBAIJ,
1113: MatGetInfo_SeqSBAIJ,
1114: MatEqual_SeqSBAIJ,
1115: MatGetDiagonal_SeqSBAIJ,
1116: MatDiagonalScale_SeqSBAIJ,
1117: MatNorm_SeqSBAIJ,
1118: 0,
1119: MatAssemblyEnd_SeqSBAIJ,
1120: 0,
1121: MatSetOption_SeqSBAIJ,
1122: MatZeroEntries_SeqSBAIJ,
1123: MatZeroRows_SeqSBAIJ,
1124: 0,
1125: 0,
1126: MatCholeskyFactorSymbolic_SeqSBAIJ,
1127: MatCholeskyFactorNumeric_SeqSBAIJ_N,
1128: MatSetUpPreallocation_SeqSBAIJ,
1129: 0,
1130: MatICCFactorSymbolic_SeqSBAIJ,
1131: 0,
1132: 0,
1133: MatDuplicate_SeqSBAIJ,
1134: 0,
1135: 0,
1136: 0,
1137: 0,
1138: 0,
1139: MatGetSubMatrices_SeqSBAIJ,
1140: MatIncreaseOverlap_SeqSBAIJ,
1141: MatGetValues_SeqSBAIJ,
1142: 0,
1143: MatPrintHelp_SeqSBAIJ,
1144: MatScale_SeqSBAIJ,
1145: 0,
1146: 0,
1147: 0,
1148: MatGetBlockSize_SeqSBAIJ,
1149: MatGetRowIJ_SeqSBAIJ,
1150: MatRestoreRowIJ_SeqSBAIJ,
1151: 0,
1152: 0,
1153: 0,
1154: 0,
1155: 0,
1156: 0,
1157: MatSetValuesBlocked_SeqSBAIJ,
1158: MatGetSubMatrix_SeqSBAIJ,
1159: 0,
1160: 0,
1161: MatGetPetscMaps_Petsc,
1162: 0,
1163: 0,
1164: 0,
1165: 0,
1166: 0,
1167: 0,
1168: MatGetRowMax_SeqSBAIJ};
1170: EXTERN_C_BEGIN
1171: int MatStoreValues_SeqSBAIJ(Mat mat)
1172: {
1173: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1174: int nz = aij->i[mat->m]*aij->bs*aij->bs2;
1175: int ierr;
1178: if (aij->nonew != 1) {
1179: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1180: }
1182: /* allocate space for values if not already there */
1183: if (!aij->saved_values) {
1184: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1185: }
1187: /* copy values over */
1188: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1189: return(0);
1190: }
1191: EXTERN_C_END
1193: EXTERN_C_BEGIN
1194: int MatRetrieveValues_SeqSBAIJ(Mat mat)
1195: {
1196: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1197: int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1200: if (aij->nonew != 1) {
1201: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1202: }
1203: if (!aij->saved_values) {
1204: SETERRQ(1,"Must call MatStoreValues(A);first");
1205: }
1207: /* copy values over */
1208: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1209: return(0);
1210: }
1211: EXTERN_C_END
1213: EXTERN_C_BEGIN
1214: int MatCreate_SeqSBAIJ(Mat B)
1215: {
1216: Mat_SeqSBAIJ *b;
1217: int ierr,size;
1220: MPI_Comm_size(B->comm,&size);
1221: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1222: B->m = B->M = PetscMax(B->m,B->M);
1223: B->n = B->N = PetscMax(B->n,B->N);
1225: ierr = PetscNew(Mat_SeqSBAIJ,&b);
1226: B->data = (void*)b;
1227: ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));
1228: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1229: B->ops->destroy = MatDestroy_SeqSBAIJ;
1230: B->ops->view = MatView_SeqSBAIJ;
1231: B->factor = 0;
1232: B->lupivotthreshold = 1.0;
1233: B->mapping = 0;
1234: b->row = 0;
1235: b->icol = 0;
1236: b->reallocs = 0;
1237: b->saved_values = 0;
1238:
1239: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1240: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
1242: b->sorted = PETSC_FALSE;
1243: b->roworiented = PETSC_TRUE;
1244: b->nonew = 0;
1245: b->diag = 0;
1246: b->solve_work = 0;
1247: b->mult_work = 0;
1248: b->spptr = 0;
1249: b->keepzeroedrows = PETSC_FALSE;
1250:
1251: b->inew = 0;
1252: b->jnew = 0;
1253: b->anew = 0;
1254: b->a2anew = 0;
1255: b->permute = PETSC_FALSE;
1257: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1258: "MatStoreValues_SeqSBAIJ",
1259: (void*)MatStoreValues_SeqSBAIJ);
1260: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1261: "MatRetrieveValues_SeqSBAIJ",
1262: (void*)MatRetrieveValues_SeqSBAIJ);
1263: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1264: "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1265: (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);
1266: return(0);
1267: }
1268: EXTERN_C_END
1270: /*@C
1271: MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1272: compressed row) format. For good matrix assembly performance the
1273: user should preallocate the matrix storage by setting the parameter nz
1274: (or the array nnz). By setting these parameters accurately, performance
1275: during matrix assembly can be increased by more than a factor of 50.
1277: Collective on Mat
1279: Input Parameters:
1280: + A - the symmetric matrix
1281: . bs - size of block
1282: . nz - number of block nonzeros per block row (same for all rows)
1283: - nnz - array containing the number of block nonzeros in the various block rows
1284: (possibly different for each block row) or PETSC_NULL
1286: Options Database Keys:
1287: . -mat_no_unroll - uses code that does not unroll the loops in the
1288: block calculations (much slower)
1289: . -mat_block_size - size of the blocks to use
1291: Level: intermediate
1293: Notes:
1294: The block AIJ format is compatible with standard Fortran 77
1295: storage. That is, the stored row and column indices can begin at
1296: either one (as in Fortran) or zero. See the users' manual for details.
1298: Specify the preallocated storage with either nz or nnz (not both).
1299: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1300: allocation. For additional details, see the users manual chapter on
1301: matrices.
1303: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1304: @*/
1305: int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1306: {
1307: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1308: int i,len,ierr,mbs,bs2;
1309: PetscTruth flg;
1310: int s_nz;
1313: B->preallocated = PETSC_TRUE;
1314: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
1315: mbs = B->m/bs;
1316: bs2 = bs*bs;
1318: if (mbs*bs != B->m) {
1319: SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1320: }
1322: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1323: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1324: if (nnz) {
1325: for (i=0; i<mbs; i++) {
1326: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1327: if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1328: }
1329: }
1331: ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1332: if (!flg) {
1333: switch (bs) {
1334: case 1:
1335: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1336: B->ops->solve = MatSolve_SeqSBAIJ_1;
1337: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1;
1338: B->ops->mult = MatMult_SeqSBAIJ_1;
1339: B->ops->multadd = MatMultAdd_SeqSBAIJ_1;
1340: break;
1341: case 2:
1342: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1343: B->ops->solve = MatSolve_SeqSBAIJ_2;
1344: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2;
1345: B->ops->mult = MatMult_SeqSBAIJ_2;
1346: B->ops->multadd = MatMultAdd_SeqSBAIJ_2;
1347: break;
1348: case 3:
1349: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1350: B->ops->solve = MatSolve_SeqSBAIJ_3;
1351: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3;
1352: B->ops->mult = MatMult_SeqSBAIJ_3;
1353: B->ops->multadd = MatMultAdd_SeqSBAIJ_3;
1354: break;
1355: case 4:
1356: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1357: B->ops->solve = MatSolve_SeqSBAIJ_4;
1358: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4;
1359: B->ops->mult = MatMult_SeqSBAIJ_4;
1360: B->ops->multadd = MatMultAdd_SeqSBAIJ_4;
1361: break;
1362: case 5:
1363: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1364: B->ops->solve = MatSolve_SeqSBAIJ_5;
1365: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5;
1366: B->ops->mult = MatMult_SeqSBAIJ_5;
1367: B->ops->multadd = MatMultAdd_SeqSBAIJ_5;
1368: break;
1369: case 6:
1370: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1371: B->ops->solve = MatSolve_SeqSBAIJ_6;
1372: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6;
1373: B->ops->mult = MatMult_SeqSBAIJ_6;
1374: B->ops->multadd = MatMultAdd_SeqSBAIJ_6;
1375: break;
1376: case 7:
1377: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1378: B->ops->solve = MatSolve_SeqSBAIJ_7;
1379: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7;
1380: B->ops->mult = MatMult_SeqSBAIJ_7;
1381: B->ops->multadd = MatMultAdd_SeqSBAIJ_7;
1382: break;
1383: }
1384: }
1386: b->mbs = mbs;
1387: b->nbs = mbs;
1388: ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1389: if (!nnz) {
1390: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1391: else if (nz <= 0) nz = 1;
1392: for (i=0; i<mbs; i++) {
1393: b->imax[i] = nz;
1394: }
1395: nz = nz*mbs; /* total nz */
1396: } else {
1397: nz = 0;
1398: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1399: }
1400: /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1401: s_nz = nz;
1403: /* allocate the matrix space */
1404: len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1405: PetscMalloc(len,&b->a);
1406: PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));
1407: b->j = (int*)(b->a + s_nz*bs2);
1408: PetscMemzero(b->j,s_nz*sizeof(int));
1409: b->i = b->j + s_nz;
1410: b->singlemalloc = PETSC_TRUE;
1412: /* pointer to beginning of each row */
1413: b->i[0] = 0;
1414: for (i=1; i<mbs+1; i++) {
1415: b->i[i] = b->i[i-1] + b->imax[i-1];
1416: }
1418: /* b->ilen will count nonzeros in each block row so far. */
1419: PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1420: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1421: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1422:
1423: b->bs = bs;
1424: b->bs2 = bs2;
1425: b->s_nz = 0;
1426: b->s_maxnz = s_nz*bs2;
1427:
1428: b->inew = 0;
1429: b->jnew = 0;
1430: b->anew = 0;
1431: b->a2anew = 0;
1432: b->permute = PETSC_FALSE;
1433: return(0);
1434: }
1437: /*@C
1438: MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1439: compressed row) format. For good matrix assembly performance the
1440: user should preallocate the matrix storage by setting the parameter nz
1441: (or the array nnz). By setting these parameters accurately, performance
1442: during matrix assembly can be increased by more than a factor of 50.
1444: Collective on MPI_Comm
1446: Input Parameters:
1447: + comm - MPI communicator, set to PETSC_COMM_SELF
1448: . bs - size of block
1449: . m - number of rows, or number of columns
1450: . nz - number of block nonzeros per block row (same for all rows)
1451: - nnz - array containing the number of block nonzeros in the various block rows
1452: (possibly different for each block row) or PETSC_NULL
1454: Output Parameter:
1455: . A - the symmetric matrix
1457: Options Database Keys:
1458: . -mat_no_unroll - uses code that does not unroll the loops in the
1459: block calculations (much slower)
1460: . -mat_block_size - size of the blocks to use
1462: Level: intermediate
1464: Notes:
1465: The block AIJ format is compatible with standard Fortran 77
1466: storage. That is, the stored row and column indices can begin at
1467: either one (as in Fortran) or zero. See the users' manual for details.
1469: Specify the preallocated storage with either nz or nnz (not both).
1470: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1471: allocation. For additional details, see the users manual chapter on
1472: matrices.
1474: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1475: @*/
1476: int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1477: {
1479:
1481: MatCreate(comm,m,n,m,n,A);
1482: MatSetType(*A,MATSEQSBAIJ);
1483: MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);
1484: return(0);
1485: }
1487: int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1488: {
1489: Mat C;
1490: Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1491: int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1494: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1496: *B = 0;
1497: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1498: MatSetType(C,MATSEQSBAIJ);
1499: c = (Mat_SeqSBAIJ*)C->data;
1501: ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1502: C->preallocated = PETSC_TRUE;
1503: C->factor = A->factor;
1504: c->row = 0;
1505: c->icol = 0;
1506: c->saved_values = 0;
1507: c->keepzeroedrows = a->keepzeroedrows;
1508: C->assembled = PETSC_TRUE;
1510: c->bs = a->bs;
1511: c->bs2 = a->bs2;
1512: c->mbs = a->mbs;
1513: c->nbs = a->nbs;
1515: PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1516: PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1517: for (i=0; i<mbs; i++) {
1518: c->imax[i] = a->imax[i];
1519: c->ilen[i] = a->ilen[i];
1520: }
1522: /* allocate the matrix space */
1523: c->singlemalloc = PETSC_TRUE;
1524: len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1525: PetscMalloc(len,&c->a);
1526: c->j = (int*)(c->a + nz*bs2);
1527: c->i = c->j + nz;
1528: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1529: if (mbs > 0) {
1530: PetscMemcpy(c->j,a->j,nz*sizeof(int));
1531: if (cpvalues == MAT_COPY_VALUES) {
1532: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1533: } else {
1534: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1535: }
1536: }
1538: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1539: c->sorted = a->sorted;
1540: c->roworiented = a->roworiented;
1541: c->nonew = a->nonew;
1543: if (a->diag) {
1544: PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1545: PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1546: for (i=0; i<mbs; i++) {
1547: c->diag[i] = a->diag[i];
1548: }
1549: } else c->diag = 0;
1550: c->s_nz = a->s_nz;
1551: c->s_maxnz = a->s_maxnz;
1552: c->solve_work = 0;
1553: c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */
1554: c->mult_work = 0;
1555: *B = C;
1556: PetscFListDuplicate(A->qlist,&C->qlist);
1557: return(0);
1558: }
1560: EXTERN_C_BEGIN
1561: int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1562: {
1563: Mat_SeqSBAIJ *a;
1564: Mat B;
1565: int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1566: int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount;
1567: int kmax,jcount,block,idx,point,nzcountb,extra_rows;
1568: int *masked,nmask,tmp,bs2,ishift;
1569: PetscScalar *aa;
1570: MPI_Comm comm = ((PetscObject)viewer)->comm;
1573: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1574: bs2 = bs*bs;
1576: MPI_Comm_size(comm,&size);
1577: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1578: PetscViewerBinaryGetDescriptor(viewer,&fd);
1579: PetscBinaryRead(fd,header,4,PETSC_INT);
1580: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1581: M = header[1]; N = header[2]; nz = header[3];
1583: if (header[3] < 0) {
1584: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1585: }
1587: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1589: /*
1590: This code adds extra rows to make sure the number of rows is
1591: divisible by the blocksize
1592: */
1593: mbs = M/bs;
1594: extra_rows = bs - M + bs*(mbs);
1595: if (extra_rows == bs) extra_rows = 0;
1596: else mbs++;
1597: if (extra_rows) {
1598: PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksizen");
1599: }
1601: /* read in row lengths */
1602: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1603: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1604: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1606: /* read in column indices */
1607: PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1608: PetscBinaryRead(fd,jj,nz,PETSC_INT);
1609: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1611: /* loop over row lengths determining block row lengths */
1612: ierr = PetscMalloc(mbs*sizeof(int),&browlengths);
1613: ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);
1614: ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));
1615: ierr = PetscMalloc(2*mbs*sizeof(int),&mask);
1616: ierr = PetscMemzero(mask,mbs*sizeof(int));
1617: masked = mask + mbs;
1618: rowcount = 0; nzcount = 0;
1619: for (i=0; i<mbs; i++) {
1620: nmask = 0;
1621: for (j=0; j<bs; j++) {
1622: kmax = rowlengths[rowcount];
1623: for (k=0; k<kmax; k++) {
1624: tmp = jj[nzcount++]/bs; /* block col. index */
1625: if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1626: }
1627: rowcount++;
1628: }
1629: s_browlengths[i] += nmask;
1630: browlengths[i] = 2*s_browlengths[i];
1632: /* zero out the mask elements we set */
1633: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1634: }
1636: /* create our matrix */
1637: MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1638: B = *A;
1639: a = (Mat_SeqSBAIJ*)B->data;
1641: /* set matrix "i" values */
1642: a->i[0] = 0;
1643: for (i=1; i<= mbs; i++) {
1644: a->i[i] = a->i[i-1] + s_browlengths[i-1];
1645: a->ilen[i-1] = s_browlengths[i-1];
1646: }
1647: a->s_nz = 0;
1648: for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i];
1650: /* read in nonzero values */
1651: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1652: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1653: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1655: /* set "a" and "j" values into matrix */
1656: nzcount = 0; jcount = 0;
1657: for (i=0; i<mbs; i++) {
1658: nzcountb = nzcount;
1659: nmask = 0;
1660: for (j=0; j<bs; j++) {
1661: kmax = rowlengths[i*bs+j];
1662: for (k=0; k<kmax; k++) {
1663: tmp = jj[nzcount++]/bs; /* block col. index */
1664: if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1665: }
1666: }
1667: /* sort the masked values */
1668: PetscSortInt(nmask,masked);
1670: /* set "j" values into matrix */
1671: maskcount = 1;
1672: for (j=0; j<nmask; j++) {
1673: a->j[jcount++] = masked[j];
1674: mask[masked[j]] = maskcount++;
1675: }
1677: /* set "a" values into matrix */
1678: ishift = bs2*a->i[i];
1679: for (j=0; j<bs; j++) {
1680: kmax = rowlengths[i*bs+j];
1681: for (k=0; k<kmax; k++) {
1682: tmp = jj[nzcountb]/bs ; /* block col. index */
1683: if (tmp >= i){
1684: block = mask[tmp] - 1;
1685: point = jj[nzcountb] - bs*tmp;
1686: idx = ishift + bs2*block + j + bs*point;
1687: a->a[idx] = aa[nzcountb];
1688: }
1689: nzcountb++;
1690: }
1691: }
1692: /* zero out the mask elements we set */
1693: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1694: }
1695: if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1697: PetscFree(rowlengths);
1698: PetscFree(browlengths);
1699: PetscFree(s_browlengths);
1700: PetscFree(aa);
1701: PetscFree(jj);
1702: PetscFree(mask);
1704: B->assembled = PETSC_TRUE;
1705: MatView_Private(B);
1706: return(0);
1707: }
1708: EXTERN_C_END
1710: int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1711: {
1712: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1713: MatScalar *aa=a->a,*v,*v1;
1714: PetscScalar *x,*b,*t,sum,d;
1715: int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1716: int nz,nz1,*vj,*vj1,i;
1719: its = its*lits;
1720: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1722: if (bs > 1)
1723: SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1725: VecGetArray(xx,&x);
1726: if (xx != bb) {
1727: VecGetArray(bb,&b);
1728: } else {
1729: b = x;
1730: }
1732: PetscMalloc(m*sizeof(PetscScalar),&t);
1733:
1734: if (flag & SOR_ZERO_INITIAL_GUESS) {
1735: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1736: for (i=0; i<m; i++)
1737: t[i] = b[i];
1739: for (i=0; i<m; i++){
1740: d = *(aa + ai[i]); /* diag[i] */
1741: v = aa + ai[i] + 1;
1742: vj = aj + ai[i] + 1;
1743: nz = ai[i+1] - ai[i] - 1;
1744: x[i] = omega*t[i]/d;
1745: while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1746: PetscLogFlops(2*nz-1);
1747: }
1748: }
1750: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1751: for (i=0; i<m; i++)
1752: t[i] = b[i];
1753:
1754: for (i=0; i<m-1; i++){ /* update rhs */
1755: v = aa + ai[i] + 1;
1756: vj = aj + ai[i] + 1;
1757: nz = ai[i+1] - ai[i] - 1;
1758: while (nz--) t[*vj++] -= x[i]*(*v++);
1759: PetscLogFlops(2*nz-1);
1760: }
1761: for (i=m-1; i>=0; i--){
1762: d = *(aa + ai[i]);
1763: v = aa + ai[i] + 1;
1764: vj = aj + ai[i] + 1;
1765: nz = ai[i+1] - ai[i] - 1;
1766: sum = t[i];
1767: while (nz--) sum -= x[*vj++]*(*v++);
1768: PetscLogFlops(2*nz-1);
1769: x[i] = (1-omega)*x[i] + omega*sum/d;
1770: }
1771: }
1772: its--;
1773: }
1775: while (its--) {
1776: /*
1777: forward sweep:
1778: for i=0,...,m-1:
1779: sum[i] = (b[i] - U(i,:)x )/d[i];
1780: x[i] = (1-omega)x[i] + omega*sum[i];
1781: b = b - x[i]*U^T(i,:);
1782:
1783: */
1784: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1785: for (i=0; i<m; i++)
1786: t[i] = b[i];
1788: for (i=0; i<m; i++){
1789: d = *(aa + ai[i]); /* diag[i] */
1790: v = aa + ai[i] + 1; v1=v;
1791: vj = aj + ai[i] + 1; vj1=vj;
1792: nz = ai[i+1] - ai[i] - 1; nz1=nz;
1793: sum = t[i];
1794: while (nz1--) sum -= (*v1++)*x[*vj1++];
1795: x[i] = (1-omega)*x[i] + omega*sum/d;
1796: while (nz--) t[*vj++] -= x[i]*(*v++);
1797: PetscLogFlops(4*nz-2);
1798: }
1799: }
1800:
1801: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1802: /*
1803: backward sweep:
1804: b = b - x[i]*U^T(i,:), i=0,...,n-2
1805: for i=m-1,...,0:
1806: sum[i] = (b[i] - U(i,:)x )/d[i];
1807: x[i] = (1-omega)x[i] + omega*sum[i];
1808: */
1809: for (i=0; i<m; i++)
1810: t[i] = b[i];
1811:
1812: for (i=0; i<m-1; i++){ /* update rhs */
1813: v = aa + ai[i] + 1;
1814: vj = aj + ai[i] + 1;
1815: nz = ai[i+1] - ai[i] - 1;
1816: while (nz--) t[*vj++] -= x[i]*(*v++);
1817: PetscLogFlops(2*nz-1);
1818: }
1819: for (i=m-1; i>=0; i--){
1820: d = *(aa + ai[i]);
1821: v = aa + ai[i] + 1;
1822: vj = aj + ai[i] + 1;
1823: nz = ai[i+1] - ai[i] - 1;
1824: sum = t[i];
1825: while (nz--) sum -= x[*vj++]*(*v++);
1826: PetscLogFlops(2*nz-1);
1827: x[i] = (1-omega)*x[i] + omega*sum/d;
1828: }
1829: }
1830: }
1832: PetscFree(t);
1833: VecRestoreArray(xx,&x);
1834: if (bb != xx) {
1835: VecRestoreArray(bb,&b);
1836: }
1838: return(0);
1839: }