Actual source code: baij.c
1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/vec/vecimpl.h
9: #include src/inline/spops.h
10: #include petscsys.h
12: /* UGLY, ugly, ugly
13: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16: converts the entries into single precision and then calls ..._MatScalar() to put them
17: into the single precision data structures.
18: */
19: #if defined(PETSC_USE_MAT_SINGLE)
20: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21: #else
22: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23: #endif
25: #define CHUNKSIZE 10
27: /*
28: Checks for missing diagonals
29: */
30: int MatMissingDiagonal_SeqBAIJ(Mat A)
31: {
32: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
33: int *diag,*jj = a->j,i,ierr;
36: MatMarkDiagonal_SeqBAIJ(A);
37: diag = a->diag;
38: for (i=0; i<a->mbs; i++) {
39: if (jj[diag[i]] != i) {
40: SETERRQ1(1,"Matrix is missing diagonal number %d",i);
41: }
42: }
43: return(0);
44: }
46: int MatMarkDiagonal_SeqBAIJ(Mat A)
47: {
48: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
49: int i,j,*diag,m = a->mbs,ierr;
52: if (a->diag) return(0);
54: PetscMalloc((m+1)*sizeof(int),&diag);
55: PetscLogObjectMemory(A,(m+1)*sizeof(int));
56: for (i=0; i<m; i++) {
57: diag[i] = a->i[i+1];
58: for (j=a->i[i]; j<a->i[i+1]; j++) {
59: if (a->j[j] == i) {
60: diag[i] = j;
61: break;
62: }
63: }
64: }
65: a->diag = diag;
66: return(0);
67: }
70: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
72: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
73: {
74: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
75: int ierr,n = a->mbs,i;
78: *nn = n;
79: if (!ia) return(0);
80: if (symmetric) {
81: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
82: } else if (oshift == 1) {
83: /* temporarily add 1 to i and j indices */
84: int nz = a->i[n];
85: for (i=0; i<nz; i++) a->j[i]++;
86: for (i=0; i<n+1; i++) a->i[i]++;
87: *ia = a->i; *ja = a->j;
88: } else {
89: *ia = a->i; *ja = a->j;
90: }
92: return(0);
93: }
95: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
96: {
97: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
98: int i,n = a->mbs,ierr;
101: if (!ia) return(0);
102: if (symmetric) {
103: PetscFree(*ia);
104: PetscFree(*ja);
105: } else if (oshift == 1) {
106: int nz = a->i[n]-1;
107: for (i=0; i<nz; i++) a->j[i]--;
108: for (i=0; i<n+1; i++) a->i[i]--;
109: }
110: return(0);
111: }
113: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
114: {
115: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
118: *bs = baij->bs;
119: return(0);
120: }
123: int MatDestroy_SeqBAIJ(Mat A)
124: {
125: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
126: int ierr;
129: #if defined(PETSC_USE_LOG)
130: PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
131: #endif
132: PetscFree(a->a);
133: if (!a->singlemalloc) {
134: PetscFree(a->i);
135: PetscFree(a->j);
136: }
137: if (a->row) {
138: ISDestroy(a->row);
139: }
140: if (a->col) {
141: ISDestroy(a->col);
142: }
143: if (a->diag) {PetscFree(a->diag);}
144: if (a->ilen) {PetscFree(a->ilen);}
145: if (a->imax) {PetscFree(a->imax);}
146: if (a->solve_work) {PetscFree(a->solve_work);}
147: if (a->mult_work) {PetscFree(a->mult_work);}
148: if (a->icol) {ISDestroy(a->icol);}
149: if (a->saved_values) {PetscFree(a->saved_values);}
150: #if defined(PETSC_USE_MAT_SINGLE)
151: if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
152: #endif
153: PetscFree(a);
154: return(0);
155: }
157: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
158: {
159: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
162: switch (op) {
163: case MAT_ROW_ORIENTED:
164: a->roworiented = PETSC_TRUE;
165: break;
166: case MAT_COLUMN_ORIENTED:
167: a->roworiented = PETSC_FALSE;
168: break;
169: case MAT_COLUMNS_SORTED:
170: a->sorted = PETSC_TRUE;
171: break;
172: case MAT_COLUMNS_UNSORTED:
173: a->sorted = PETSC_FALSE;
174: break;
175: case MAT_KEEP_ZEROED_ROWS:
176: a->keepzeroedrows = PETSC_TRUE;
177: break;
178: case MAT_NO_NEW_NONZERO_LOCATIONS:
179: a->nonew = 1;
180: break;
181: case MAT_NEW_NONZERO_LOCATION_ERR:
182: a->nonew = -1;
183: break;
184: case MAT_NEW_NONZERO_ALLOCATION_ERR:
185: a->nonew = -2;
186: break;
187: case MAT_YES_NEW_NONZERO_LOCATIONS:
188: a->nonew = 0;
189: break;
190: case MAT_ROWS_SORTED:
191: case MAT_ROWS_UNSORTED:
192: case MAT_YES_NEW_DIAGONALS:
193: case MAT_IGNORE_OFF_PROC_ENTRIES:
194: case MAT_USE_HASH_TABLE:
195: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignoredn");
196: break;
197: case MAT_NO_NEW_DIAGONALS:
198: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
199: case MAT_USE_SINGLE_PRECISION_SOLVES:
200: if (a->bs==4) {
201: PetscTruth sse_enabled_local,sse_enabled_global;
202: int ierr;
204: sse_enabled_local = PETSC_FALSE;
205: sse_enabled_global = PETSC_FALSE;
207: PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);
208: #if defined(PETSC_HAVE_SSE)
209: if (sse_enabled_local) {
210: a->single_precision_solves = PETSC_TRUE;
211: A->ops->solve = MatSolve_SeqBAIJ_Update;
212: A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
213: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES setn");
214: break;
215: } else {
216: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
217: }
218: #else
219: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
220: #endif
221: }
222: break;
223: default:
224: SETERRQ(PETSC_ERR_SUP,"unknown option");
225: }
226: return(0);
227: }
229: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
230: {
231: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
232: int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
233: MatScalar *aa,*aa_i;
234: PetscScalar *v_i;
237: bs = a->bs;
238: ai = a->i;
239: aj = a->j;
240: aa = a->a;
241: bs2 = a->bs2;
242:
243: if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
244:
245: bn = row/bs; /* Block number */
246: bp = row % bs; /* Block Position */
247: M = ai[bn+1] - ai[bn];
248: *nz = bs*M;
249:
250: if (v) {
251: *v = 0;
252: if (*nz) {
253: PetscMalloc((*nz)*sizeof(PetscScalar),v);
254: for (i=0; i<M; i++) { /* for each block in the block row */
255: v_i = *v + i*bs;
256: aa_i = aa + bs2*(ai[bn] + i);
257: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
258: }
259: }
260: }
262: if (idx) {
263: *idx = 0;
264: if (*nz) {
265: PetscMalloc((*nz)*sizeof(int),idx);
266: for (i=0; i<M; i++) { /* for each block in the block row */
267: idx_i = *idx + i*bs;
268: itmp = bs*aj[ai[bn] + i];
269: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
270: }
271: }
272: }
273: return(0);
274: }
276: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
277: {
281: if (idx) {if (*idx) {PetscFree(*idx);}}
282: if (v) {if (*v) {PetscFree(*v);}}
283: return(0);
284: }
286: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
287: {
288: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
289: Mat C;
290: int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
291: int *rows,*cols,bs2=a->bs2;
292: PetscScalar *array;
295: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
296: PetscMalloc((1+nbs)*sizeof(int),&col);
297: PetscMemzero(col,(1+nbs)*sizeof(int));
299: #if defined(PETSC_USE_MAT_SINGLE)
300: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
301: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
302: #else
303: array = a->a;
304: #endif
306: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
307: MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
308: PetscFree(col);
309: PetscMalloc(2*bs*sizeof(int),&rows);
310: cols = rows + bs;
311: for (i=0; i<mbs; i++) {
312: cols[0] = i*bs;
313: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
314: len = ai[i+1] - ai[i];
315: for (j=0; j<len; j++) {
316: rows[0] = (*aj++)*bs;
317: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
318: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
319: array += bs2;
320: }
321: }
322: PetscFree(rows);
323: #if defined(PETSC_USE_MAT_SINGLE)
324: PetscFree(array);
325: #endif
326:
327: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
328: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
329:
330: if (B) {
331: *B = C;
332: } else {
333: MatHeaderCopy(A,C);
334: }
335: return(0);
336: }
338: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
339: {
340: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
341: int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
342: PetscScalar *aa;
343: FILE *file;
346: ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);
347: ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
348: col_lens[0] = MAT_FILE_COOKIE;
350: col_lens[1] = A->m;
351: col_lens[2] = A->n;
352: col_lens[3] = a->nz*bs2;
354: /* store lengths of each row and write (including header) to file */
355: count = 0;
356: for (i=0; i<a->mbs; i++) {
357: for (j=0; j<bs; j++) {
358: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
359: }
360: }
361: PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
362: PetscFree(col_lens);
364: /* store column indices (zero start index) */
365: ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
366: count = 0;
367: for (i=0; i<a->mbs; i++) {
368: for (j=0; j<bs; j++) {
369: for (k=a->i[i]; k<a->i[i+1]; k++) {
370: for (l=0; l<bs; l++) {
371: jj[count++] = bs*a->j[k] + l;
372: }
373: }
374: }
375: }
376: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
377: PetscFree(jj);
379: /* store nonzero values */
380: ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
381: count = 0;
382: for (i=0; i<a->mbs; i++) {
383: for (j=0; j<bs; j++) {
384: for (k=a->i[i]; k<a->i[i+1]; k++) {
385: for (l=0; l<bs; l++) {
386: aa[count++] = a->a[bs2*k + l*bs + j];
387: }
388: }
389: }
390: }
391: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
392: PetscFree(aa);
394: PetscViewerBinaryGetInfoPointer(viewer,&file);
395: if (file) {
396: fprintf(file,"-matload_block_size %dn",a->bs);
397: }
398: return(0);
399: }
401: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
402: {
403: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
404: int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
405: PetscViewerFormat format;
408: PetscViewerGetFormat(viewer,&format);
409: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
410: PetscViewerASCIIPrintf(viewer," block size is %dn",bs);
411: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
412: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
413: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
414: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
415: for (i=0; i<a->mbs; i++) {
416: for (j=0; j<bs; j++) {
417: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
418: for (k=a->i[i]; k<a->i[i+1]; k++) {
419: for (l=0; l<bs; l++) {
420: #if defined(PETSC_USE_COMPLEX)
421: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
422: PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
423: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
424: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
425: PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
426: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
427: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
428: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
429: }
430: #else
431: if (a->a[bs2*k + l*bs + j] != 0.0) {
432: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
433: }
434: #endif
435: }
436: }
437: PetscViewerASCIIPrintf(viewer,"n");
438: }
439: }
440: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
441: } else {
442: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
443: for (i=0; i<a->mbs; i++) {
444: for (j=0; j<bs; j++) {
445: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
446: for (k=a->i[i]; k<a->i[i+1]; k++) {
447: for (l=0; l<bs; l++) {
448: #if defined(PETSC_USE_COMPLEX)
449: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
450: PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
451: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
452: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
453: PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
454: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
455: } else {
456: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
457: }
458: #else
459: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
460: #endif
461: }
462: }
463: PetscViewerASCIIPrintf(viewer,"n");
464: }
465: }
466: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
467: }
468: PetscViewerFlush(viewer);
469: return(0);
470: }
472: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
473: {
474: Mat A = (Mat) Aa;
475: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
476: int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
477: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
478: MatScalar *aa;
479: PetscViewer viewer;
483: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
484: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
486: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
488: /* loop over matrix elements drawing boxes */
489: color = PETSC_DRAW_BLUE;
490: for (i=0,row=0; i<mbs; i++,row+=bs) {
491: for (j=a->i[i]; j<a->i[i+1]; j++) {
492: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
493: x_l = a->j[j]*bs; x_r = x_l + 1.0;
494: aa = a->a + j*bs2;
495: for (k=0; k<bs; k++) {
496: for (l=0; l<bs; l++) {
497: if (PetscRealPart(*aa++) >= 0.) continue;
498: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
499: }
500: }
501: }
502: }
503: color = PETSC_DRAW_CYAN;
504: for (i=0,row=0; i<mbs; i++,row+=bs) {
505: for (j=a->i[i]; j<a->i[i+1]; j++) {
506: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
507: x_l = a->j[j]*bs; x_r = x_l + 1.0;
508: aa = a->a + j*bs2;
509: for (k=0; k<bs; k++) {
510: for (l=0; l<bs; l++) {
511: if (PetscRealPart(*aa++) != 0.) continue;
512: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
513: }
514: }
515: }
516: }
518: color = PETSC_DRAW_RED;
519: for (i=0,row=0; i<mbs; i++,row+=bs) {
520: for (j=a->i[i]; j<a->i[i+1]; j++) {
521: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
522: x_l = a->j[j]*bs; x_r = x_l + 1.0;
523: aa = a->a + j*bs2;
524: for (k=0; k<bs; k++) {
525: for (l=0; l<bs; l++) {
526: if (PetscRealPart(*aa++) <= 0.) continue;
527: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
528: }
529: }
530: }
531: }
532: return(0);
533: }
535: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
536: {
537: int ierr;
538: PetscReal xl,yl,xr,yr,w,h;
539: PetscDraw draw;
540: PetscTruth isnull;
544: PetscViewerDrawGetDraw(viewer,0,&draw);
545: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
547: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
548: xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
549: xr += w; yr += h; xl = -w; yl = -h;
550: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
551: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
552: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
553: return(0);
554: }
556: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
557: {
558: int ierr;
559: PetscTruth issocket,isascii,isbinary,isdraw;
562: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
563: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
564: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
565: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
566: if (issocket) {
567: SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
568: } else if (isascii){
569: MatView_SeqBAIJ_ASCII(A,viewer);
570: } else if (isbinary) {
571: MatView_SeqBAIJ_Binary(A,viewer);
572: } else if (isdraw) {
573: MatView_SeqBAIJ_Draw(A,viewer);
574: } else {
575: SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
576: }
577: return(0);
578: }
581: int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
582: {
583: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
584: int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
585: int *ai = a->i,*ailen = a->ilen;
586: int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
587: MatScalar *ap,*aa = a->a,zero = 0.0;
590: for (k=0; k<m; k++) { /* loop over rows */
591: row = im[k]; brow = row/bs;
592: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
593: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
594: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
595: nrow = ailen[brow];
596: for (l=0; l<n; l++) { /* loop over columns */
597: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
598: if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
599: col = in[l] ;
600: bcol = col/bs;
601: cidx = col%bs;
602: ridx = row%bs;
603: high = nrow;
604: low = 0; /* assume unsorted */
605: while (high-low > 5) {
606: t = (low+high)/2;
607: if (rp[t] > bcol) high = t;
608: else low = t;
609: }
610: for (i=low; i<high; i++) {
611: if (rp[i] > bcol) break;
612: if (rp[i] == bcol) {
613: *v++ = ap[bs2*i+bs*cidx+ridx];
614: goto finished;
615: }
616: }
617: *v++ = zero;
618: finished:;
619: }
620: }
621: return(0);
622: }
624: #if defined(PETSC_USE_MAT_SINGLE)
625: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
626: {
627: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
628: int ierr,i,N = m*n*b->bs2;
629: MatScalar *vsingle;
632: if (N > b->setvalueslen) {
633: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
634: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
635: b->setvalueslen = N;
636: }
637: vsingle = b->setvaluescopy;
638: for (i=0; i<N; i++) {
639: vsingle[i] = v[i];
640: }
641: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
642: return(0);
643: }
644: #endif
647: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
648: {
649: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
650: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
651: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
652: int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
653: PetscTruth roworiented=a->roworiented;
654: MatScalar *value = v,*ap,*aa = a->a,*bap;
657: if (roworiented) {
658: stepval = (n-1)*bs;
659: } else {
660: stepval = (m-1)*bs;
661: }
662: for (k=0; k<m; k++) { /* loop over added rows */
663: row = im[k];
664: if (row < 0) continue;
665: #if defined(PETSC_USE_BOPT_g)
666: if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
667: #endif
668: rp = aj + ai[row];
669: ap = aa + bs2*ai[row];
670: rmax = imax[row];
671: nrow = ailen[row];
672: low = 0;
673: for (l=0; l<n; l++) { /* loop over added columns */
674: if (in[l] < 0) continue;
675: #if defined(PETSC_USE_BOPT_g)
676: if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
677: #endif
678: col = in[l];
679: if (roworiented) {
680: value = v + k*(stepval+bs)*bs + l*bs;
681: } else {
682: value = v + l*(stepval+bs)*bs + k*bs;
683: }
684: if (!sorted) low = 0; high = nrow;
685: while (high-low > 7) {
686: t = (low+high)/2;
687: if (rp[t] > col) high = t;
688: else low = t;
689: }
690: for (i=low; i<high; i++) {
691: if (rp[i] > col) break;
692: if (rp[i] == col) {
693: bap = ap + bs2*i;
694: if (roworiented) {
695: if (is == ADD_VALUES) {
696: for (ii=0; ii<bs; ii++,value+=stepval) {
697: for (jj=ii; jj<bs2; jj+=bs) {
698: bap[jj] += *value++;
699: }
700: }
701: } else {
702: for (ii=0; ii<bs; ii++,value+=stepval) {
703: for (jj=ii; jj<bs2; jj+=bs) {
704: bap[jj] = *value++;
705: }
706: }
707: }
708: } else {
709: if (is == ADD_VALUES) {
710: for (ii=0; ii<bs; ii++,value+=stepval) {
711: for (jj=0; jj<bs; jj++) {
712: *bap++ += *value++;
713: }
714: }
715: } else {
716: for (ii=0; ii<bs; ii++,value+=stepval) {
717: for (jj=0; jj<bs; jj++) {
718: *bap++ = *value++;
719: }
720: }
721: }
722: }
723: goto noinsert2;
724: }
725: }
726: if (nonew == 1) goto noinsert2;
727: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
728: if (nrow >= rmax) {
729: /* there is no extra room in row, therefore enlarge */
730: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
731: MatScalar *new_a;
733: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
735: /* malloc new storage space */
736: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
737: ierr = PetscMalloc(len,&new_a);
738: new_j = (int*)(new_a + bs2*new_nz);
739: new_i = new_j + new_nz;
741: /* copy over old data into new slots */
742: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
743: for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
744: PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
745: len = (new_nz - CHUNKSIZE - ai[row] - nrow);
746: PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
747: PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
748: PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
749: PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
750: /* free up old matrix storage */
751: PetscFree(a->a);
752: if (!a->singlemalloc) {
753: PetscFree(a->i);
754: PetscFree(a->j);
755: }
756: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
757: a->singlemalloc = PETSC_TRUE;
759: rp = aj + ai[row]; ap = aa + bs2*ai[row];
760: rmax = imax[row] = imax[row] + CHUNKSIZE;
761: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
762: a->maxnz += bs2*CHUNKSIZE;
763: a->reallocs++;
764: a->nz++;
765: }
766: N = nrow++ - 1;
767: /* shift up all the later entries in this row */
768: for (ii=N; ii>=i; ii--) {
769: rp[ii+1] = rp[ii];
770: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
771: }
772: if (N >= i) {
773: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
774: }
775: rp[i] = col;
776: bap = ap + bs2*i;
777: if (roworiented) {
778: for (ii=0; ii<bs; ii++,value+=stepval) {
779: for (jj=ii; jj<bs2; jj+=bs) {
780: bap[jj] = *value++;
781: }
782: }
783: } else {
784: for (ii=0; ii<bs; ii++,value+=stepval) {
785: for (jj=0; jj<bs; jj++) {
786: *bap++ = *value++;
787: }
788: }
789: }
790: noinsert2:;
791: low = i;
792: }
793: ailen[row] = nrow;
794: }
795: return(0);
796: }
799: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
800: {
801: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
802: int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
803: int m = A->m,*ip,N,*ailen = a->ilen;
804: int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
805: MatScalar *aa = a->a,*ap;
808: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
810: if (m) rmax = ailen[0];
811: for (i=1; i<mbs; i++) {
812: /* move each row back by the amount of empty slots (fshift) before it*/
813: fshift += imax[i-1] - ailen[i-1];
814: rmax = PetscMax(rmax,ailen[i]);
815: if (fshift) {
816: ip = aj + ai[i]; ap = aa + bs2*ai[i];
817: N = ailen[i];
818: for (j=0; j<N; j++) {
819: ip[j-fshift] = ip[j];
820: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
821: }
822: }
823: ai[i] = ai[i-1] + ailen[i-1];
824: }
825: if (mbs) {
826: fshift += imax[mbs-1] - ailen[mbs-1];
827: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
828: }
829: /* reset ilen and imax for each row */
830: for (i=0; i<mbs; i++) {
831: ailen[i] = imax[i] = ai[i+1] - ai[i];
832: }
833: a->nz = ai[mbs];
835: /* diagonals may have moved, so kill the diagonal pointers */
836: if (fshift && a->diag) {
837: PetscFree(a->diag);
838: PetscLogObjectMemory(A,-(m+1)*sizeof(int));
839: a->diag = 0;
840: }
841: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
842: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %dn",a->reallocs);
843: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %dn",rmax);
844: a->reallocs = 0;
845: A->info.nz_unneeded = (PetscReal)fshift*bs2;
847: return(0);
848: }
852: /*
853: This function returns an array of flags which indicate the locations of contiguous
854: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
855: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
856: Assume: sizes should be long enough to hold all the values.
857: */
858: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
859: {
860: int i,j,k,row;
861: PetscTruth flg;
864: for (i=0,j=0; i<n; j++) {
865: row = idx[i];
866: if (row%bs!=0) { /* Not the begining of a block */
867: sizes[j] = 1;
868: i++;
869: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
870: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
871: i++;
872: } else { /* Begining of the block, so check if the complete block exists */
873: flg = PETSC_TRUE;
874: for (k=1; k<bs; k++) {
875: if (row+k != idx[i+k]) { /* break in the block */
876: flg = PETSC_FALSE;
877: break;
878: }
879: }
880: if (flg == PETSC_TRUE) { /* No break in the bs */
881: sizes[j] = bs;
882: i+= bs;
883: } else {
884: sizes[j] = 1;
885: i++;
886: }
887: }
888: }
889: *bs_max = j;
890: return(0);
891: }
892:
893: int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
894: {
895: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
896: int ierr,i,j,k,count,is_n,*is_idx,*rows;
897: int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
898: PetscScalar zero = 0.0;
899: MatScalar *aa;
902: /* Make a copy of the IS and sort it */
903: ISGetLocalSize(is,&is_n);
904: ISGetIndices(is,&is_idx);
906: /* allocate memory for rows,sizes */
907: ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);
908: sizes = rows + is_n;
910: /* copy IS values to rows, and sort them */
911: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
912: PetscSortInt(is_n,rows);
913: if (baij->keepzeroedrows) {
914: for (i=0; i<is_n; i++) { sizes[i] = 1; }
915: bs_max = is_n;
916: } else {
917: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
918: }
919: ISRestoreIndices(is,&is_idx);
921: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
922: row = rows[j];
923: if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
924: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
925: aa = baij->a + baij->i[row/bs]*bs2 + (row%bs);
926: if (sizes[i] == bs && !baij->keepzeroedrows) {
927: if (diag) {
928: if (baij->ilen[row/bs] > 0) {
929: baij->ilen[row/bs] = 1;
930: baij->j[baij->i[row/bs]] = row/bs;
931: PetscMemzero(aa,count*bs*sizeof(MatScalar));
932: }
933: /* Now insert all the diagonal values for this bs */
934: for (k=0; k<bs; k++) {
935: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
936: }
937: } else { /* (!diag) */
938: baij->ilen[row/bs] = 0;
939: } /* end (!diag) */
940: } else { /* (sizes[i] != bs) */
941: #if defined (PETSC_USE_DEBUG)
942: if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
943: #endif
944: for (k=0; k<count; k++) {
945: aa[0] = zero;
946: aa += bs;
947: }
948: if (diag) {
949: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
950: }
951: }
952: }
954: PetscFree(rows);
955: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
956: return(0);
957: }
959: int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
960: {
961: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
962: int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
963: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
964: int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
965: int ridx,cidx,bs2=a->bs2,ierr;
966: PetscTruth roworiented=a->roworiented;
967: MatScalar *ap,value,*aa=a->a,*bap;
970: for (k=0; k<m; k++) { /* loop over added rows */
971: row = im[k]; brow = row/bs;
972: if (row < 0) continue;
973: #if defined(PETSC_USE_BOPT_g)
974: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
975: #endif
976: rp = aj + ai[brow];
977: ap = aa + bs2*ai[brow];
978: rmax = imax[brow];
979: nrow = ailen[brow];
980: low = 0;
981: for (l=0; l<n; l++) { /* loop over added columns */
982: if (in[l] < 0) continue;
983: #if defined(PETSC_USE_BOPT_g)
984: if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
985: #endif
986: col = in[l]; bcol = col/bs;
987: ridx = row % bs; cidx = col % bs;
988: if (roworiented) {
989: value = v[l + k*n];
990: } else {
991: value = v[k + l*m];
992: }
993: if (!sorted) low = 0; high = nrow;
994: while (high-low > 7) {
995: t = (low+high)/2;
996: if (rp[t] > bcol) high = t;
997: else low = t;
998: }
999: for (i=low; i<high; i++) {
1000: if (rp[i] > bcol) break;
1001: if (rp[i] == bcol) {
1002: bap = ap + bs2*i + bs*cidx + ridx;
1003: if (is == ADD_VALUES) *bap += value;
1004: else *bap = value;
1005: goto noinsert1;
1006: }
1007: }
1008: if (nonew == 1) goto noinsert1;
1009: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1010: if (nrow >= rmax) {
1011: /* there is no extra room in row, therefore enlarge */
1012: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1013: MatScalar *new_a;
1015: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1017: /* Malloc new storage space */
1018: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1019: ierr = PetscMalloc(len,&new_a);
1020: new_j = (int*)(new_a + bs2*new_nz);
1021: new_i = new_j + new_nz;
1023: /* copy over old data into new slots */
1024: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1025: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1026: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1027: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1028: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1029: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1030: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1031: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1032: /* free up old matrix storage */
1033: PetscFree(a->a);
1034: if (!a->singlemalloc) {
1035: PetscFree(a->i);
1036: PetscFree(a->j);
1037: }
1038: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1039: a->singlemalloc = PETSC_TRUE;
1041: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1042: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1043: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1044: a->maxnz += bs2*CHUNKSIZE;
1045: a->reallocs++;
1046: a->nz++;
1047: }
1048: N = nrow++ - 1;
1049: /* shift up all the later entries in this row */
1050: for (ii=N; ii>=i; ii--) {
1051: rp[ii+1] = rp[ii];
1052: ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1053: }
1054: if (N>=i) {
1055: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1056: }
1057: rp[i] = bcol;
1058: ap[bs2*i + bs*cidx + ridx] = value;
1059: noinsert1:;
1060: low = i;
1061: }
1062: ailen[brow] = nrow;
1063: }
1064: return(0);
1065: }
1068: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1069: {
1070: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1071: Mat outA;
1072: int ierr;
1073: PetscTruth row_identity,col_identity;
1076: if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1077: ISIdentity(row,&row_identity);
1078: ISIdentity(col,&col_identity);
1079: if (!row_identity || !col_identity) {
1080: SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1081: }
1083: outA = inA;
1084: inA->factor = FACTOR_LU;
1086: if (!a->diag) {
1087: MatMarkDiagonal_SeqBAIJ(inA);
1088: }
1090: a->row = row;
1091: a->col = col;
1092: ierr = PetscObjectReference((PetscObject)row);
1093: ierr = PetscObjectReference((PetscObject)col);
1094:
1095: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1096: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1097: PetscLogObjectParent(inA,a->icol);
1098:
1099: /*
1100: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1101: for ILU(0) factorization with natural ordering
1102: */
1103: if (a->bs < 8) {
1104: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1105: } else {
1106: if (!a->solve_work) {
1107: PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1108: PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1109: }
1110: }
1112: MatLUFactorNumeric(inA,&outA);
1114: return(0);
1115: }
1116: int MatPrintHelp_SeqBAIJ(Mat A)
1117: {
1118: static PetscTruth called = PETSC_FALSE;
1119: MPI_Comm comm = A->comm;
1120: int ierr;
1123: if (called) {return(0);} else called = PETSC_TRUE;
1124: (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):n");
1125: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>n");
1126: return(0);
1127: }
1129: EXTERN_C_BEGIN
1130: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1131: {
1132: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1133: int i,nz,nbs;
1136: nz = baij->maxnz/baij->bs2;
1137: nbs = baij->nbs;
1138: for (i=0; i<nz; i++) {
1139: baij->j[i] = indices[i];
1140: }
1141: baij->nz = nz;
1142: for (i=0; i<nbs; i++) {
1143: baij->ilen[i] = baij->imax[i];
1144: }
1146: return(0);
1147: }
1148: EXTERN_C_END
1150: /*@
1151: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1152: in the matrix.
1154: Input Parameters:
1155: + mat - the SeqBAIJ matrix
1156: - indices - the column indices
1158: Level: advanced
1160: Notes:
1161: This can be called if you have precomputed the nonzero structure of the
1162: matrix and want to provide it to the matrix object to improve the performance
1163: of the MatSetValues() operation.
1165: You MUST have set the correct numbers of nonzeros per row in the call to
1166: MatCreateSeqBAIJ().
1168: MUST be called before any calls to MatSetValues();
1170: @*/
1171: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1172: {
1173: int ierr,(*f)(Mat,int *);
1177: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1178: if (f) {
1179: (*f)(mat,indices);
1180: } else {
1181: SETERRQ(1,"Wrong type of matrix to set column indices");
1182: }
1183: return(0);
1184: }
1186: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1187: {
1188: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1189: int ierr,i,j,n,row,bs,*ai,*aj,mbs;
1190: PetscReal atmp;
1191: PetscScalar *x,zero = 0.0;
1192: MatScalar *aa;
1193: int ncols,brow,krow,kcol;
1196: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1197: bs = a->bs;
1198: aa = a->a;
1199: ai = a->i;
1200: aj = a->j;
1201: mbs = a->mbs;
1203: VecSet(&zero,v);
1204: VecGetArray(v,&x);
1205: VecGetLocalSize(v,&n);
1206: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1207: for (i=0; i<mbs; i++) {
1208: ncols = ai[1] - ai[0]; ai++;
1209: brow = bs*i;
1210: for (j=0; j<ncols; j++){
1211: /* bcol = bs*(*aj); */
1212: for (kcol=0; kcol<bs; kcol++){
1213: for (krow=0; krow<bs; krow++){
1214: atmp = PetscAbsScalar(*aa); aa++;
1215: row = brow + krow; /* row index */
1216: /* printf("val[%d,%d]: %gn",row,bcol+kcol,atmp); */
1217: if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1218: }
1219: }
1220: aj++;
1221: }
1222: }
1223: VecRestoreArray(v,&x);
1224: return(0);
1225: }
1227: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1228: {
1229: int ierr;
1232: MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1233: return(0);
1234: }
1236: int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1237: {
1238: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1240: *array = a->a;
1241: return(0);
1242: }
1244: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1245: {
1247: return(0);
1248: }
1250: /* -------------------------------------------------------------------*/
1251: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1252: MatGetRow_SeqBAIJ,
1253: MatRestoreRow_SeqBAIJ,
1254: MatMult_SeqBAIJ_N,
1255: MatMultAdd_SeqBAIJ_N,
1256: MatMultTranspose_SeqBAIJ,
1257: MatMultTransposeAdd_SeqBAIJ,
1258: MatSolve_SeqBAIJ_N,
1259: 0,
1260: 0,
1261: 0,
1262: MatLUFactor_SeqBAIJ,
1263: 0,
1264: 0,
1265: MatTranspose_SeqBAIJ,
1266: MatGetInfo_SeqBAIJ,
1267: MatEqual_SeqBAIJ,
1268: MatGetDiagonal_SeqBAIJ,
1269: MatDiagonalScale_SeqBAIJ,
1270: MatNorm_SeqBAIJ,
1271: 0,
1272: MatAssemblyEnd_SeqBAIJ,
1273: 0,
1274: MatSetOption_SeqBAIJ,
1275: MatZeroEntries_SeqBAIJ,
1276: MatZeroRows_SeqBAIJ,
1277: MatLUFactorSymbolic_SeqBAIJ,
1278: MatLUFactorNumeric_SeqBAIJ_N,
1279: 0,
1280: 0,
1281: MatSetUpPreallocation_SeqBAIJ,
1282: MatILUFactorSymbolic_SeqBAIJ,
1283: 0,
1284: MatGetArray_SeqBAIJ,
1285: MatRestoreArray_SeqBAIJ,
1286: MatDuplicate_SeqBAIJ,
1287: 0,
1288: 0,
1289: MatILUFactor_SeqBAIJ,
1290: 0,
1291: 0,
1292: MatGetSubMatrices_SeqBAIJ,
1293: MatIncreaseOverlap_SeqBAIJ,
1294: MatGetValues_SeqBAIJ,
1295: 0,
1296: MatPrintHelp_SeqBAIJ,
1297: MatScale_SeqBAIJ,
1298: 0,
1299: 0,
1300: 0,
1301: MatGetBlockSize_SeqBAIJ,
1302: MatGetRowIJ_SeqBAIJ,
1303: MatRestoreRowIJ_SeqBAIJ,
1304: 0,
1305: 0,
1306: 0,
1307: 0,
1308: 0,
1309: 0,
1310: MatSetValuesBlocked_SeqBAIJ,
1311: MatGetSubMatrix_SeqBAIJ,
1312: MatDestroy_SeqBAIJ,
1313: MatView_SeqBAIJ,
1314: MatGetPetscMaps_Petsc,
1315: 0,
1316: 0,
1317: 0,
1318: 0,
1319: 0,
1320: 0,
1321: MatGetRowMax_SeqBAIJ,
1322: MatConvert_Basic};
1324: EXTERN_C_BEGIN
1325: int MatStoreValues_SeqBAIJ(Mat mat)
1326: {
1327: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1328: int nz = aij->i[mat->m]*aij->bs*aij->bs2;
1329: int ierr;
1332: if (aij->nonew != 1) {
1333: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1334: }
1336: /* allocate space for values if not already there */
1337: if (!aij->saved_values) {
1338: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1339: }
1341: /* copy values over */
1342: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1343: return(0);
1344: }
1345: EXTERN_C_END
1347: EXTERN_C_BEGIN
1348: int MatRetrieveValues_SeqBAIJ(Mat mat)
1349: {
1350: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1351: int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1354: if (aij->nonew != 1) {
1355: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1356: }
1357: if (!aij->saved_values) {
1358: SETERRQ(1,"Must call MatStoreValues(A);first");
1359: }
1361: /* copy values over */
1362: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1363: return(0);
1364: }
1365: EXTERN_C_END
1367: EXTERN_C_BEGIN
1368: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1369: EXTERN_C_END
1371: EXTERN_C_BEGIN
1372: int MatCreate_SeqBAIJ(Mat B)
1373: {
1374: int ierr,size;
1375: Mat_SeqBAIJ *b;
1378: MPI_Comm_size(B->comm,&size);
1379: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1381: B->m = B->M = PetscMax(B->m,B->M);
1382: B->n = B->N = PetscMax(B->n,B->N);
1383: ierr = PetscNew(Mat_SeqBAIJ,&b);
1384: B->data = (void*)b;
1385: ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1386: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1387: B->factor = 0;
1388: B->lupivotthreshold = 1.0;
1389: B->mapping = 0;
1390: b->row = 0;
1391: b->col = 0;
1392: b->icol = 0;
1393: b->reallocs = 0;
1394: b->saved_values = 0;
1395: #if defined(PETSC_USE_MAT_SINGLE)
1396: b->setvalueslen = 0;
1397: b->setvaluescopy = PETSC_NULL;
1398: #endif
1399: b->single_precision_solves = PETSC_FALSE;
1401: PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1402: PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);
1404: b->sorted = PETSC_FALSE;
1405: b->roworiented = PETSC_TRUE;
1406: b->nonew = 0;
1407: b->diag = 0;
1408: b->solve_work = 0;
1409: b->mult_work = 0;
1410: b->spptr = 0;
1411: B->info.nz_unneeded = (PetscReal)b->maxnz;
1412: b->keepzeroedrows = PETSC_FALSE;
1414: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1415: "MatStoreValues_SeqBAIJ",
1416: MatStoreValues_SeqBAIJ);
1417: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1418: "MatRetrieveValues_SeqBAIJ",
1419: MatRetrieveValues_SeqBAIJ);
1420: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1421: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1422: MatSeqBAIJSetColumnIndices_SeqBAIJ);
1423: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1424: "MatConvert_SeqBAIJ_SeqAIJ",
1425: MatConvert_SeqBAIJ_SeqAIJ);
1426: return(0);
1427: }
1428: EXTERN_C_END
1430: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1431: {
1432: Mat C;
1433: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1434: int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1437: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1439: *B = 0;
1440: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1441: MatSetType(C,MATSEQBAIJ);
1442: c = (Mat_SeqBAIJ*)C->data;
1444: c->bs = a->bs;
1445: c->bs2 = a->bs2;
1446: c->mbs = a->mbs;
1447: c->nbs = a->nbs;
1448: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1450: PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1451: PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1452: for (i=0; i<mbs; i++) {
1453: c->imax[i] = a->imax[i];
1454: c->ilen[i] = a->ilen[i];
1455: }
1457: /* allocate the matrix space */
1458: c->singlemalloc = PETSC_TRUE;
1459: len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1460: PetscMalloc(len,&c->a);
1461: c->j = (int*)(c->a + nz*bs2);
1462: c->i = c->j + nz;
1463: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1464: if (mbs > 0) {
1465: PetscMemcpy(c->j,a->j,nz*sizeof(int));
1466: if (cpvalues == MAT_COPY_VALUES) {
1467: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1468: } else {
1469: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1470: }
1471: }
1473: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1474: c->sorted = a->sorted;
1475: c->roworiented = a->roworiented;
1476: c->nonew = a->nonew;
1478: if (a->diag) {
1479: PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1480: PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1481: for (i=0; i<mbs; i++) {
1482: c->diag[i] = a->diag[i];
1483: }
1484: } else c->diag = 0;
1485: c->nz = a->nz;
1486: c->maxnz = a->maxnz;
1487: c->solve_work = 0;
1488: c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */
1489: c->mult_work = 0;
1490: C->preallocated = PETSC_TRUE;
1491: C->assembled = PETSC_TRUE;
1492: *B = C;
1493: PetscFListDuplicate(A->qlist,&C->qlist);
1494: return(0);
1495: }
1497: EXTERN_C_BEGIN
1498: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1499: {
1500: Mat_SeqBAIJ *a;
1501: Mat B;
1502: int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1503: int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1504: int kmax,jcount,block,idx,point,nzcountb,extra_rows;
1505: int *masked,nmask,tmp,bs2,ishift;
1506: PetscScalar *aa;
1507: MPI_Comm comm = ((PetscObject)viewer)->comm;
1510: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1511: bs2 = bs*bs;
1513: MPI_Comm_size(comm,&size);
1514: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1515: PetscViewerBinaryGetDescriptor(viewer,&fd);
1516: PetscBinaryRead(fd,header,4,PETSC_INT);
1517: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1518: M = header[1]; N = header[2]; nz = header[3];
1520: if (header[3] < 0) {
1521: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1522: }
1524: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1526: /*
1527: This code adds extra rows to make sure the number of rows is
1528: divisible by the blocksize
1529: */
1530: mbs = M/bs;
1531: extra_rows = bs - M + bs*(mbs);
1532: if (extra_rows == bs) extra_rows = 0;
1533: else mbs++;
1534: if (extra_rows) {
1535: PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksizen");
1536: }
1538: /* read in row lengths */
1539: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1540: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1541: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1543: /* read in column indices */
1544: PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1545: PetscBinaryRead(fd,jj,nz,PETSC_INT);
1546: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1548: /* loop over row lengths determining block row lengths */
1549: ierr = PetscMalloc(mbs*sizeof(int),&browlengths);
1550: ierr = PetscMemzero(browlengths,mbs*sizeof(int));
1551: ierr = PetscMalloc(2*mbs*sizeof(int),&mask);
1552: ierr = PetscMemzero(mask,mbs*sizeof(int));
1553: masked = mask + mbs;
1554: rowcount = 0; nzcount = 0;
1555: for (i=0; i<mbs; i++) {
1556: nmask = 0;
1557: for (j=0; j<bs; j++) {
1558: kmax = rowlengths[rowcount];
1559: for (k=0; k<kmax; k++) {
1560: tmp = jj[nzcount++]/bs;
1561: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1562: }
1563: rowcount++;
1564: }
1565: browlengths[i] += nmask;
1566: /* zero out the mask elements we set */
1567: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1568: }
1570: /* create our matrix */
1571: MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1572: B = *A;
1573: a = (Mat_SeqBAIJ*)B->data;
1575: /* set matrix "i" values */
1576: a->i[0] = 0;
1577: for (i=1; i<= mbs; i++) {
1578: a->i[i] = a->i[i-1] + browlengths[i-1];
1579: a->ilen[i-1] = browlengths[i-1];
1580: }
1581: a->nz = 0;
1582: for (i=0; i<mbs; i++) a->nz += browlengths[i];
1584: /* read in nonzero values */
1585: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1586: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1587: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1589: /* set "a" and "j" values into matrix */
1590: nzcount = 0; jcount = 0;
1591: for (i=0; i<mbs; i++) {
1592: nzcountb = nzcount;
1593: nmask = 0;
1594: for (j=0; j<bs; j++) {
1595: kmax = rowlengths[i*bs+j];
1596: for (k=0; k<kmax; k++) {
1597: tmp = jj[nzcount++]/bs;
1598: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1599: }
1600: }
1601: /* sort the masked values */
1602: PetscSortInt(nmask,masked);
1604: /* set "j" values into matrix */
1605: maskcount = 1;
1606: for (j=0; j<nmask; j++) {
1607: a->j[jcount++] = masked[j];
1608: mask[masked[j]] = maskcount++;
1609: }
1610: /* set "a" values into matrix */
1611: ishift = bs2*a->i[i];
1612: for (j=0; j<bs; j++) {
1613: kmax = rowlengths[i*bs+j];
1614: for (k=0; k<kmax; k++) {
1615: tmp = jj[nzcountb]/bs ;
1616: block = mask[tmp] - 1;
1617: point = jj[nzcountb] - bs*tmp;
1618: idx = ishift + bs2*block + j + bs*point;
1619: a->a[idx] = (MatScalar)aa[nzcountb++];
1620: }
1621: }
1622: /* zero out the mask elements we set */
1623: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1624: }
1625: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1627: PetscFree(rowlengths);
1628: PetscFree(browlengths);
1629: PetscFree(aa);
1630: PetscFree(jj);
1631: PetscFree(mask);
1633: B->assembled = PETSC_TRUE;
1634:
1635: MatView_Private(B);
1636: return(0);
1637: }
1638: EXTERN_C_END
1640: /*@C
1641: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1642: compressed row) format. For good matrix assembly performance the
1643: user should preallocate the matrix storage by setting the parameter nz
1644: (or the array nnz). By setting these parameters accurately, performance
1645: during matrix assembly can be increased by more than a factor of 50.
1647: Collective on MPI_Comm
1649: Input Parameters:
1650: + comm - MPI communicator, set to PETSC_COMM_SELF
1651: . bs - size of block
1652: . m - number of rows
1653: . n - number of columns
1654: . nz - number of nonzero blocks per block row (same for all rows)
1655: - nnz - array containing the number of nonzero blocks in the various block rows
1656: (possibly different for each block row) or PETSC_NULL
1658: Output Parameter:
1659: . A - the matrix
1661: Options Database Keys:
1662: . -mat_no_unroll - uses code that does not unroll the loops in the
1663: block calculations (much slower)
1664: . -mat_block_size - size of the blocks to use
1666: Level: intermediate
1668: Notes:
1669: A nonzero block is any block that as 1 or more nonzeros in it
1671: The block AIJ format is fully compatible with standard Fortran 77
1672: storage. That is, the stored row and column indices can begin at
1673: either one (as in Fortran) or zero. See the users' manual for details.
1675: Specify the preallocated storage with either nz or nnz (not both).
1676: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1677: allocation. For additional details, see the users manual chapter on
1678: matrices.
1680: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1681: @*/
1682: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1683: {
1685:
1687: MatCreate(comm,m,n,m,n,A);
1688: MatSetType(*A,MATSEQBAIJ);
1689: MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
1690: return(0);
1691: }
1693: /*@C
1694: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1695: per row in the matrix. For good matrix assembly performance the
1696: user should preallocate the matrix storage by setting the parameter nz
1697: (or the array nnz). By setting these parameters accurately, performance
1698: during matrix assembly can be increased by more than a factor of 50.
1700: Collective on MPI_Comm
1702: Input Parameters:
1703: + A - the matrix
1704: . bs - size of block
1705: . nz - number of block nonzeros per block row (same for all rows)
1706: - nnz - array containing the number of block nonzeros in the various block rows
1707: (possibly different for each block row) or PETSC_NULL
1709: Options Database Keys:
1710: . -mat_no_unroll - uses code that does not unroll the loops in the
1711: block calculations (much slower)
1712: . -mat_block_size - size of the blocks to use
1714: Level: intermediate
1716: Notes:
1717: The block AIJ format is fully compatible with standard Fortran 77
1718: storage. That is, the stored row and column indices can begin at
1719: either one (as in Fortran) or zero. See the users' manual for details.
1721: Specify the preallocated storage with either nz or nnz (not both).
1722: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1723: allocation. For additional details, see the users manual chapter on
1724: matrices.
1726: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1727: @*/
1728: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1729: {
1730: Mat_SeqBAIJ *b;
1731: int i,len,ierr,mbs,nbs,bs2,newbs = bs;
1732: PetscTruth flg;
1735: PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);
1736: if (!flg) return(0);
1738: B->preallocated = PETSC_TRUE;
1739: PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1740: if (nnz && newbs != bs) {
1741: SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1742: }
1743: bs = newbs;
1745: mbs = B->m/bs;
1746: nbs = B->n/bs;
1747: bs2 = bs*bs;
1749: if (mbs*bs!=B->m || nbs*bs!=B->n) {
1750: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1751: }
1753: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1754: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1755: if (nnz) {
1756: for (i=0; i<mbs; i++) {
1757: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1758: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1759: }
1760: }
1762: b = (Mat_SeqBAIJ*)B->data;
1763: ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1764: B->ops->solve = MatSolve_SeqBAIJ_Update;
1765: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
1766: if (!flg) {
1767: switch (bs) {
1768: case 1:
1769: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1770: B->ops->mult = MatMult_SeqBAIJ_1;
1771: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
1772: break;
1773: case 2:
1774: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1775: B->ops->mult = MatMult_SeqBAIJ_2;
1776: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
1777: break;
1778: case 3:
1779: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1780: B->ops->mult = MatMult_SeqBAIJ_3;
1781: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
1782: break;
1783: case 4:
1784: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1785: B->ops->mult = MatMult_SeqBAIJ_4;
1786: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
1787: break;
1788: case 5:
1789: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1790: B->ops->mult = MatMult_SeqBAIJ_5;
1791: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
1792: break;
1793: case 6:
1794: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1795: B->ops->mult = MatMult_SeqBAIJ_6;
1796: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
1797: break;
1798: case 7:
1799: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1800: B->ops->mult = MatMult_SeqBAIJ_7;
1801: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
1802: break;
1803: default:
1804: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1805: B->ops->mult = MatMult_SeqBAIJ_N;
1806: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
1807: break;
1808: }
1809: }
1810: b->bs = bs;
1811: b->mbs = mbs;
1812: b->nbs = nbs;
1813: PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1814: if (!nnz) {
1815: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1816: else if (nz <= 0) nz = 1;
1817: for (i=0; i<mbs; i++) b->imax[i] = nz;
1818: nz = nz*mbs;
1819: } else {
1820: nz = 0;
1821: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1822: }
1824: /* allocate the matrix space */
1825: len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1826: ierr = PetscMalloc(len,&b->a);
1827: ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1828: b->j = (int*)(b->a + nz*bs2);
1829: PetscMemzero(b->j,nz*sizeof(int));
1830: b->i = b->j + nz;
1831: b->singlemalloc = PETSC_TRUE;
1833: b->i[0] = 0;
1834: for (i=1; i<mbs+1; i++) {
1835: b->i[i] = b->i[i-1] + b->imax[i-1];
1836: }
1838: /* b->ilen will count nonzeros in each block row so far. */
1839: PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1840: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1841: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1843: b->bs = bs;
1844: b->bs2 = bs2;
1845: b->mbs = mbs;
1846: b->nz = 0;
1847: b->maxnz = nz*bs2;
1848: B->info.nz_unneeded = (PetscReal)b->maxnz;
1849: return(0);
1850: }