Actual source code: baij.c
1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/vec/vecimpl.h
9: #include src/inline/spops.h
10: #include petscsys.h
12: /* UGLY, ugly, ugly
13: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16: converts the entries into single precision and then calls ..._MatScalar() to put them
17: into the single precision data structures.
18: */
19: #if defined(PETSC_USE_MAT_SINGLE)
20: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21: #else
22: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23: #endif
24: #if defined(PETSC_HAVE_DSCPACK)
25: EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
26: #endif
28: #define CHUNKSIZE 10
30: /*
31: Checks for missing diagonals
32: */
33: int MatMissingDiagonal_SeqBAIJ(Mat A)
34: {
35: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
36: int *diag,*jj = a->j,i,ierr;
39: MatMarkDiagonal_SeqBAIJ(A);
40: diag = a->diag;
41: for (i=0; i<a->mbs; i++) {
42: if (jj[diag[i]] != i) {
43: SETERRQ1(1,"Matrix is missing diagonal number %d",i);
44: }
45: }
46: return(0);
47: }
49: int MatMarkDiagonal_SeqBAIJ(Mat A)
50: {
51: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
52: int i,j,*diag,m = a->mbs,ierr;
55: if (a->diag) return(0);
57: PetscMalloc((m+1)*sizeof(int),&diag);
58: PetscLogObjectMemory(A,(m+1)*sizeof(int));
59: for (i=0; i<m; i++) {
60: diag[i] = a->i[i+1];
61: for (j=a->i[i]; j<a->i[i+1]; j++) {
62: if (a->j[j] == i) {
63: diag[i] = j;
64: break;
65: }
66: }
67: }
68: a->diag = diag;
69: return(0);
70: }
73: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
76: {
77: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
78: int ierr,n = a->mbs,i;
81: *nn = n;
82: if (!ia) return(0);
83: if (symmetric) {
84: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
85: } else if (oshift == 1) {
86: /* temporarily add 1 to i and j indices */
87: int nz = a->i[n];
88: for (i=0; i<nz; i++) a->j[i]++;
89: for (i=0; i<n+1; i++) a->i[i]++;
90: *ia = a->i; *ja = a->j;
91: } else {
92: *ia = a->i; *ja = a->j;
93: }
95: return(0);
96: }
98: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
99: {
100: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
101: int i,n = a->mbs,ierr;
104: if (!ia) return(0);
105: if (symmetric) {
106: PetscFree(*ia);
107: PetscFree(*ja);
108: } else if (oshift == 1) {
109: int nz = a->i[n]-1;
110: for (i=0; i<nz; i++) a->j[i]--;
111: for (i=0; i<n+1; i++) a->i[i]--;
112: }
113: return(0);
114: }
116: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
117: {
118: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
121: *bs = baij->bs;
122: return(0);
123: }
126: int MatDestroy_SeqBAIJ(Mat A)
127: {
128: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
129: int ierr;
132: #if defined(PETSC_USE_LOG)
133: PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
134: #endif
135: PetscFree(a->a);
136: if (!a->singlemalloc) {
137: PetscFree(a->i);
138: PetscFree(a->j);
139: }
140: if (a->row) {
141: ISDestroy(a->row);
142: }
143: if (a->col) {
144: ISDestroy(a->col);
145: }
146: if (a->diag) {PetscFree(a->diag);}
147: if (a->ilen) {PetscFree(a->ilen);}
148: if (a->imax) {PetscFree(a->imax);}
149: if (a->solve_work) {PetscFree(a->solve_work);}
150: if (a->mult_work) {PetscFree(a->mult_work);}
151: if (a->icol) {ISDestroy(a->icol);}
152: if (a->saved_values) {PetscFree(a->saved_values);}
153: #if defined(PETSC_USE_MAT_SINGLE)
154: if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
155: #endif
156: PetscFree(a);
157: return(0);
158: }
160: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
161: {
162: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
165: switch (op) {
166: case MAT_ROW_ORIENTED:
167: a->roworiented = PETSC_TRUE;
168: break;
169: case MAT_COLUMN_ORIENTED:
170: a->roworiented = PETSC_FALSE;
171: break;
172: case MAT_COLUMNS_SORTED:
173: a->sorted = PETSC_TRUE;
174: break;
175: case MAT_COLUMNS_UNSORTED:
176: a->sorted = PETSC_FALSE;
177: break;
178: case MAT_KEEP_ZEROED_ROWS:
179: a->keepzeroedrows = PETSC_TRUE;
180: break;
181: case MAT_NO_NEW_NONZERO_LOCATIONS:
182: a->nonew = 1;
183: break;
184: case MAT_NEW_NONZERO_LOCATION_ERR:
185: a->nonew = -1;
186: break;
187: case MAT_NEW_NONZERO_ALLOCATION_ERR:
188: a->nonew = -2;
189: break;
190: case MAT_YES_NEW_NONZERO_LOCATIONS:
191: a->nonew = 0;
192: break;
193: case MAT_ROWS_SORTED:
194: case MAT_ROWS_UNSORTED:
195: case MAT_YES_NEW_DIAGONALS:
196: case MAT_IGNORE_OFF_PROC_ENTRIES:
197: case MAT_USE_HASH_TABLE:
198: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignoredn");
199: break;
200: case MAT_NO_NEW_DIAGONALS:
201: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
202: case MAT_USE_SINGLE_PRECISION_SOLVES:
203: if (a->bs==4) {
204: PetscTruth sse_enabled_local,sse_enabled_global;
205: int ierr;
207: sse_enabled_local = PETSC_FALSE;
208: sse_enabled_global = PETSC_FALSE;
210: PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);
211: #if defined(PETSC_HAVE_SSE)
212: if (sse_enabled_local) {
213: a->single_precision_solves = PETSC_TRUE;
214: A->ops->solve = MatSolve_SeqBAIJ_Update;
215: A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
216: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES setn");
217: break;
218: } else {
219: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
220: }
221: #else
222: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
223: #endif
224: }
225: break;
226: default:
227: SETERRQ(PETSC_ERR_SUP,"unknown option");
228: }
229: return(0);
230: }
232: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
233: {
234: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
235: int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
236: MatScalar *aa,*aa_i;
237: PetscScalar *v_i;
240: bs = a->bs;
241: ai = a->i;
242: aj = a->j;
243: aa = a->a;
244: bs2 = a->bs2;
245:
246: if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
247:
248: bn = row/bs; /* Block number */
249: bp = row % bs; /* Block Position */
250: M = ai[bn+1] - ai[bn];
251: *nz = bs*M;
252:
253: if (v) {
254: *v = 0;
255: if (*nz) {
256: PetscMalloc((*nz)*sizeof(PetscScalar),v);
257: for (i=0; i<M; i++) { /* for each block in the block row */
258: v_i = *v + i*bs;
259: aa_i = aa + bs2*(ai[bn] + i);
260: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
261: }
262: }
263: }
265: if (idx) {
266: *idx = 0;
267: if (*nz) {
268: PetscMalloc((*nz)*sizeof(int),idx);
269: for (i=0; i<M; i++) { /* for each block in the block row */
270: idx_i = *idx + i*bs;
271: itmp = bs*aj[ai[bn] + i];
272: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
273: }
274: }
275: }
276: return(0);
277: }
279: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
280: {
284: if (idx) {if (*idx) {PetscFree(*idx);}}
285: if (v) {if (*v) {PetscFree(*v);}}
286: return(0);
287: }
289: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
290: {
291: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
292: Mat C;
293: int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
294: int *rows,*cols,bs2=a->bs2;
295: PetscScalar *array;
298: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
299: PetscMalloc((1+nbs)*sizeof(int),&col);
300: PetscMemzero(col,(1+nbs)*sizeof(int));
302: #if defined(PETSC_USE_MAT_SINGLE)
303: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
304: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
305: #else
306: array = a->a;
307: #endif
309: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
310: MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
311: PetscFree(col);
312: PetscMalloc(2*bs*sizeof(int),&rows);
313: cols = rows + bs;
314: for (i=0; i<mbs; i++) {
315: cols[0] = i*bs;
316: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
317: len = ai[i+1] - ai[i];
318: for (j=0; j<len; j++) {
319: rows[0] = (*aj++)*bs;
320: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
321: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
322: array += bs2;
323: }
324: }
325: PetscFree(rows);
326: #if defined(PETSC_USE_MAT_SINGLE)
327: PetscFree(array);
328: #endif
329:
330: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
331: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
332:
333: if (B) {
334: *B = C;
335: } else {
336: MatHeaderCopy(A,C);
337: }
338: return(0);
339: }
341: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
342: {
343: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
344: int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
345: PetscScalar *aa;
346: FILE *file;
349: ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);
350: ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
351: col_lens[0] = MAT_FILE_COOKIE;
353: col_lens[1] = A->m;
354: col_lens[2] = A->n;
355: col_lens[3] = a->nz*bs2;
357: /* store lengths of each row and write (including header) to file */
358: count = 0;
359: for (i=0; i<a->mbs; i++) {
360: for (j=0; j<bs; j++) {
361: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
362: }
363: }
364: PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
365: PetscFree(col_lens);
367: /* store column indices (zero start index) */
368: ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
369: count = 0;
370: for (i=0; i<a->mbs; i++) {
371: for (j=0; j<bs; j++) {
372: for (k=a->i[i]; k<a->i[i+1]; k++) {
373: for (l=0; l<bs; l++) {
374: jj[count++] = bs*a->j[k] + l;
375: }
376: }
377: }
378: }
379: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
380: PetscFree(jj);
382: /* store nonzero values */
383: ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
384: count = 0;
385: for (i=0; i<a->mbs; i++) {
386: for (j=0; j<bs; j++) {
387: for (k=a->i[i]; k<a->i[i+1]; k++) {
388: for (l=0; l<bs; l++) {
389: aa[count++] = a->a[bs2*k + l*bs + j];
390: }
391: }
392: }
393: }
394: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
395: PetscFree(aa);
397: PetscViewerBinaryGetInfoPointer(viewer,&file);
398: if (file) {
399: fprintf(file,"-matload_block_size %dn",a->bs);
400: }
401: return(0);
402: }
404: extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
406: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
407: {
408: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
409: int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
410: PetscViewerFormat format;
413: PetscViewerGetFormat(viewer,&format);
414: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
415: PetscViewerASCIIPrintf(viewer," block size is %dn",bs);
416: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
417: Mat aij;
418: MatConvert(A,MATSEQAIJ,&aij);
419: MatView(aij,viewer);
420: MatDestroy(aij);
421: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
422: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
423: MatMPIBAIJFactorInfo_DSCPACK(A,viewer);
424: #endif
425: return(0);
426: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
427: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
428: for (i=0; i<a->mbs; i++) {
429: for (j=0; j<bs; j++) {
430: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
431: for (k=a->i[i]; k<a->i[i+1]; k++) {
432: for (l=0; l<bs; l++) {
433: #if defined(PETSC_USE_COMPLEX)
434: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
435: PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
436: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
437: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
438: PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
439: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
440: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
441: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
442: }
443: #else
444: if (a->a[bs2*k + l*bs + j] != 0.0) {
445: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
446: }
447: #endif
448: }
449: }
450: PetscViewerASCIIPrintf(viewer,"n");
451: }
452: }
453: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
454: } else {
455: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
456: for (i=0; i<a->mbs; i++) {
457: for (j=0; j<bs; j++) {
458: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
459: for (k=a->i[i]; k<a->i[i+1]; k++) {
460: for (l=0; l<bs; l++) {
461: #if defined(PETSC_USE_COMPLEX)
462: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
463: PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
464: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
465: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
466: PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
467: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
468: } else {
469: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
470: }
471: #else
472: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
473: #endif
474: }
475: }
476: PetscViewerASCIIPrintf(viewer,"n");
477: }
478: }
479: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
480: }
481: PetscViewerFlush(viewer);
482: return(0);
483: }
485: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
486: {
487: Mat A = (Mat) Aa;
488: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
489: int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
490: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
491: MatScalar *aa;
492: PetscViewer viewer;
496: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
497: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
499: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
501: /* loop over matrix elements drawing boxes */
502: color = PETSC_DRAW_BLUE;
503: for (i=0,row=0; i<mbs; i++,row+=bs) {
504: for (j=a->i[i]; j<a->i[i+1]; j++) {
505: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
506: x_l = a->j[j]*bs; x_r = x_l + 1.0;
507: aa = a->a + j*bs2;
508: for (k=0; k<bs; k++) {
509: for (l=0; l<bs; l++) {
510: if (PetscRealPart(*aa++) >= 0.) continue;
511: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
512: }
513: }
514: }
515: }
516: color = PETSC_DRAW_CYAN;
517: for (i=0,row=0; i<mbs; i++,row+=bs) {
518: for (j=a->i[i]; j<a->i[i+1]; j++) {
519: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
520: x_l = a->j[j]*bs; x_r = x_l + 1.0;
521: aa = a->a + j*bs2;
522: for (k=0; k<bs; k++) {
523: for (l=0; l<bs; l++) {
524: if (PetscRealPart(*aa++) != 0.) continue;
525: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
526: }
527: }
528: }
529: }
531: color = PETSC_DRAW_RED;
532: for (i=0,row=0; i<mbs; i++,row+=bs) {
533: for (j=a->i[i]; j<a->i[i+1]; j++) {
534: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
535: x_l = a->j[j]*bs; x_r = x_l + 1.0;
536: aa = a->a + j*bs2;
537: for (k=0; k<bs; k++) {
538: for (l=0; l<bs; l++) {
539: if (PetscRealPart(*aa++) <= 0.) continue;
540: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
541: }
542: }
543: }
544: }
545: return(0);
546: }
548: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
549: {
550: int ierr;
551: PetscReal xl,yl,xr,yr,w,h;
552: PetscDraw draw;
553: PetscTruth isnull;
557: PetscViewerDrawGetDraw(viewer,0,&draw);
558: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
560: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
561: xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
562: xr += w; yr += h; xl = -w; yl = -h;
563: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
564: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
565: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
566: return(0);
567: }
569: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
570: {
571: int ierr;
572: PetscTruth issocket,isascii,isbinary,isdraw;
575: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
576: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
577: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
578: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
579: if (issocket) {
580: SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
581: } else if (isascii){
582: MatView_SeqBAIJ_ASCII(A,viewer);
583: } else if (isbinary) {
584: MatView_SeqBAIJ_Binary(A,viewer);
585: } else if (isdraw) {
586: MatView_SeqBAIJ_Draw(A,viewer);
587: } else {
588: SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
589: }
590: return(0);
591: }
594: int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
595: {
596: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
597: int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
598: int *ai = a->i,*ailen = a->ilen;
599: int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
600: MatScalar *ap,*aa = a->a,zero = 0.0;
603: for (k=0; k<m; k++) { /* loop over rows */
604: row = im[k]; brow = row/bs;
605: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
606: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
607: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
608: nrow = ailen[brow];
609: for (l=0; l<n; l++) { /* loop over columns */
610: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
611: if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
612: col = in[l] ;
613: bcol = col/bs;
614: cidx = col%bs;
615: ridx = row%bs;
616: high = nrow;
617: low = 0; /* assume unsorted */
618: while (high-low > 5) {
619: t = (low+high)/2;
620: if (rp[t] > bcol) high = t;
621: else low = t;
622: }
623: for (i=low; i<high; i++) {
624: if (rp[i] > bcol) break;
625: if (rp[i] == bcol) {
626: *v++ = ap[bs2*i+bs*cidx+ridx];
627: goto finished;
628: }
629: }
630: *v++ = zero;
631: finished:;
632: }
633: }
634: return(0);
635: }
637: #if defined(PETSC_USE_MAT_SINGLE)
638: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
639: {
640: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
641: int ierr,i,N = m*n*b->bs2;
642: MatScalar *vsingle;
645: if (N > b->setvalueslen) {
646: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
647: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
648: b->setvalueslen = N;
649: }
650: vsingle = b->setvaluescopy;
651: for (i=0; i<N; i++) {
652: vsingle[i] = v[i];
653: }
654: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
655: return(0);
656: }
657: #endif
660: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
661: {
662: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
663: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
664: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
665: int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
666: PetscTruth roworiented=a->roworiented;
667: MatScalar *value = v,*ap,*aa = a->a,*bap;
670: if (roworiented) {
671: stepval = (n-1)*bs;
672: } else {
673: stepval = (m-1)*bs;
674: }
675: for (k=0; k<m; k++) { /* loop over added rows */
676: row = im[k];
677: if (row < 0) continue;
678: #if defined(PETSC_USE_BOPT_g)
679: if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
680: #endif
681: rp = aj + ai[row];
682: ap = aa + bs2*ai[row];
683: rmax = imax[row];
684: nrow = ailen[row];
685: low = 0;
686: for (l=0; l<n; l++) { /* loop over added columns */
687: if (in[l] < 0) continue;
688: #if defined(PETSC_USE_BOPT_g)
689: if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
690: #endif
691: col = in[l];
692: if (roworiented) {
693: value = v + k*(stepval+bs)*bs + l*bs;
694: } else {
695: value = v + l*(stepval+bs)*bs + k*bs;
696: }
697: if (!sorted) low = 0; high = nrow;
698: while (high-low > 7) {
699: t = (low+high)/2;
700: if (rp[t] > col) high = t;
701: else low = t;
702: }
703: for (i=low; i<high; i++) {
704: if (rp[i] > col) break;
705: if (rp[i] == col) {
706: bap = ap + bs2*i;
707: if (roworiented) {
708: if (is == ADD_VALUES) {
709: for (ii=0; ii<bs; ii++,value+=stepval) {
710: for (jj=ii; jj<bs2; jj+=bs) {
711: bap[jj] += *value++;
712: }
713: }
714: } else {
715: for (ii=0; ii<bs; ii++,value+=stepval) {
716: for (jj=ii; jj<bs2; jj+=bs) {
717: bap[jj] = *value++;
718: }
719: }
720: }
721: } else {
722: if (is == ADD_VALUES) {
723: for (ii=0; ii<bs; ii++,value+=stepval) {
724: for (jj=0; jj<bs; jj++) {
725: *bap++ += *value++;
726: }
727: }
728: } else {
729: for (ii=0; ii<bs; ii++,value+=stepval) {
730: for (jj=0; jj<bs; jj++) {
731: *bap++ = *value++;
732: }
733: }
734: }
735: }
736: goto noinsert2;
737: }
738: }
739: if (nonew == 1) goto noinsert2;
740: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
741: if (nrow >= rmax) {
742: /* there is no extra room in row, therefore enlarge */
743: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
744: MatScalar *new_a;
746: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
748: /* malloc new storage space */
749: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
750: ierr = PetscMalloc(len,&new_a);
751: new_j = (int*)(new_a + bs2*new_nz);
752: new_i = new_j + new_nz;
754: /* copy over old data into new slots */
755: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
756: for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
757: PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
758: len = (new_nz - CHUNKSIZE - ai[row] - nrow);
759: PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
760: PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
761: PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
762: PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
763: /* free up old matrix storage */
764: PetscFree(a->a);
765: if (!a->singlemalloc) {
766: PetscFree(a->i);
767: PetscFree(a->j);
768: }
769: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
770: a->singlemalloc = PETSC_TRUE;
772: rp = aj + ai[row]; ap = aa + bs2*ai[row];
773: rmax = imax[row] = imax[row] + CHUNKSIZE;
774: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
775: a->maxnz += bs2*CHUNKSIZE;
776: a->reallocs++;
777: a->nz++;
778: }
779: N = nrow++ - 1;
780: /* shift up all the later entries in this row */
781: for (ii=N; ii>=i; ii--) {
782: rp[ii+1] = rp[ii];
783: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
784: }
785: if (N >= i) {
786: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
787: }
788: rp[i] = col;
789: bap = ap + bs2*i;
790: if (roworiented) {
791: for (ii=0; ii<bs; ii++,value+=stepval) {
792: for (jj=ii; jj<bs2; jj+=bs) {
793: bap[jj] = *value++;
794: }
795: }
796: } else {
797: for (ii=0; ii<bs; ii++,value+=stepval) {
798: for (jj=0; jj<bs; jj++) {
799: *bap++ = *value++;
800: }
801: }
802: }
803: noinsert2:;
804: low = i;
805: }
806: ailen[row] = nrow;
807: }
808: return(0);
809: }
811: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
812: {
813: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
814: int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
815: int m = A->m,*ip,N,*ailen = a->ilen;
816: int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
817: MatScalar *aa = a->a,*ap;
818: #if defined(PETSC_HAVE_DSCPACK)
819: PetscTruth flag;
820: #endif
823: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
825: if (m) rmax = ailen[0];
826: for (i=1; i<mbs; i++) {
827: /* move each row back by the amount of empty slots (fshift) before it*/
828: fshift += imax[i-1] - ailen[i-1];
829: rmax = PetscMax(rmax,ailen[i]);
830: if (fshift) {
831: ip = aj + ai[i]; ap = aa + bs2*ai[i];
832: N = ailen[i];
833: for (j=0; j<N; j++) {
834: ip[j-fshift] = ip[j];
835: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
836: }
837: }
838: ai[i] = ai[i-1] + ailen[i-1];
839: }
840: if (mbs) {
841: fshift += imax[mbs-1] - ailen[mbs-1];
842: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
843: }
844: /* reset ilen and imax for each row */
845: for (i=0; i<mbs; i++) {
846: ailen[i] = imax[i] = ai[i+1] - ai[i];
847: }
848: a->nz = ai[mbs];
850: /* diagonals may have moved, so kill the diagonal pointers */
851: if (fshift && a->diag) {
852: PetscFree(a->diag);
853: PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
854: a->diag = 0;
855: }
856: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
857: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %dn",a->reallocs);
858: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %dn",rmax);
859: a->reallocs = 0;
860: A->info.nz_unneeded = (PetscReal)fshift*bs2;
862: #if defined(PETSC_HAVE_DSCPACK)
863: PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);
864: if (flag) { MatUseDSCPACK_MPIBAIJ(A); }
865: #endif
867: return(0);
868: }
872: /*
873: This function returns an array of flags which indicate the locations of contiguous
874: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
875: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
876: Assume: sizes should be long enough to hold all the values.
877: */
878: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
879: {
880: int i,j,k,row;
881: PetscTruth flg;
884: for (i=0,j=0; i<n; j++) {
885: row = idx[i];
886: if (row%bs!=0) { /* Not the begining of a block */
887: sizes[j] = 1;
888: i++;
889: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
890: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
891: i++;
892: } else { /* Begining of the block, so check if the complete block exists */
893: flg = PETSC_TRUE;
894: for (k=1; k<bs; k++) {
895: if (row+k != idx[i+k]) { /* break in the block */
896: flg = PETSC_FALSE;
897: break;
898: }
899: }
900: if (flg == PETSC_TRUE) { /* No break in the bs */
901: sizes[j] = bs;
902: i+= bs;
903: } else {
904: sizes[j] = 1;
905: i++;
906: }
907: }
908: }
909: *bs_max = j;
910: return(0);
911: }
912:
913: int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
914: {
915: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
916: int ierr,i,j,k,count,is_n,*is_idx,*rows;
917: int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
918: PetscScalar zero = 0.0;
919: MatScalar *aa;
922: /* Make a copy of the IS and sort it */
923: ISGetLocalSize(is,&is_n);
924: ISGetIndices(is,&is_idx);
926: /* allocate memory for rows,sizes */
927: ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);
928: sizes = rows + is_n;
930: /* copy IS values to rows, and sort them */
931: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
932: PetscSortInt(is_n,rows);
933: if (baij->keepzeroedrows) {
934: for (i=0; i<is_n; i++) { sizes[i] = 1; }
935: bs_max = is_n;
936: } else {
937: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
938: }
939: ISRestoreIndices(is,&is_idx);
941: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
942: row = rows[j];
943: if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
944: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
945: aa = baij->a + baij->i[row/bs]*bs2 + (row%bs);
946: if (sizes[i] == bs && !baij->keepzeroedrows) {
947: if (diag) {
948: if (baij->ilen[row/bs] > 0) {
949: baij->ilen[row/bs] = 1;
950: baij->j[baij->i[row/bs]] = row/bs;
951: PetscMemzero(aa,count*bs*sizeof(MatScalar));
952: }
953: /* Now insert all the diagonal values for this bs */
954: for (k=0; k<bs; k++) {
955: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
956: }
957: } else { /* (!diag) */
958: baij->ilen[row/bs] = 0;
959: } /* end (!diag) */
960: } else { /* (sizes[i] != bs) */
961: #if defined (PETSC_USE_DEBUG)
962: if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
963: #endif
964: for (k=0; k<count; k++) {
965: aa[0] = zero;
966: aa += bs;
967: }
968: if (diag) {
969: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
970: }
971: }
972: }
974: PetscFree(rows);
975: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
976: return(0);
977: }
979: int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
980: {
981: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982: int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
983: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
984: int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
985: int ridx,cidx,bs2=a->bs2,ierr;
986: PetscTruth roworiented=a->roworiented;
987: MatScalar *ap,value,*aa=a->a,*bap;
990: for (k=0; k<m; k++) { /* loop over added rows */
991: row = im[k]; brow = row/bs;
992: if (row < 0) continue;
993: #if defined(PETSC_USE_BOPT_g)
994: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
995: #endif
996: rp = aj + ai[brow];
997: ap = aa + bs2*ai[brow];
998: rmax = imax[brow];
999: nrow = ailen[brow];
1000: low = 0;
1001: for (l=0; l<n; l++) { /* loop over added columns */
1002: if (in[l] < 0) continue;
1003: #if defined(PETSC_USE_BOPT_g)
1004: if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1005: #endif
1006: col = in[l]; bcol = col/bs;
1007: ridx = row % bs; cidx = col % bs;
1008: if (roworiented) {
1009: value = v[l + k*n];
1010: } else {
1011: value = v[k + l*m];
1012: }
1013: if (!sorted) low = 0; high = nrow;
1014: while (high-low > 7) {
1015: t = (low+high)/2;
1016: if (rp[t] > bcol) high = t;
1017: else low = t;
1018: }
1019: for (i=low; i<high; i++) {
1020: if (rp[i] > bcol) break;
1021: if (rp[i] == bcol) {
1022: bap = ap + bs2*i + bs*cidx + ridx;
1023: if (is == ADD_VALUES) *bap += value;
1024: else *bap = value;
1025: goto noinsert1;
1026: }
1027: }
1028: if (nonew == 1) goto noinsert1;
1029: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1030: if (nrow >= rmax) {
1031: /* there is no extra room in row, therefore enlarge */
1032: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1033: MatScalar *new_a;
1035: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1037: /* Malloc new storage space */
1038: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1039: ierr = PetscMalloc(len,&new_a);
1040: new_j = (int*)(new_a + bs2*new_nz);
1041: new_i = new_j + new_nz;
1043: /* copy over old data into new slots */
1044: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1045: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1046: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1047: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1048: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1049: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1050: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1051: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1052: /* free up old matrix storage */
1053: PetscFree(a->a);
1054: if (!a->singlemalloc) {
1055: PetscFree(a->i);
1056: PetscFree(a->j);
1057: }
1058: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1059: a->singlemalloc = PETSC_TRUE;
1061: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1062: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1063: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1064: a->maxnz += bs2*CHUNKSIZE;
1065: a->reallocs++;
1066: a->nz++;
1067: }
1068: N = nrow++ - 1;
1069: /* shift up all the later entries in this row */
1070: for (ii=N; ii>=i; ii--) {
1071: rp[ii+1] = rp[ii];
1072: ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1073: }
1074: if (N>=i) {
1075: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1076: }
1077: rp[i] = bcol;
1078: ap[bs2*i + bs*cidx + ridx] = value;
1079: noinsert1:;
1080: low = i;
1081: }
1082: ailen[brow] = nrow;
1083: }
1084: return(0);
1085: }
1088: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1089: {
1090: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1091: Mat outA;
1092: int ierr;
1093: PetscTruth row_identity,col_identity;
1096: if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1097: ISIdentity(row,&row_identity);
1098: ISIdentity(col,&col_identity);
1099: if (!row_identity || !col_identity) {
1100: SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1101: }
1103: outA = inA;
1104: inA->factor = FACTOR_LU;
1106: if (!a->diag) {
1107: MatMarkDiagonal_SeqBAIJ(inA);
1108: }
1110: a->row = row;
1111: a->col = col;
1112: ierr = PetscObjectReference((PetscObject)row);
1113: ierr = PetscObjectReference((PetscObject)col);
1114:
1115: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1116: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1117: PetscLogObjectParent(inA,a->icol);
1118:
1119: /*
1120: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1121: for ILU(0) factorization with natural ordering
1122: */
1123: if (a->bs < 8) {
1124: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1125: } else {
1126: if (!a->solve_work) {
1127: PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1128: PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1129: }
1130: }
1132: MatLUFactorNumeric(inA,&outA);
1134: return(0);
1135: }
1136: int MatPrintHelp_SeqBAIJ(Mat A)
1137: {
1138: static PetscTruth called = PETSC_FALSE;
1139: MPI_Comm comm = A->comm;
1140: int ierr;
1143: if (called) {return(0);} else called = PETSC_TRUE;
1144: (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):n");
1145: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>n");
1146: return(0);
1147: }
1149: EXTERN_C_BEGIN
1150: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1151: {
1152: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1153: int i,nz,nbs;
1156: nz = baij->maxnz/baij->bs2;
1157: nbs = baij->nbs;
1158: for (i=0; i<nz; i++) {
1159: baij->j[i] = indices[i];
1160: }
1161: baij->nz = nz;
1162: for (i=0; i<nbs; i++) {
1163: baij->ilen[i] = baij->imax[i];
1164: }
1166: return(0);
1167: }
1168: EXTERN_C_END
1170: /*@
1171: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1172: in the matrix.
1174: Input Parameters:
1175: + mat - the SeqBAIJ matrix
1176: - indices - the column indices
1178: Level: advanced
1180: Notes:
1181: This can be called if you have precomputed the nonzero structure of the
1182: matrix and want to provide it to the matrix object to improve the performance
1183: of the MatSetValues() operation.
1185: You MUST have set the correct numbers of nonzeros per row in the call to
1186: MatCreateSeqBAIJ().
1188: MUST be called before any calls to MatSetValues();
1190: @*/
1191: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1192: {
1193: int ierr,(*f)(Mat,int *);
1197: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1198: if (f) {
1199: (*f)(mat,indices);
1200: } else {
1201: SETERRQ(1,"Wrong type of matrix to set column indices");
1202: }
1203: return(0);
1204: }
1206: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1207: {
1208: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1209: int ierr,i,j,n,row,bs,*ai,*aj,mbs;
1210: PetscReal atmp;
1211: PetscScalar *x,zero = 0.0;
1212: MatScalar *aa;
1213: int ncols,brow,krow,kcol;
1216: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1217: bs = a->bs;
1218: aa = a->a;
1219: ai = a->i;
1220: aj = a->j;
1221: mbs = a->mbs;
1223: VecSet(&zero,v);
1224: VecGetArray(v,&x);
1225: VecGetLocalSize(v,&n);
1226: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1227: for (i=0; i<mbs; i++) {
1228: ncols = ai[1] - ai[0]; ai++;
1229: brow = bs*i;
1230: for (j=0; j<ncols; j++){
1231: /* bcol = bs*(*aj); */
1232: for (kcol=0; kcol<bs; kcol++){
1233: for (krow=0; krow<bs; krow++){
1234: atmp = PetscAbsScalar(*aa); aa++;
1235: row = brow + krow; /* row index */
1236: /* printf("val[%d,%d]: %gn",row,bcol+kcol,atmp); */
1237: if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1238: }
1239: }
1240: aj++;
1241: }
1242: }
1243: VecRestoreArray(v,&x);
1244: return(0);
1245: }
1247: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1248: {
1249: int ierr;
1252: MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1253: return(0);
1254: }
1256: int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1257: {
1258: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1260: *array = a->a;
1261: return(0);
1262: }
1264: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1265: {
1267: return(0);
1268: }
1270: /* -------------------------------------------------------------------*/
1271: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1272: MatGetRow_SeqBAIJ,
1273: MatRestoreRow_SeqBAIJ,
1274: MatMult_SeqBAIJ_N,
1275: MatMultAdd_SeqBAIJ_N,
1276: MatMultTranspose_SeqBAIJ,
1277: MatMultTransposeAdd_SeqBAIJ,
1278: MatSolve_SeqBAIJ_N,
1279: 0,
1280: 0,
1281: 0,
1282: MatLUFactor_SeqBAIJ,
1283: 0,
1284: 0,
1285: MatTranspose_SeqBAIJ,
1286: MatGetInfo_SeqBAIJ,
1287: MatEqual_SeqBAIJ,
1288: MatGetDiagonal_SeqBAIJ,
1289: MatDiagonalScale_SeqBAIJ,
1290: MatNorm_SeqBAIJ,
1291: 0,
1292: MatAssemblyEnd_SeqBAIJ,
1293: 0,
1294: MatSetOption_SeqBAIJ,
1295: MatZeroEntries_SeqBAIJ,
1296: MatZeroRows_SeqBAIJ,
1297: MatLUFactorSymbolic_SeqBAIJ,
1298: MatLUFactorNumeric_SeqBAIJ_N,
1299: 0,
1300: 0,
1301: MatSetUpPreallocation_SeqBAIJ,
1302: MatILUFactorSymbolic_SeqBAIJ,
1303: 0,
1304: MatGetArray_SeqBAIJ,
1305: MatRestoreArray_SeqBAIJ,
1306: MatDuplicate_SeqBAIJ,
1307: 0,
1308: 0,
1309: MatILUFactor_SeqBAIJ,
1310: 0,
1311: 0,
1312: MatGetSubMatrices_SeqBAIJ,
1313: MatIncreaseOverlap_SeqBAIJ,
1314: MatGetValues_SeqBAIJ,
1315: 0,
1316: MatPrintHelp_SeqBAIJ,
1317: MatScale_SeqBAIJ,
1318: 0,
1319: 0,
1320: 0,
1321: MatGetBlockSize_SeqBAIJ,
1322: MatGetRowIJ_SeqBAIJ,
1323: MatRestoreRowIJ_SeqBAIJ,
1324: 0,
1325: 0,
1326: 0,
1327: 0,
1328: 0,
1329: 0,
1330: MatSetValuesBlocked_SeqBAIJ,
1331: MatGetSubMatrix_SeqBAIJ,
1332: MatDestroy_SeqBAIJ,
1333: MatView_SeqBAIJ,
1334: MatGetPetscMaps_Petsc,
1335: 0,
1336: 0,
1337: 0,
1338: 0,
1339: 0,
1340: 0,
1341: MatGetRowMax_SeqBAIJ,
1342: MatConvert_Basic};
1344: EXTERN_C_BEGIN
1345: int MatStoreValues_SeqBAIJ(Mat mat)
1346: {
1347: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1348: int nz = aij->i[mat->m]*aij->bs*aij->bs2;
1349: int ierr;
1352: if (aij->nonew != 1) {
1353: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1354: }
1356: /* allocate space for values if not already there */
1357: if (!aij->saved_values) {
1358: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1359: }
1361: /* copy values over */
1362: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1363: return(0);
1364: }
1365: EXTERN_C_END
1367: EXTERN_C_BEGIN
1368: int MatRetrieveValues_SeqBAIJ(Mat mat)
1369: {
1370: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1371: int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1374: if (aij->nonew != 1) {
1375: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1376: }
1377: if (!aij->saved_values) {
1378: SETERRQ(1,"Must call MatStoreValues(A);first");
1379: }
1381: /* copy values over */
1382: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1383: return(0);
1384: }
1385: EXTERN_C_END
1387: EXTERN_C_BEGIN
1388: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1389: EXTERN_C_END
1391: EXTERN_C_BEGIN
1392: int MatCreate_SeqBAIJ(Mat B)
1393: {
1394: int ierr,size;
1395: Mat_SeqBAIJ *b;
1398: MPI_Comm_size(B->comm,&size);
1399: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1401: B->m = B->M = PetscMax(B->m,B->M);
1402: B->n = B->N = PetscMax(B->n,B->N);
1403: ierr = PetscNew(Mat_SeqBAIJ,&b);
1404: B->data = (void*)b;
1405: ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1406: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1407: B->factor = 0;
1408: B->lupivotthreshold = 1.0;
1409: B->mapping = 0;
1410: b->row = 0;
1411: b->col = 0;
1412: b->icol = 0;
1413: b->reallocs = 0;
1414: b->saved_values = 0;
1415: #if defined(PETSC_USE_MAT_SINGLE)
1416: b->setvalueslen = 0;
1417: b->setvaluescopy = PETSC_NULL;
1418: #endif
1419: b->single_precision_solves = PETSC_FALSE;
1421: PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1422: PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);
1424: b->sorted = PETSC_FALSE;
1425: b->roworiented = PETSC_TRUE;
1426: b->nonew = 0;
1427: b->diag = 0;
1428: b->solve_work = 0;
1429: b->mult_work = 0;
1430: B->spptr = 0;
1431: B->info.nz_unneeded = (PetscReal)b->maxnz;
1432: b->keepzeroedrows = PETSC_FALSE;
1434: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1435: "MatStoreValues_SeqBAIJ",
1436: MatStoreValues_SeqBAIJ);
1437: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1438: "MatRetrieveValues_SeqBAIJ",
1439: MatRetrieveValues_SeqBAIJ);
1440: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1441: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1442: MatSeqBAIJSetColumnIndices_SeqBAIJ);
1443: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1444: "MatConvert_SeqBAIJ_SeqAIJ",
1445: MatConvert_SeqBAIJ_SeqAIJ);
1446: return(0);
1447: }
1448: EXTERN_C_END
1450: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1451: {
1452: Mat C;
1453: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1454: int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1457: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1459: *B = 0;
1460: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1461: MatSetType(C,MATSEQBAIJ);
1462: c = (Mat_SeqBAIJ*)C->data;
1464: c->bs = a->bs;
1465: c->bs2 = a->bs2;
1466: c->mbs = a->mbs;
1467: c->nbs = a->nbs;
1468: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1470: PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1471: PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1472: for (i=0; i<mbs; i++) {
1473: c->imax[i] = a->imax[i];
1474: c->ilen[i] = a->ilen[i];
1475: }
1477: /* allocate the matrix space */
1478: c->singlemalloc = PETSC_TRUE;
1479: len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1480: PetscMalloc(len,&c->a);
1481: c->j = (int*)(c->a + nz*bs2);
1482: c->i = c->j + nz;
1483: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1484: if (mbs > 0) {
1485: PetscMemcpy(c->j,a->j,nz*sizeof(int));
1486: if (cpvalues == MAT_COPY_VALUES) {
1487: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1488: } else {
1489: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1490: }
1491: }
1493: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1494: c->sorted = a->sorted;
1495: c->roworiented = a->roworiented;
1496: c->nonew = a->nonew;
1498: if (a->diag) {
1499: PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1500: PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1501: for (i=0; i<mbs; i++) {
1502: c->diag[i] = a->diag[i];
1503: }
1504: } else c->diag = 0;
1505: c->nz = a->nz;
1506: c->maxnz = a->maxnz;
1507: c->solve_work = 0;
1508: C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */
1509: c->mult_work = 0;
1510: C->preallocated = PETSC_TRUE;
1511: C->assembled = PETSC_TRUE;
1512: *B = C;
1513: PetscFListDuplicate(A->qlist,&C->qlist);
1514: return(0);
1515: }
1517: EXTERN_C_BEGIN
1518: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1519: {
1520: Mat_SeqBAIJ *a;
1521: Mat B;
1522: int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1523: int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1524: int kmax,jcount,block,idx,point,nzcountb,extra_rows;
1525: int *masked,nmask,tmp,bs2,ishift;
1526: PetscScalar *aa;
1527: MPI_Comm comm = ((PetscObject)viewer)->comm;
1530: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1531: bs2 = bs*bs;
1533: MPI_Comm_size(comm,&size);
1534: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1535: PetscViewerBinaryGetDescriptor(viewer,&fd);
1536: PetscBinaryRead(fd,header,4,PETSC_INT);
1537: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1538: M = header[1]; N = header[2]; nz = header[3];
1540: if (header[3] < 0) {
1541: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1542: }
1544: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1546: /*
1547: This code adds extra rows to make sure the number of rows is
1548: divisible by the blocksize
1549: */
1550: mbs = M/bs;
1551: extra_rows = bs - M + bs*(mbs);
1552: if (extra_rows == bs) extra_rows = 0;
1553: else mbs++;
1554: if (extra_rows) {
1555: PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksizen");
1556: }
1558: /* read in row lengths */
1559: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1560: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1561: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1563: /* read in column indices */
1564: PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1565: PetscBinaryRead(fd,jj,nz,PETSC_INT);
1566: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1568: /* loop over row lengths determining block row lengths */
1569: ierr = PetscMalloc(mbs*sizeof(int),&browlengths);
1570: ierr = PetscMemzero(browlengths,mbs*sizeof(int));
1571: ierr = PetscMalloc(2*mbs*sizeof(int),&mask);
1572: ierr = PetscMemzero(mask,mbs*sizeof(int));
1573: masked = mask + mbs;
1574: rowcount = 0; nzcount = 0;
1575: for (i=0; i<mbs; i++) {
1576: nmask = 0;
1577: for (j=0; j<bs; j++) {
1578: kmax = rowlengths[rowcount];
1579: for (k=0; k<kmax; k++) {
1580: tmp = jj[nzcount++]/bs;
1581: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1582: }
1583: rowcount++;
1584: }
1585: browlengths[i] += nmask;
1586: /* zero out the mask elements we set */
1587: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1588: }
1590: /* create our matrix */
1591: MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1592: B = *A;
1593: a = (Mat_SeqBAIJ*)B->data;
1595: /* set matrix "i" values */
1596: a->i[0] = 0;
1597: for (i=1; i<= mbs; i++) {
1598: a->i[i] = a->i[i-1] + browlengths[i-1];
1599: a->ilen[i-1] = browlengths[i-1];
1600: }
1601: a->nz = 0;
1602: for (i=0; i<mbs; i++) a->nz += browlengths[i];
1604: /* read in nonzero values */
1605: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1606: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1607: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1609: /* set "a" and "j" values into matrix */
1610: nzcount = 0; jcount = 0;
1611: for (i=0; i<mbs; i++) {
1612: nzcountb = nzcount;
1613: nmask = 0;
1614: for (j=0; j<bs; j++) {
1615: kmax = rowlengths[i*bs+j];
1616: for (k=0; k<kmax; k++) {
1617: tmp = jj[nzcount++]/bs;
1618: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1619: }
1620: }
1621: /* sort the masked values */
1622: PetscSortInt(nmask,masked);
1624: /* set "j" values into matrix */
1625: maskcount = 1;
1626: for (j=0; j<nmask; j++) {
1627: a->j[jcount++] = masked[j];
1628: mask[masked[j]] = maskcount++;
1629: }
1630: /* set "a" values into matrix */
1631: ishift = bs2*a->i[i];
1632: for (j=0; j<bs; j++) {
1633: kmax = rowlengths[i*bs+j];
1634: for (k=0; k<kmax; k++) {
1635: tmp = jj[nzcountb]/bs ;
1636: block = mask[tmp] - 1;
1637: point = jj[nzcountb] - bs*tmp;
1638: idx = ishift + bs2*block + j + bs*point;
1639: a->a[idx] = (MatScalar)aa[nzcountb++];
1640: }
1641: }
1642: /* zero out the mask elements we set */
1643: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1644: }
1645: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1647: PetscFree(rowlengths);
1648: PetscFree(browlengths);
1649: PetscFree(aa);
1650: PetscFree(jj);
1651: PetscFree(mask);
1653: B->assembled = PETSC_TRUE;
1654:
1655: MatView_Private(B);
1656: return(0);
1657: }
1658: EXTERN_C_END
1660: /*@C
1661: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1662: compressed row) format. For good matrix assembly performance the
1663: user should preallocate the matrix storage by setting the parameter nz
1664: (or the array nnz). By setting these parameters accurately, performance
1665: during matrix assembly can be increased by more than a factor of 50.
1667: Collective on MPI_Comm
1669: Input Parameters:
1670: + comm - MPI communicator, set to PETSC_COMM_SELF
1671: . bs - size of block
1672: . m - number of rows
1673: . n - number of columns
1674: . nz - number of nonzero blocks per block row (same for all rows)
1675: - nnz - array containing the number of nonzero blocks in the various block rows
1676: (possibly different for each block row) or PETSC_NULL
1678: Output Parameter:
1679: . A - the matrix
1681: Options Database Keys:
1682: . -mat_no_unroll - uses code that does not unroll the loops in the
1683: block calculations (much slower)
1684: . -mat_block_size - size of the blocks to use
1686: Level: intermediate
1688: Notes:
1689: A nonzero block is any block that as 1 or more nonzeros in it
1691: The block AIJ format is fully compatible with standard Fortran 77
1692: storage. That is, the stored row and column indices can begin at
1693: either one (as in Fortran) or zero. See the users' manual for details.
1695: Specify the preallocated storage with either nz or nnz (not both).
1696: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1697: allocation. For additional details, see the users manual chapter on
1698: matrices.
1700: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1701: @*/
1702: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1703: {
1705:
1707: MatCreate(comm,m,n,m,n,A);
1708: MatSetType(*A,MATSEQBAIJ);
1709: MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
1710: return(0);
1711: }
1713: /*@C
1714: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1715: per row in the matrix. For good matrix assembly performance the
1716: user should preallocate the matrix storage by setting the parameter nz
1717: (or the array nnz). By setting these parameters accurately, performance
1718: during matrix assembly can be increased by more than a factor of 50.
1720: Collective on MPI_Comm
1722: Input Parameters:
1723: + A - the matrix
1724: . bs - size of block
1725: . nz - number of block nonzeros per block row (same for all rows)
1726: - nnz - array containing the number of block nonzeros in the various block rows
1727: (possibly different for each block row) or PETSC_NULL
1729: Options Database Keys:
1730: . -mat_no_unroll - uses code that does not unroll the loops in the
1731: block calculations (much slower)
1732: . -mat_block_size - size of the blocks to use
1734: Level: intermediate
1736: Notes:
1737: The block AIJ format is fully compatible with standard Fortran 77
1738: storage. That is, the stored row and column indices can begin at
1739: either one (as in Fortran) or zero. See the users' manual for details.
1741: Specify the preallocated storage with either nz or nnz (not both).
1742: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1743: allocation. For additional details, see the users manual chapter on
1744: matrices.
1746: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1747: @*/
1748: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1749: {
1750: Mat_SeqBAIJ *b;
1751: int i,len,ierr,mbs,nbs,bs2,newbs = bs;
1752: PetscTruth flg;
1755: PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);
1756: if (!flg) return(0);
1758: B->preallocated = PETSC_TRUE;
1759: PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1760: if (nnz && newbs != bs) {
1761: SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1762: }
1763: bs = newbs;
1765: mbs = B->m/bs;
1766: nbs = B->n/bs;
1767: bs2 = bs*bs;
1769: if (mbs*bs!=B->m || nbs*bs!=B->n) {
1770: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1771: }
1773: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1774: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1775: if (nnz) {
1776: for (i=0; i<mbs; i++) {
1777: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1778: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1779: }
1780: }
1782: b = (Mat_SeqBAIJ*)B->data;
1783: ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1784: B->ops->solve = MatSolve_SeqBAIJ_Update;
1785: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
1786: if (!flg) {
1787: switch (bs) {
1788: case 1:
1789: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1790: B->ops->mult = MatMult_SeqBAIJ_1;
1791: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
1792: break;
1793: case 2:
1794: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1795: B->ops->mult = MatMult_SeqBAIJ_2;
1796: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
1797: break;
1798: case 3:
1799: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1800: B->ops->mult = MatMult_SeqBAIJ_3;
1801: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
1802: break;
1803: case 4:
1804: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1805: B->ops->mult = MatMult_SeqBAIJ_4;
1806: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
1807: break;
1808: case 5:
1809: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1810: B->ops->mult = MatMult_SeqBAIJ_5;
1811: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
1812: break;
1813: case 6:
1814: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1815: B->ops->mult = MatMult_SeqBAIJ_6;
1816: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
1817: break;
1818: case 7:
1819: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1820: B->ops->mult = MatMult_SeqBAIJ_7;
1821: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
1822: break;
1823: default:
1824: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1825: B->ops->mult = MatMult_SeqBAIJ_N;
1826: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
1827: break;
1828: }
1829: }
1830: b->bs = bs;
1831: b->mbs = mbs;
1832: b->nbs = nbs;
1833: PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1834: if (!nnz) {
1835: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1836: else if (nz <= 0) nz = 1;
1837: for (i=0; i<mbs; i++) b->imax[i] = nz;
1838: nz = nz*mbs;
1839: } else {
1840: nz = 0;
1841: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1842: }
1844: /* allocate the matrix space */
1845: len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1846: ierr = PetscMalloc(len,&b->a);
1847: ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1848: b->j = (int*)(b->a + nz*bs2);
1849: PetscMemzero(b->j,nz*sizeof(int));
1850: b->i = b->j + nz;
1851: b->singlemalloc = PETSC_TRUE;
1853: b->i[0] = 0;
1854: for (i=1; i<mbs+1; i++) {
1855: b->i[i] = b->i[i-1] + b->imax[i-1];
1856: }
1858: /* b->ilen will count nonzeros in each block row so far. */
1859: PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1860: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1861: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1863: b->bs = bs;
1864: b->bs2 = bs2;
1865: b->mbs = mbs;
1866: b->nz = 0;
1867: b->maxnz = nz*bs2;
1868: B->info.nz_unneeded = (PetscReal)b->maxnz;
1869: return(0);
1870: }