Actual source code: baij.c
1: /*$Id: baij.c,v 1.225 2001/04/10 19:35:34 bsmith Exp $*/
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include "petscsys.h"
8: #include "src/mat/impls/baij/seq/baij.h"
9: #include "src/vec/vecimpl.h"
10: #include "src/inline/spops.h"
12: /* UGLY, ugly, ugly
13: When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14: not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
15: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16: converts the entries into single precision and then calls ..._MatScalar() to put them
17: into the single precision data structures.
18: */
19: #if defined(PETSC_USE_MAT_SINGLE)
20: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21: #else
22: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23: #endif
25: #define CHUNKSIZE 10
27: /*
28: Checks for missing diagonals
29: */
30: int MatMissingDiagonal_SeqBAIJ(Mat A)
31: {
32: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
33: int *diag,*jj = a->j,i,ierr;
36: MatMarkDiagonal_SeqBAIJ(A);
37: diag = a->diag;
38: for (i=0; i<a->mbs; i++) {
39: if (jj[diag[i]] != i) {
40: SETERRQ1(1,"Matrix is missing diagonal number %d",i);
41: }
42: }
43: return(0);
44: }
46: int MatMarkDiagonal_SeqBAIJ(Mat A)
47: {
48: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
49: int i,j,*diag,m = a->mbs,ierr;
52: if (a->diag) return(0);
54: PetscMalloc((m+1)*sizeof(int),&diag);
55: PetscLogObjectMemory(A,(m+1)*sizeof(int));
56: for (i=0; i<m; i++) {
57: diag[i] = a->i[i+1];
58: for (j=a->i[i]; j<a->i[i+1]; j++) {
59: if (a->j[j] == i) {
60: diag[i] = j;
61: break;
62: }
63: }
64: }
65: a->diag = diag;
66: return(0);
67: }
70: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
72: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
73: {
74: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
75: int ierr,n = a->mbs,i;
78: *nn = n;
79: if (!ia) return(0);
80: if (symmetric) {
81: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
82: } else if (oshift == 1) {
83: /* temporarily add 1 to i and j indices */
84: int nz = a->i[n];
85: for (i=0; i<nz; i++) a->j[i]++;
86: for (i=0; i<n+1; i++) a->i[i]++;
87: *ia = a->i; *ja = a->j;
88: } else {
89: *ia = a->i; *ja = a->j;
90: }
92: return(0);
93: }
95: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
96: {
97: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
98: int i,n = a->mbs,ierr;
101: if (!ia) return(0);
102: if (symmetric) {
103: PetscFree(*ia);
104: PetscFree(*ja);
105: } else if (oshift == 1) {
106: int nz = a->i[n]-1;
107: for (i=0; i<nz; i++) a->j[i]--;
108: for (i=0; i<n+1; i++) a->i[i]--;
109: }
110: return(0);
111: }
113: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
114: {
115: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
118: *bs = baij->bs;
119: return(0);
120: }
123: int MatDestroy_SeqBAIJ(Mat A)
124: {
125: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
126: int ierr;
129: #if defined(PETSC_USE_LOG)
130: PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
131: #endif
132: PetscFree(a->a);
133: if (!a->singlemalloc) {
134: PetscFree(a->i);
135: PetscFree(a->j);
136: }
137: if (a->row) {
138: ISDestroy(a->row);
139: }
140: if (a->col) {
141: ISDestroy(a->col);
142: }
143: if (a->diag) {PetscFree(a->diag);}
144: if (a->ilen) {PetscFree(a->ilen);}
145: if (a->imax) {PetscFree(a->imax);}
146: if (a->solve_work) {PetscFree(a->solve_work);}
147: if (a->mult_work) {PetscFree(a->mult_work);}
148: if (a->icol) {ISDestroy(a->icol);}
149: if (a->saved_values) {PetscFree(a->saved_values);}
150: #if defined(PETSC_USE_MAT_SINGLE)
151: if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
152: #endif
153: PetscFree(a);
154: return(0);
155: }
157: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
158: {
159: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
162: if (op == MAT_ROW_ORIENTED) a->roworiented = PETSC_TRUE;
163: else if (op == MAT_COLUMN_ORIENTED) a->roworiented = PETSC_FALSE;
164: else if (op == MAT_COLUMNS_SORTED) a->sorted = PETSC_TRUE;
165: else if (op == MAT_COLUMNS_UNSORTED) a->sorted = PETSC_FALSE;
166: else if (op == MAT_KEEP_ZEROED_ROWS) a->keepzeroedrows = PETSC_TRUE;
167: else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1;
168: else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1;
169: else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2;
170: else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0;
171: else if (op == MAT_ROWS_SORTED ||
172: op == MAT_ROWS_UNSORTED ||
173: op == MAT_YES_NEW_DIAGONALS ||
174: op == MAT_IGNORE_OFF_PROC_ENTRIES ||
175: op == MAT_USE_HASH_TABLE) {
176: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignoredn");
177: } else if (op == MAT_NO_NEW_DIAGONALS) {
178: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
179: } else {
180: SETERRQ(PETSC_ERR_SUP,"unknown option");
181: }
182: return(0);
183: }
185: int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
186: {
188: if (m) *m = 0;
189: if (n) *n = A->m;
190: return(0);
191: }
193: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
194: {
195: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
196: int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
197: MatScalar *aa,*aa_i;
198: Scalar *v_i;
201: bs = a->bs;
202: ai = a->i;
203: aj = a->j;
204: aa = a->a;
205: bs2 = a->bs2;
206:
207: if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
208:
209: bn = row/bs; /* Block number */
210: bp = row % bs; /* Block Position */
211: M = ai[bn+1] - ai[bn];
212: *nz = bs*M;
213:
214: if (v) {
215: *v = 0;
216: if (*nz) {
217: PetscMalloc((*nz)*sizeof(Scalar),v);
218: for (i=0; i<M; i++) { /* for each block in the block row */
219: v_i = *v + i*bs;
220: aa_i = aa + bs2*(ai[bn] + i);
221: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
222: }
223: }
224: }
226: if (idx) {
227: *idx = 0;
228: if (*nz) {
229: PetscMalloc((*nz)*sizeof(int),idx);
230: for (i=0; i<M; i++) { /* for each block in the block row */
231: idx_i = *idx + i*bs;
232: itmp = bs*aj[ai[bn] + i];
233: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
234: }
235: }
236: }
237: return(0);
238: }
240: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
241: {
245: if (idx) {if (*idx) {PetscFree(*idx);}}
246: if (v) {if (*v) {PetscFree(*v);}}
247: return(0);
248: }
250: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
251: {
252: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
253: Mat C;
254: int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
255: int *rows,*cols,bs2=a->bs2;
256: Scalar *array;
259: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
260: PetscMalloc((1+nbs)*sizeof(int),&col);
261: PetscMemzero(col,(1+nbs)*sizeof(int));
263: #if defined(PETSC_USE_MAT_SINGLE)
264: PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);
265: for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i];
266: #else
267: array = a->a;
268: #endif
270: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
271: MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
272: PetscFree(col);
273: PetscMalloc(2*bs*sizeof(int),&rows);
274: cols = rows + bs;
275: for (i=0; i<mbs; i++) {
276: cols[0] = i*bs;
277: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
278: len = ai[i+1] - ai[i];
279: for (j=0; j<len; j++) {
280: rows[0] = (*aj++)*bs;
281: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
282: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
283: array += bs2;
284: }
285: }
286: PetscFree(rows);
287: #if defined(PETSC_USE_MAT_SINGLE)
288: PetscFree(array);
289: #endif
290:
291: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
292: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
293:
294: if (B) {
295: *B = C;
296: } else {
297: MatHeaderCopy(A,C);
298: }
299: return(0);
300: }
302: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
303: {
304: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
305: int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
306: Scalar *aa;
307: FILE *file;
310: ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);
311: ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
312: col_lens[0] = MAT_COOKIE;
314: col_lens[1] = A->m;
315: col_lens[2] = A->n;
316: col_lens[3] = a->nz*bs2;
318: /* store lengths of each row and write (including header) to file */
319: count = 0;
320: for (i=0; i<a->mbs; i++) {
321: for (j=0; j<bs; j++) {
322: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
323: }
324: }
325: PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
326: PetscFree(col_lens);
328: /* store column indices (zero start index) */
329: ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
330: count = 0;
331: for (i=0; i<a->mbs; i++) {
332: for (j=0; j<bs; j++) {
333: for (k=a->i[i]; k<a->i[i+1]; k++) {
334: for (l=0; l<bs; l++) {
335: jj[count++] = bs*a->j[k] + l;
336: }
337: }
338: }
339: }
340: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
341: PetscFree(jj);
343: /* store nonzero values */
344: ierr = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);
345: count = 0;
346: for (i=0; i<a->mbs; i++) {
347: for (j=0; j<bs; j++) {
348: for (k=a->i[i]; k<a->i[i+1]; k++) {
349: for (l=0; l<bs; l++) {
350: aa[count++] = a->a[bs2*k + l*bs + j];
351: }
352: }
353: }
354: }
355: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
356: PetscFree(aa);
358: PetscViewerBinaryGetInfoPointer(viewer,&file);
359: if (file) {
360: fprintf(file,"-matload_block_size %dn",a->bs);
361: }
362: return(0);
363: }
365: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
366: {
367: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
368: int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
369: PetscViewerFormat format;
372: PetscViewerGetFormat(viewer,&format);
373: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
374: PetscViewerASCIIPrintf(viewer," block size is %dn",bs);
375: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
376: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
377: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
378: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
379: for (i=0; i<a->mbs; i++) {
380: for (j=0; j<bs; j++) {
381: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
382: for (k=a->i[i]; k<a->i[i+1]; k++) {
383: for (l=0; l<bs; l++) {
384: #if defined(PETSC_USE_COMPLEX)
385: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
386: PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
387: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
388: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
389: PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
390: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
391: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
392: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
393: }
394: #else
395: if (a->a[bs2*k + l*bs + j] != 0.0) {
396: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
397: }
398: #endif
399: }
400: }
401: PetscViewerASCIIPrintf(viewer,"n");
402: }
403: }
404: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
405: } else {
406: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
407: for (i=0; i<a->mbs; i++) {
408: for (j=0; j<bs; j++) {
409: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
410: for (k=a->i[i]; k<a->i[i+1]; k++) {
411: for (l=0; l<bs; l++) {
412: #if defined(PETSC_USE_COMPLEX)
413: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
414: PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
415: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
416: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
417: PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
418: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
419: } else {
420: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
421: }
422: #else
423: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
424: #endif
425: }
426: }
427: PetscViewerASCIIPrintf(viewer,"n");
428: }
429: }
430: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
431: }
432: PetscViewerFlush(viewer);
433: return(0);
434: }
436: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
437: {
438: Mat A = (Mat) Aa;
439: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
440: int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
441: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
442: MatScalar *aa;
443: PetscViewer viewer;
447: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
448: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
450: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
452: /* loop over matrix elements drawing boxes */
453: color = PETSC_DRAW_BLUE;
454: for (i=0,row=0; i<mbs; i++,row+=bs) {
455: for (j=a->i[i]; j<a->i[i+1]; j++) {
456: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
457: x_l = a->j[j]*bs; x_r = x_l + 1.0;
458: aa = a->a + j*bs2;
459: for (k=0; k<bs; k++) {
460: for (l=0; l<bs; l++) {
461: if (PetscRealPart(*aa++) >= 0.) continue;
462: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
463: }
464: }
465: }
466: }
467: color = PETSC_DRAW_CYAN;
468: for (i=0,row=0; i<mbs; i++,row+=bs) {
469: for (j=a->i[i]; j<a->i[i+1]; j++) {
470: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
471: x_l = a->j[j]*bs; x_r = x_l + 1.0;
472: aa = a->a + j*bs2;
473: for (k=0; k<bs; k++) {
474: for (l=0; l<bs; l++) {
475: if (PetscRealPart(*aa++) != 0.) continue;
476: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
477: }
478: }
479: }
480: }
482: color = PETSC_DRAW_RED;
483: for (i=0,row=0; i<mbs; i++,row+=bs) {
484: for (j=a->i[i]; j<a->i[i+1]; j++) {
485: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
486: x_l = a->j[j]*bs; x_r = x_l + 1.0;
487: aa = a->a + j*bs2;
488: for (k=0; k<bs; k++) {
489: for (l=0; l<bs; l++) {
490: if (PetscRealPart(*aa++) <= 0.) continue;
491: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
492: }
493: }
494: }
495: }
496: return(0);
497: }
499: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
500: {
501: int ierr;
502: PetscReal xl,yl,xr,yr,w,h;
503: PetscDraw draw;
504: PetscTruth isnull;
508: PetscViewerDrawGetDraw(viewer,0,&draw);
509: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
511: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
512: xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
513: xr += w; yr += h; xl = -w; yl = -h;
514: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
515: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
516: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
517: return(0);
518: }
520: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
521: {
522: int ierr;
523: PetscTruth issocket,isascii,isbinary,isdraw;
526: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
527: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
528: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
529: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
530: if (issocket) {
531: SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
532: } else if (isascii){
533: MatView_SeqBAIJ_ASCII(A,viewer);
534: } else if (isbinary) {
535: MatView_SeqBAIJ_Binary(A,viewer);
536: } else if (isdraw) {
537: MatView_SeqBAIJ_Draw(A,viewer);
538: } else {
539: SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
540: }
541: return(0);
542: }
545: int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
546: {
547: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
548: int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
549: int *ai = a->i,*ailen = a->ilen;
550: int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
551: MatScalar *ap,*aa = a->a,zero = 0.0;
554: for (k=0; k<m; k++) { /* loop over rows */
555: row = im[k]; brow = row/bs;
556: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
557: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
558: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
559: nrow = ailen[brow];
560: for (l=0; l<n; l++) { /* loop over columns */
561: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
562: if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
563: col = in[l] ;
564: bcol = col/bs;
565: cidx = col%bs;
566: ridx = row%bs;
567: high = nrow;
568: low = 0; /* assume unsorted */
569: while (high-low > 5) {
570: t = (low+high)/2;
571: if (rp[t] > bcol) high = t;
572: else low = t;
573: }
574: for (i=low; i<high; i++) {
575: if (rp[i] > bcol) break;
576: if (rp[i] == bcol) {
577: *v++ = ap[bs2*i+bs*cidx+ridx];
578: goto finished;
579: }
580: }
581: *v++ = zero;
582: finished:;
583: }
584: }
585: return(0);
586: }
588: #if defined(PETSC_USE_MAT_SINGLE)
589: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
590: {
591: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
592: int ierr,i,N = m*n*b->bs2;
593: MatScalar *vsingle;
596: if (N > b->setvalueslen) {
597: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
598: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
599: b->setvalueslen = N;
600: }
601: vsingle = b->setvaluescopy;
602: for (i=0; i<N; i++) {
603: vsingle[i] = v[i];
604: }
605: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
606: return(0);
607: }
608: #endif
611: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
612: {
613: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
614: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
615: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
616: int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
617: PetscTruth roworiented=a->roworiented;
618: MatScalar *value = v,*ap,*aa = a->a,*bap;
621: if (roworiented) {
622: stepval = (n-1)*bs;
623: } else {
624: stepval = (m-1)*bs;
625: }
626: for (k=0; k<m; k++) { /* loop over added rows */
627: row = im[k];
628: if (row < 0) continue;
629: #if defined(PETSC_USE_BOPT_g)
630: if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
631: #endif
632: rp = aj + ai[row];
633: ap = aa + bs2*ai[row];
634: rmax = imax[row];
635: nrow = ailen[row];
636: low = 0;
637: for (l=0; l<n; l++) { /* loop over added columns */
638: if (in[l] < 0) continue;
639: #if defined(PETSC_USE_BOPT_g)
640: if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
641: #endif
642: col = in[l];
643: if (roworiented) {
644: value = v + k*(stepval+bs)*bs + l*bs;
645: } else {
646: value = v + l*(stepval+bs)*bs + k*bs;
647: }
648: if (!sorted) low = 0; high = nrow;
649: while (high-low > 7) {
650: t = (low+high)/2;
651: if (rp[t] > col) high = t;
652: else low = t;
653: }
654: for (i=low; i<high; i++) {
655: if (rp[i] > col) break;
656: if (rp[i] == col) {
657: bap = ap + bs2*i;
658: if (roworiented) {
659: if (is == ADD_VALUES) {
660: for (ii=0; ii<bs; ii++,value+=stepval) {
661: for (jj=ii; jj<bs2; jj+=bs) {
662: bap[jj] += *value++;
663: }
664: }
665: } else {
666: for (ii=0; ii<bs; ii++,value+=stepval) {
667: for (jj=ii; jj<bs2; jj+=bs) {
668: bap[jj] = *value++;
669: }
670: }
671: }
672: } else {
673: if (is == ADD_VALUES) {
674: for (ii=0; ii<bs; ii++,value+=stepval) {
675: for (jj=0; jj<bs; jj++) {
676: *bap++ += *value++;
677: }
678: }
679: } else {
680: for (ii=0; ii<bs; ii++,value+=stepval) {
681: for (jj=0; jj<bs; jj++) {
682: *bap++ = *value++;
683: }
684: }
685: }
686: }
687: goto noinsert2;
688: }
689: }
690: if (nonew == 1) goto noinsert2;
691: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
692: if (nrow >= rmax) {
693: /* there is no extra room in row, therefore enlarge */
694: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
695: MatScalar *new_a;
697: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
699: /* malloc new storage space */
700: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
701: ierr = PetscMalloc(len,&new_a);
702: new_j = (int*)(new_a + bs2*new_nz);
703: new_i = new_j + new_nz;
705: /* copy over old data into new slots */
706: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
707: for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
708: PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
709: len = (new_nz - CHUNKSIZE - ai[row] - nrow);
710: PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
711: PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
712: PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
713: PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
714: /* free up old matrix storage */
715: PetscFree(a->a);
716: if (!a->singlemalloc) {
717: PetscFree(a->i);
718: PetscFree(a->j);
719: }
720: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
721: a->singlemalloc = PETSC_TRUE;
723: rp = aj + ai[row]; ap = aa + bs2*ai[row];
724: rmax = imax[row] = imax[row] + CHUNKSIZE;
725: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
726: a->maxnz += bs2*CHUNKSIZE;
727: a->reallocs++;
728: a->nz++;
729: }
730: N = nrow++ - 1;
731: /* shift up all the later entries in this row */
732: for (ii=N; ii>=i; ii--) {
733: rp[ii+1] = rp[ii];
734: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
735: }
736: if (N >= i) {
737: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
738: }
739: rp[i] = col;
740: bap = ap + bs2*i;
741: if (roworiented) {
742: for (ii=0; ii<bs; ii++,value+=stepval) {
743: for (jj=ii; jj<bs2; jj+=bs) {
744: bap[jj] = *value++;
745: }
746: }
747: } else {
748: for (ii=0; ii<bs; ii++,value+=stepval) {
749: for (jj=0; jj<bs; jj++) {
750: *bap++ = *value++;
751: }
752: }
753: }
754: noinsert2:;
755: low = i;
756: }
757: ailen[row] = nrow;
758: }
759: return(0);
760: }
763: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
764: {
765: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
766: int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
767: int m = A->m,*ip,N,*ailen = a->ilen;
768: int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
769: MatScalar *aa = a->a,*ap;
772: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
774: if (m) rmax = ailen[0];
775: for (i=1; i<mbs; i++) {
776: /* move each row back by the amount of empty slots (fshift) before it*/
777: fshift += imax[i-1] - ailen[i-1];
778: rmax = PetscMax(rmax,ailen[i]);
779: if (fshift) {
780: ip = aj + ai[i]; ap = aa + bs2*ai[i];
781: N = ailen[i];
782: for (j=0; j<N; j++) {
783: ip[j-fshift] = ip[j];
784: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
785: }
786: }
787: ai[i] = ai[i-1] + ailen[i-1];
788: }
789: if (mbs) {
790: fshift += imax[mbs-1] - ailen[mbs-1];
791: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
792: }
793: /* reset ilen and imax for each row */
794: for (i=0; i<mbs; i++) {
795: ailen[i] = imax[i] = ai[i+1] - ai[i];
796: }
797: a->nz = ai[mbs];
799: /* diagonals may have moved, so kill the diagonal pointers */
800: if (fshift && a->diag) {
801: PetscFree(a->diag);
802: PetscLogObjectMemory(A,-(m+1)*sizeof(int));
803: a->diag = 0;
804: }
805: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",
806: m,A->n,a->bs,fshift*bs2,a->nz*bs2);
807: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %dn",
808: a->reallocs);
809: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %dn",rmax);
810: a->reallocs = 0;
811: A->info.nz_unneeded = (PetscReal)fshift*bs2;
813: return(0);
814: }
818: /*
819: This function returns an array of flags which indicate the locations of contiguous
820: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
821: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
822: Assume: sizes should be long enough to hold all the values.
823: */
824: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
825: {
826: int i,j,k,row;
827: PetscTruth flg;
830: for (i=0,j=0; i<n; j++) {
831: row = idx[i];
832: if (row%bs!=0) { /* Not the begining of a block */
833: sizes[j] = 1;
834: i++;
835: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
836: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
837: i++;
838: } else { /* Begining of the block, so check if the complete block exists */
839: flg = PETSC_TRUE;
840: for (k=1; k<bs; k++) {
841: if (row+k != idx[i+k]) { /* break in the block */
842: flg = PETSC_FALSE;
843: break;
844: }
845: }
846: if (flg == PETSC_TRUE) { /* No break in the bs */
847: sizes[j] = bs;
848: i+= bs;
849: } else {
850: sizes[j] = 1;
851: i++;
852: }
853: }
854: }
855: *bs_max = j;
856: return(0);
857: }
858:
859: int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag)
860: {
861: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
862: int ierr,i,j,k,count,is_n,*is_idx,*rows;
863: int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
864: Scalar zero = 0.0;
865: MatScalar *aa;
868: /* Make a copy of the IS and sort it */
869: ISGetLocalSize(is,&is_n);
870: ISGetIndices(is,&is_idx);
872: /* allocate memory for rows,sizes */
873: ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);
874: sizes = rows + is_n;
876: /* copy IS values to rows, and sort them */
877: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
878: PetscSortInt(is_n,rows);
879: if (baij->keepzeroedrows) {
880: for (i=0; i<is_n; i++) { sizes[i] = 1; }
881: bs_max = is_n;
882: } else {
883: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
884: }
885: ISRestoreIndices(is,&is_idx);
887: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
888: row = rows[j];
889: if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
890: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
891: aa = baij->a + baij->i[row/bs]*bs2 + (row%bs);
892: if (sizes[i] == bs && !baij->keepzeroedrows) {
893: if (diag) {
894: if (baij->ilen[row/bs] > 0) {
895: baij->ilen[row/bs] = 1;
896: baij->j[baij->i[row/bs]] = row/bs;
897: PetscMemzero(aa,count*bs*sizeof(MatScalar));
898: }
899: /* Now insert all the diagonal values for this bs */
900: for (k=0; k<bs; k++) {
901: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
902: }
903: } else { /* (!diag) */
904: baij->ilen[row/bs] = 0;
905: } /* end (!diag) */
906: } else { /* (sizes[i] != bs) */
907: #if defined (PETSC_USE_DEBUG)
908: if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
909: #endif
910: for (k=0; k<count; k++) {
911: aa[0] = zero;
912: aa += bs;
913: }
914: if (diag) {
915: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
916: }
917: }
918: }
920: PetscFree(rows);
921: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
922: return(0);
923: }
925: int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
926: {
927: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
928: int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
929: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
930: int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
931: int ridx,cidx,bs2=a->bs2,ierr;
932: PetscTruth roworiented=a->roworiented;
933: MatScalar *ap,value,*aa=a->a,*bap;
936: for (k=0; k<m; k++) { /* loop over added rows */
937: row = im[k]; brow = row/bs;
938: if (row < 0) continue;
939: #if defined(PETSC_USE_BOPT_g)
940: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
941: #endif
942: rp = aj + ai[brow];
943: ap = aa + bs2*ai[brow];
944: rmax = imax[brow];
945: nrow = ailen[brow];
946: low = 0;
947: for (l=0; l<n; l++) { /* loop over added columns */
948: if (in[l] < 0) continue;
949: #if defined(PETSC_USE_BOPT_g)
950: if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
951: #endif
952: col = in[l]; bcol = col/bs;
953: ridx = row % bs; cidx = col % bs;
954: if (roworiented) {
955: value = v[l + k*n];
956: } else {
957: value = v[k + l*m];
958: }
959: if (!sorted) low = 0; high = nrow;
960: while (high-low > 7) {
961: t = (low+high)/2;
962: if (rp[t] > bcol) high = t;
963: else low = t;
964: }
965: for (i=low; i<high; i++) {
966: if (rp[i] > bcol) break;
967: if (rp[i] == bcol) {
968: bap = ap + bs2*i + bs*cidx + ridx;
969: if (is == ADD_VALUES) *bap += value;
970: else *bap = value;
971: goto noinsert1;
972: }
973: }
974: if (nonew == 1) goto noinsert1;
975: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
976: if (nrow >= rmax) {
977: /* there is no extra room in row, therefore enlarge */
978: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
979: MatScalar *new_a;
981: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
983: /* Malloc new storage space */
984: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
985: ierr = PetscMalloc(len,&new_a);
986: new_j = (int*)(new_a + bs2*new_nz);
987: new_i = new_j + new_nz;
989: /* copy over old data into new slots */
990: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
991: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
992: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
993: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
994: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
995: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
996: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
997: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
998: /* free up old matrix storage */
999: PetscFree(a->a);
1000: if (!a->singlemalloc) {
1001: PetscFree(a->i);
1002: PetscFree(a->j);
1003: }
1004: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1005: a->singlemalloc = PETSC_TRUE;
1007: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1008: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1009: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1010: a->maxnz += bs2*CHUNKSIZE;
1011: a->reallocs++;
1012: a->nz++;
1013: }
1014: N = nrow++ - 1;
1015: /* shift up all the later entries in this row */
1016: for (ii=N; ii>=i; ii--) {
1017: rp[ii+1] = rp[ii];
1018: ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1019: }
1020: if (N>=i) {
1021: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1022: }
1023: rp[i] = bcol;
1024: ap[bs2*i + bs*cidx + ridx] = value;
1025: noinsert1:;
1026: low = i;
1027: }
1028: ailen[brow] = nrow;
1029: }
1030: return(0);
1031: }
1033: EXTERN int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,MatLUInfo*,Mat*);
1034: EXTERN int MatLUFactor_SeqBAIJ(Mat,IS,IS,MatLUInfo*);
1035: EXTERN int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1036: EXTERN int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1037: EXTERN int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1038: EXTERN int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
1039: EXTERN int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1040: EXTERN int MatScale_SeqBAIJ(Scalar*,Mat);
1041: EXTERN int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
1042: EXTERN int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
1043: EXTERN int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1044: EXTERN int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1045: EXTERN int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1046: EXTERN int MatZeroEntries_SeqBAIJ(Mat);
1048: EXTERN int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1049: EXTERN int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1050: EXTERN int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1051: EXTERN int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1052: EXTERN int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1053: EXTERN int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1054: EXTERN int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1055: EXTERN int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1056: EXTERN int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
1057: EXTERN int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
1058: EXTERN int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
1059: EXTERN int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
1060: EXTERN int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
1061: EXTERN int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
1062: EXTERN int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
1064: EXTERN int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1065: EXTERN int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1066: EXTERN int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1067: EXTERN int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1068: EXTERN int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1069: EXTERN int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1070: EXTERN int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1072: EXTERN int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1073: EXTERN int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1074: EXTERN int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1075: EXTERN int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1076: EXTERN int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1077: EXTERN int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1078: EXTERN int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1079: EXTERN int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1081: EXTERN int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1082: EXTERN int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1083: EXTERN int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1084: EXTERN int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1085: EXTERN int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1086: EXTERN int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1087: EXTERN int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1088: EXTERN int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1090: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1091: {
1092: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1093: Mat outA;
1094: int ierr;
1095: PetscTruth row_identity,col_identity;
1098: if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1099: ISIdentity(row,&row_identity);
1100: ISIdentity(col,&col_identity);
1101: if (!row_identity || !col_identity) {
1102: SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1103: }
1105: outA = inA;
1106: inA->factor = FACTOR_LU;
1108: if (!a->diag) {
1109: MatMarkDiagonal_SeqBAIJ(inA);
1110: }
1111: /*
1112: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1113: for ILU(0) factorization with natural ordering
1114: */
1115: switch (a->bs) {
1116: case 1:
1117: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1118: inA->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering;
1119: inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
1120: PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=1n");
1121: case 2:
1122: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1123: inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering;
1124: inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
1125: PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2n");
1126: break;
1127: case 3:
1128: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1129: inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering;
1130: inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering;
1131: PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3n");
1132: break;
1133: case 4:
1134: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1135: inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering;
1136: inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering;
1137: PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4n");
1138: break;
1139: case 5:
1140: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1141: inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering;
1142: inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering;
1143: PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5n");
1144: break;
1145: case 6:
1146: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1147: inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering;
1148: inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering;
1149: PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6n");
1150: break;
1151: case 7:
1152: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1153: inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering;
1154: inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering;
1155: PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7n");
1156: break;
1157: default:
1158: a->row = row;
1159: a->col = col;
1160: ierr = PetscObjectReference((PetscObject)row);
1161: ierr = PetscObjectReference((PetscObject)col);
1163: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1164: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1165: PetscLogObjectParent(inA,a->icol);
1167: if (!a->solve_work) {
1168: PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);
1169: PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar));
1170: }
1171: }
1173: MatLUFactorNumeric(inA,&outA);
1175: return(0);
1176: }
1177: int MatPrintHelp_SeqBAIJ(Mat A)
1178: {
1179: static PetscTruth called = PETSC_FALSE;
1180: MPI_Comm comm = A->comm;
1181: int ierr;
1184: if (called) {return(0);} else called = PETSC_TRUE;
1185: (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):n");
1186: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>n");
1187: return(0);
1188: }
1190: EXTERN_C_BEGIN
1191: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1192: {
1193: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1194: int i,nz,n;
1197: nz = baij->maxnz;
1198: n = mat->n;
1199: for (i=0; i<nz; i++) {
1200: baij->j[i] = indices[i];
1201: }
1202: baij->nz = nz;
1203: for (i=0; i<n; i++) {
1204: baij->ilen[i] = baij->imax[i];
1205: }
1207: return(0);
1208: }
1209: EXTERN_C_END
1211: /*@
1212: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1213: in the matrix.
1215: Input Parameters:
1216: + mat - the SeqBAIJ matrix
1217: - indices - the column indices
1219: Level: advanced
1221: Notes:
1222: This can be called if you have precomputed the nonzero structure of the
1223: matrix and want to provide it to the matrix object to improve the performance
1224: of the MatSetValues() operation.
1226: You MUST have set the correct numbers of nonzeros per row in the call to
1227: MatCreateSeqBAIJ().
1229: MUST be called before any calls to MatSetValues();
1231: @*/
1232: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1233: {
1234: int ierr,(*f)(Mat,int *);
1238: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);
1239: if (f) {
1240: (*f)(mat,indices);
1241: } else {
1242: SETERRQ(1,"Wrong type of matrix to set column indices");
1243: }
1244: return(0);
1245: }
1247: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1248: {
1249: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1250: int ierr,i,j,n,row,bs,*ai,*aj,mbs;
1251: PetscReal atmp;
1252: Scalar *x,zero = 0.0;
1253: MatScalar *aa;
1254: int ncols,brow,krow,kcol;
1257: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1258: bs = a->bs;
1259: aa = a->a;
1260: ai = a->i;
1261: aj = a->j;
1262: mbs = a->mbs;
1264: VecSet(&zero,v);
1265: VecGetArray(v,&x);
1266: VecGetLocalSize(v,&n);
1267: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1268: for (i=0; i<mbs; i++) {
1269: ncols = ai[1] - ai[0]; ai++;
1270: brow = bs*i;
1271: for (j=0; j<ncols; j++){
1272: /* bcol = bs*(*aj); */
1273: for (kcol=0; kcol<bs; kcol++){
1274: for (krow=0; krow<bs; krow++){
1275: atmp = PetscAbsScalar(*aa); aa++;
1276: row = brow + krow; /* row index */
1277: /* printf("val[%d,%d]: %gn",row,bcol+kcol,atmp); */
1278: if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1279: }
1280: }
1281: aj++;
1282: }
1283: }
1284: VecRestoreArray(v,&x);
1285: return(0);
1286: }
1288: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1289: {
1290: int ierr;
1293: MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1294: return(0);
1295: }
1297: int MatGetArray_SeqBAIJ(Mat A,Scalar **array)
1298: {
1299: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1301: *array = a->a;
1302: return(0);
1303: }
1305: int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array)
1306: {
1308: return(0);
1309: }
1311: /* -------------------------------------------------------------------*/
1312: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1313: MatGetRow_SeqBAIJ,
1314: MatRestoreRow_SeqBAIJ,
1315: MatMult_SeqBAIJ_N,
1316: MatMultAdd_SeqBAIJ_N,
1317: MatMultTranspose_SeqBAIJ,
1318: MatMultTransposeAdd_SeqBAIJ,
1319: MatSolve_SeqBAIJ_N,
1320: 0,
1321: 0,
1322: 0,
1323: MatLUFactor_SeqBAIJ,
1324: 0,
1325: 0,
1326: MatTranspose_SeqBAIJ,
1327: MatGetInfo_SeqBAIJ,
1328: MatEqual_SeqBAIJ,
1329: MatGetDiagonal_SeqBAIJ,
1330: MatDiagonalScale_SeqBAIJ,
1331: MatNorm_SeqBAIJ,
1332: 0,
1333: MatAssemblyEnd_SeqBAIJ,
1334: 0,
1335: MatSetOption_SeqBAIJ,
1336: MatZeroEntries_SeqBAIJ,
1337: MatZeroRows_SeqBAIJ,
1338: MatLUFactorSymbolic_SeqBAIJ,
1339: MatLUFactorNumeric_SeqBAIJ_N,
1340: 0,
1341: 0,
1342: MatSetUpPreallocation_SeqBAIJ,
1343: 0,
1344: MatGetOwnershipRange_SeqBAIJ,
1345: MatILUFactorSymbolic_SeqBAIJ,
1346: 0,
1347: MatGetArray_SeqBAIJ,
1348: MatRestoreArray_SeqBAIJ,
1349: MatDuplicate_SeqBAIJ,
1350: 0,
1351: 0,
1352: MatILUFactor_SeqBAIJ,
1353: 0,
1354: 0,
1355: MatGetSubMatrices_SeqBAIJ,
1356: MatIncreaseOverlap_SeqBAIJ,
1357: MatGetValues_SeqBAIJ,
1358: 0,
1359: MatPrintHelp_SeqBAIJ,
1360: MatScale_SeqBAIJ,
1361: 0,
1362: 0,
1363: 0,
1364: MatGetBlockSize_SeqBAIJ,
1365: MatGetRowIJ_SeqBAIJ,
1366: MatRestoreRowIJ_SeqBAIJ,
1367: 0,
1368: 0,
1369: 0,
1370: 0,
1371: 0,
1372: 0,
1373: MatSetValuesBlocked_SeqBAIJ,
1374: MatGetSubMatrix_SeqBAIJ,
1375: MatDestroy_SeqBAIJ,
1376: MatView_SeqBAIJ,
1377: MatGetMaps_Petsc,
1378: 0,
1379: 0,
1380: 0,
1381: 0,
1382: 0,
1383: 0,
1384: MatGetRowMax_SeqBAIJ,
1385: MatConvert_Basic};
1387: EXTERN_C_BEGIN
1388: int MatStoreValues_SeqBAIJ(Mat mat)
1389: {
1390: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1391: int nz = aij->i[mat->m]*aij->bs*aij->bs2;
1392: int ierr;
1395: if (aij->nonew != 1) {
1396: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1397: }
1399: /* allocate space for values if not already there */
1400: if (!aij->saved_values) {
1401: PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);
1402: }
1404: /* copy values over */
1405: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));
1406: return(0);
1407: }
1408: EXTERN_C_END
1410: EXTERN_C_BEGIN
1411: int MatRetrieveValues_SeqBAIJ(Mat mat)
1412: {
1413: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1414: int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1417: if (aij->nonew != 1) {
1418: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1419: }
1420: if (!aij->saved_values) {
1421: SETERRQ(1,"Must call MatStoreValues(A);first");
1422: }
1424: /* copy values over */
1425: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));
1426: return(0);
1427: }
1428: EXTERN_C_END
1430: EXTERN_C_BEGIN
1431: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1432: EXTERN_C_END
1434: EXTERN_C_BEGIN
1435: int MatCreate_SeqBAIJ(Mat B)
1436: {
1437: int ierr,size;
1438: Mat_SeqBAIJ *b;
1441: MPI_Comm_size(B->comm,&size);
1442: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1444: B->m = B->M = PetscMax(B->m,B->M);
1445: B->n = B->N = PetscMax(B->n,B->N);
1446: ierr = PetscNew(Mat_SeqBAIJ,&b);
1447: B->data = (void*)b;
1448: ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1449: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1450: B->factor = 0;
1451: B->lupivotthreshold = 1.0;
1452: B->mapping = 0;
1453: b->row = 0;
1454: b->col = 0;
1455: b->icol = 0;
1456: b->reallocs = 0;
1457: b->saved_values = 0;
1458: #if defined(PETSC_USE_MAT_SINGLE)
1459: b->setvalueslen = 0;
1460: b->setvaluescopy = PETSC_NULL;
1461: #endif
1462:
1463: MapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1464: MapCreateMPI(B->comm,B->n,B->n,&B->cmap);
1466: b->sorted = PETSC_FALSE;
1467: b->roworiented = PETSC_TRUE;
1468: b->nonew = 0;
1469: b->diag = 0;
1470: b->solve_work = 0;
1471: b->mult_work = 0;
1472: b->spptr = 0;
1473: B->info.nz_unneeded = (PetscReal)b->maxnz;
1474: b->keepzeroedrows = PETSC_FALSE;
1476: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1477: "MatStoreValues_SeqBAIJ",
1478: MatStoreValues_SeqBAIJ);
1479: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1480: "MatRetrieveValues_SeqBAIJ",
1481: MatRetrieveValues_SeqBAIJ);
1482: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1483: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1484: MatSeqBAIJSetColumnIndices_SeqBAIJ);
1485: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1486: "MatConvert_SeqBAIJ_SeqAIJ",
1487: MatConvert_SeqBAIJ_SeqAIJ);
1488: return(0);
1489: }
1490: EXTERN_C_END
1492: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1493: {
1494: Mat C;
1495: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1496: int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1499: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1501: *B = 0;
1502: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1503: MatSetType(C,MATSEQBAIJ);
1504: c = (Mat_SeqBAIJ*)C->data;
1506: c->bs = a->bs;
1507: c->bs2 = a->bs2;
1508: c->mbs = a->mbs;
1509: c->nbs = a->nbs;
1510: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1512: PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1513: PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1514: for (i=0; i<mbs; i++) {
1515: c->imax[i] = a->imax[i];
1516: c->ilen[i] = a->ilen[i];
1517: }
1519: /* allocate the matrix space */
1520: c->singlemalloc = PETSC_TRUE;
1521: len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1522: PetscMalloc(len,&c->a);
1523: c->j = (int*)(c->a + nz*bs2);
1524: c->i = c->j + nz;
1525: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1526: if (mbs > 0) {
1527: PetscMemcpy(c->j,a->j,nz*sizeof(int));
1528: if (cpvalues == MAT_COPY_VALUES) {
1529: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1530: } else {
1531: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1532: }
1533: }
1535: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1536: c->sorted = a->sorted;
1537: c->roworiented = a->roworiented;
1538: c->nonew = a->nonew;
1540: if (a->diag) {
1541: PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1542: PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1543: for (i=0; i<mbs; i++) {
1544: c->diag[i] = a->diag[i];
1545: }
1546: } else c->diag = 0;
1547: c->nz = a->nz;
1548: c->maxnz = a->maxnz;
1549: c->solve_work = 0;
1550: c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */
1551: c->mult_work = 0;
1552: C->preallocated = PETSC_TRUE;
1553: C->assembled = PETSC_TRUE;
1554: *B = C;
1555: PetscFListDuplicate(A->qlist,&C->qlist);
1556: return(0);
1557: }
1559: EXTERN_C_BEGIN
1560: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1561: {
1562: Mat_SeqBAIJ *a;
1563: Mat B;
1564: int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1565: int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1566: int kmax,jcount,block,idx,point,nzcountb,extra_rows;
1567: int *masked,nmask,tmp,bs2,ishift;
1568: Scalar *aa;
1569: MPI_Comm comm = ((PetscObject)viewer)->comm;
1572: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1573: bs2 = bs*bs;
1575: MPI_Comm_size(comm,&size);
1576: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1577: PetscViewerBinaryGetDescriptor(viewer,&fd);
1578: PetscBinaryRead(fd,header,4,PETSC_INT);
1579: if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1580: M = header[1]; N = header[2]; nz = header[3];
1582: if (header[3] < 0) {
1583: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1584: }
1586: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1588: /*
1589: This code adds extra rows to make sure the number of rows is
1590: divisible by the blocksize
1591: */
1592: mbs = M/bs;
1593: extra_rows = bs - M + bs*(mbs);
1594: if (extra_rows == bs) extra_rows = 0;
1595: else mbs++;
1596: if (extra_rows) {
1597: PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksizen");
1598: }
1600: /* read in row lengths */
1601: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1602: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1603: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1605: /* read in column indices */
1606: PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1607: PetscBinaryRead(fd,jj,nz,PETSC_INT);
1608: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1610: /* loop over row lengths determining block row lengths */
1611: ierr = PetscMalloc(mbs*sizeof(int),&browlengths);
1612: ierr = PetscMemzero(browlengths,mbs*sizeof(int));
1613: ierr = PetscMalloc(2*mbs*sizeof(int),&mask);
1614: ierr = PetscMemzero(mask,mbs*sizeof(int));
1615: masked = mask + mbs;
1616: rowcount = 0; nzcount = 0;
1617: for (i=0; i<mbs; i++) {
1618: nmask = 0;
1619: for (j=0; j<bs; j++) {
1620: kmax = rowlengths[rowcount];
1621: for (k=0; k<kmax; k++) {
1622: tmp = jj[nzcount++]/bs;
1623: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1624: }
1625: rowcount++;
1626: }
1627: browlengths[i] += nmask;
1628: /* zero out the mask elements we set */
1629: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1630: }
1632: /* create our matrix */
1633: MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1634: B = *A;
1635: a = (Mat_SeqBAIJ*)B->data;
1637: /* set matrix "i" values */
1638: a->i[0] = 0;
1639: for (i=1; i<= mbs; i++) {
1640: a->i[i] = a->i[i-1] + browlengths[i-1];
1641: a->ilen[i-1] = browlengths[i-1];
1642: }
1643: a->nz = 0;
1644: for (i=0; i<mbs; i++) a->nz += browlengths[i];
1646: /* read in nonzero values */
1647: PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);
1648: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1649: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1651: /* set "a" and "j" values into matrix */
1652: nzcount = 0; jcount = 0;
1653: for (i=0; i<mbs; i++) {
1654: nzcountb = nzcount;
1655: nmask = 0;
1656: for (j=0; j<bs; j++) {
1657: kmax = rowlengths[i*bs+j];
1658: for (k=0; k<kmax; k++) {
1659: tmp = jj[nzcount++]/bs;
1660: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1661: }
1662: }
1663: /* sort the masked values */
1664: PetscSortInt(nmask,masked);
1666: /* set "j" values into matrix */
1667: maskcount = 1;
1668: for (j=0; j<nmask; j++) {
1669: a->j[jcount++] = masked[j];
1670: mask[masked[j]] = maskcount++;
1671: }
1672: /* set "a" values into matrix */
1673: ishift = bs2*a->i[i];
1674: for (j=0; j<bs; j++) {
1675: kmax = rowlengths[i*bs+j];
1676: for (k=0; k<kmax; k++) {
1677: tmp = jj[nzcountb]/bs ;
1678: block = mask[tmp] - 1;
1679: point = jj[nzcountb] - bs*tmp;
1680: idx = ishift + bs2*block + j + bs*point;
1681: a->a[idx] = (MatScalar)aa[nzcountb++];
1682: }
1683: }
1684: /* zero out the mask elements we set */
1685: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1686: }
1687: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1689: PetscFree(rowlengths);
1690: PetscFree(browlengths);
1691: PetscFree(aa);
1692: PetscFree(jj);
1693: PetscFree(mask);
1695: B->assembled = PETSC_TRUE;
1696:
1697: MatView_Private(B);
1698: return(0);
1699: }
1700: EXTERN_C_END
1702: /*@C
1703: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1704: compressed row) format. For good matrix assembly performance the
1705: user should preallocate the matrix storage by setting the parameter nz
1706: (or the array nnz). By setting these parameters accurately, performance
1707: during matrix assembly can be increased by more than a factor of 50.
1709: Collective on MPI_Comm
1711: Input Parameters:
1712: + comm - MPI communicator, set to PETSC_COMM_SELF
1713: . bs - size of block
1714: . m - number of rows
1715: . n - number of columns
1716: . nz - number of nonzero blocks per block row (same for all rows)
1717: - nnz - array containing the number of nonzero blocks in the various block rows
1718: (possibly different for each block row) or PETSC_NULL
1720: Output Parameter:
1721: . A - the matrix
1723: Options Database Keys:
1724: . -mat_no_unroll - uses code that does not unroll the loops in the
1725: block calculations (much slower)
1726: . -mat_block_size - size of the blocks to use
1728: Level: intermediate
1730: Notes:
1731: A nonzero block is any block that as 1 or more nonzeros in it
1733: The block AIJ format is fully compatible with standard Fortran 77
1734: storage. That is, the stored row and column indices can begin at
1735: either one (as in Fortran) or zero. See the users' manual for details.
1737: Specify the preallocated storage with either nz or nnz (not both).
1738: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1739: allocation. For additional details, see the users manual chapter on
1740: matrices.
1742: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1743: @*/
1744: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1745: {
1747:
1749: MatCreate(comm,m,n,m,n,A);
1750: MatSetType(*A,MATSEQBAIJ);
1751: MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
1752: return(0);
1753: }
1755: /*@C
1756: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1757: per row in the matrix. For good matrix assembly performance the
1758: user should preallocate the matrix storage by setting the parameter nz
1759: (or the array nnz). By setting these parameters accurately, performance
1760: during matrix assembly can be increased by more than a factor of 50.
1762: Collective on MPI_Comm
1764: Input Parameters:
1765: + A - the matrix
1766: . bs - size of block
1767: . nz - number of block nonzeros per block row (same for all rows)
1768: - nnz - array containing the number of block nonzeros in the various block rows
1769: (possibly different for each block row) or PETSC_NULL
1771: Options Database Keys:
1772: . -mat_no_unroll - uses code that does not unroll the loops in the
1773: block calculations (much slower)
1774: . -mat_block_size - size of the blocks to use
1776: Level: intermediate
1778: Notes:
1779: The block AIJ format is fully compatible with standard Fortran 77
1780: storage. That is, the stored row and column indices can begin at
1781: either one (as in Fortran) or zero. See the users' manual for details.
1783: Specify the preallocated storage with either nz or nnz (not both).
1784: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1785: allocation. For additional details, see the users manual chapter on
1786: matrices.
1788: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1789: @*/
1790: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1791: {
1792: Mat_SeqBAIJ *b;
1793: int i,len,ierr,mbs,nbs,bs2,newbs = bs;
1794: PetscTruth flg;
1797: PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);
1798: if (!flg) return(0);
1800: B->preallocated = PETSC_TRUE;
1801: PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1802: if (nnz && newbs != bs) {
1803: SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1804: }
1805: bs = newbs;
1807: mbs = B->m/bs;
1808: nbs = B->n/bs;
1809: bs2 = bs*bs;
1811: if (mbs*bs!=B->m || nbs*bs!=B->n) {
1812: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1813: }
1815: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1816: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1817: if (nnz) {
1818: for (i=0; i<mbs; i++) {
1819: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1820: }
1821: }
1823: b = (Mat_SeqBAIJ*)B->data;
1824: ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1825: if (!flg) {
1826: switch (bs) {
1827: case 1:
1828: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1829: B->ops->solve = MatSolve_SeqBAIJ_1;
1830: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1;
1831: B->ops->mult = MatMult_SeqBAIJ_1;
1832: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
1833: break;
1834: case 2:
1835: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1836: B->ops->solve = MatSolve_SeqBAIJ_2;
1837: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2;
1838: B->ops->mult = MatMult_SeqBAIJ_2;
1839: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
1840: break;
1841: case 3:
1842: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1843: B->ops->solve = MatSolve_SeqBAIJ_3;
1844: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3;
1845: B->ops->mult = MatMult_SeqBAIJ_3;
1846: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
1847: break;
1848: case 4:
1849: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1850: B->ops->solve = MatSolve_SeqBAIJ_4;
1851: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4;
1852: B->ops->mult = MatMult_SeqBAIJ_4;
1853: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
1854: break;
1855: case 5:
1856: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1857: B->ops->solve = MatSolve_SeqBAIJ_5;
1858: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5;
1859: B->ops->mult = MatMult_SeqBAIJ_5;
1860: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
1861: break;
1862: case 6:
1863: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1864: B->ops->solve = MatSolve_SeqBAIJ_6;
1865: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6;
1866: B->ops->mult = MatMult_SeqBAIJ_6;
1867: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
1868: break;
1869: case 7:
1870: B->ops->mult = MatMult_SeqBAIJ_7;
1871: B->ops->solve = MatSolve_SeqBAIJ_7;
1872: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7;
1873: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
1874: break;
1875: default:
1876: B->ops->mult = MatMult_SeqBAIJ_N;
1877: B->ops->solve = MatSolve_SeqBAIJ_N;
1878: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
1879: break;
1880: }
1881: }
1882: b->bs = bs;
1883: b->mbs = mbs;
1884: b->nbs = nbs;
1885: PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1886: if (!nnz) {
1887: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1888: else if (nz <= 0) nz = 1;
1889: for (i=0; i<mbs; i++) b->imax[i] = nz;
1890: nz = nz*mbs;
1891: } else {
1892: nz = 0;
1893: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1894: }
1896: /* allocate the matrix space */
1897: len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1898: ierr = PetscMalloc(len,&b->a);
1899: ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1900: b->j = (int*)(b->a + nz*bs2);
1901: PetscMemzero(b->j,nz*sizeof(int));
1902: b->i = b->j + nz;
1903: b->singlemalloc = PETSC_TRUE;
1905: b->i[0] = 0;
1906: for (i=1; i<mbs+1; i++) {
1907: b->i[i] = b->i[i-1] + b->imax[i-1];
1908: }
1910: /* b->ilen will count nonzeros in each block row so far. */
1911: PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1912: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1913: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1915: b->bs = bs;
1916: b->bs2 = bs2;
1917: b->mbs = mbs;
1918: b->nz = 0;
1919: b->maxnz = nz*bs2;
1920: B->info.nz_unneeded = (PetscReal)b->maxnz;
1921: return(0);
1922: }