Actual source code: baij.c
1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/vec/vecimpl.h
9: #include src/inline/spops.h
10: #include petscsys.h
12: /*
13: Special version for Fun3d sequential benchmark
14: */
15: #if defined(PETSC_HAVE_FORTRAN_CAPS)
16: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18: #define matsetvaluesblocked4_ matsetvaluesblocked4
19: #endif
21: EXTERN_C_BEGIN
24: void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
25: {
26: Mat A = *AA;
27: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
28: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
29: int *ai=a->i,*ailen=a->ilen;
30: int *aj=a->j,stepval;
31: const PetscScalar *value = v;
32: MatScalar *ap,*aa = a->a,*bap;
35: stepval = (n-1)*4;
36: for (k=0; k<m; k++) { /* loop over added rows */
37: row = im[k];
38: rp = aj + ai[row];
39: ap = aa + 16*ai[row];
40: nrow = ailen[row];
41: low = 0;
42: for (l=0; l<n; l++) { /* loop over added columns */
43: col = in[l];
44: value = v + k*(stepval+4)*4 + l*4;
45: low = 0; high = nrow;
46: while (high-low > 7) {
47: t = (low+high)/2;
48: if (rp[t] > col) high = t;
49: else low = t;
50: }
51: for (i=low; i<high; i++) {
52: if (rp[i] > col) break;
53: if (rp[i] == col) {
54: bap = ap + 16*i;
55: for (ii=0; ii<4; ii++,value+=stepval) {
56: for (jj=ii; jj<16; jj+=4) {
57: bap[jj] += *value++;
58: }
59: }
60: goto noinsert2;
61: }
62: }
63: N = nrow++ - 1;
64: /* shift up all the later entries in this row */
65: for (ii=N; ii>=i; ii--) {
66: rp[ii+1] = rp[ii];
67: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
68: }
69: if (N >= i) {
70: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
71: }
72: rp[i] = col;
73: bap = ap + 16*i;
74: for (ii=0; ii<4; ii++,value+=stepval) {
75: for (jj=ii; jj<16; jj+=4) {
76: bap[jj] = *value++;
77: }
78: }
79: noinsert2:;
80: low = i;
81: }
82: ailen[row] = nrow;
83: }
84: }
85: EXTERN_C_END
87: #if defined(PETSC_HAVE_FORTRAN_CAPS)
88: #define matsetvalues4_ MATSETVALUES4
89: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
90: #define matsetvalues4_ matsetvalues4
91: #endif
93: EXTERN_C_BEGIN
96: void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
97: {
98: Mat A = *AA;
99: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100: int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101: int *ai=a->i,*ailen=a->ilen;
102: int *aj=a->j,brow,bcol;
103: int ridx,cidx;
104: MatScalar *ap,value,*aa=a->a,*bap;
105:
107: for (k=0; k<m; k++) { /* loop over added rows */
108: row = im[k]; brow = row/4;
109: rp = aj + ai[brow];
110: ap = aa + 16*ai[brow];
111: nrow = ailen[brow];
112: low = 0;
113: for (l=0; l<n; l++) { /* loop over added columns */
114: col = in[l]; bcol = col/4;
115: ridx = row % 4; cidx = col % 4;
116: value = v[l + k*n];
117: low = 0; high = nrow;
118: while (high-low > 7) {
119: t = (low+high)/2;
120: if (rp[t] > bcol) high = t;
121: else low = t;
122: }
123: for (i=low; i<high; i++) {
124: if (rp[i] > bcol) break;
125: if (rp[i] == bcol) {
126: bap = ap + 16*i + 4*cidx + ridx;
127: *bap += value;
128: goto noinsert1;
129: }
130: }
131: N = nrow++ - 1;
132: /* shift up all the later entries in this row */
133: for (ii=N; ii>=i; ii--) {
134: rp[ii+1] = rp[ii];
135: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136: }
137: if (N>=i) {
138: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139: }
140: rp[i] = bcol;
141: ap[16*i + 4*cidx + ridx] = value;
142: noinsert1:;
143: low = i;
144: }
145: ailen[brow] = nrow;
146: }
147: }
148: EXTERN_C_END
150: /* UGLY, ugly, ugly
151: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
152: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
153: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154: converts the entries into single precision and then calls ..._MatScalar() to put them
155: into the single precision data structures.
156: */
157: #if defined(PETSC_USE_MAT_SINGLE)
158: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
159: #else
160: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161: #endif
163: #define CHUNKSIZE 10
165: /*
166: Checks for missing diagonals
167: */
170: int MatMissingDiagonal_SeqBAIJ(Mat A)
171: {
172: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173: int *diag,*jj = a->j,i,ierr;
176: MatMarkDiagonal_SeqBAIJ(A);
177: diag = a->diag;
178: for (i=0; i<a->mbs; i++) {
179: if (jj[diag[i]] != i) {
180: SETERRQ1(1,"Matrix is missing diagonal number %d",i);
181: }
182: }
183: return(0);
184: }
188: int MatMarkDiagonal_SeqBAIJ(Mat A)
189: {
190: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
191: int i,j,*diag,m = a->mbs,ierr;
194: if (a->diag) return(0);
196: PetscMalloc((m+1)*sizeof(int),&diag);
197: PetscLogObjectMemory(A,(m+1)*sizeof(int));
198: for (i=0; i<m; i++) {
199: diag[i] = a->i[i+1];
200: for (j=a->i[i]; j<a->i[i+1]; j++) {
201: if (a->j[j] == i) {
202: diag[i] = j;
203: break;
204: }
205: }
206: }
207: a->diag = diag;
208: return(0);
209: }
212: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
216: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
217: {
218: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
219: int ierr,n = a->mbs,i;
222: *nn = n;
223: if (!ia) return(0);
224: if (symmetric) {
225: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
226: } else if (oshift == 1) {
227: /* temporarily add 1 to i and j indices */
228: int nz = a->i[n];
229: for (i=0; i<nz; i++) a->j[i]++;
230: for (i=0; i<n+1; i++) a->i[i]++;
231: *ia = a->i; *ja = a->j;
232: } else {
233: *ia = a->i; *ja = a->j;
234: }
236: return(0);
237: }
241: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
242: {
243: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
244: int i,n = a->mbs,ierr;
247: if (!ia) return(0);
248: if (symmetric) {
249: PetscFree(*ia);
250: PetscFree(*ja);
251: } else if (oshift == 1) {
252: int nz = a->i[n]-1;
253: for (i=0; i<nz; i++) a->j[i]--;
254: for (i=0; i<n+1; i++) a->i[i]--;
255: }
256: return(0);
257: }
261: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
262: {
263: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
266: *bs = baij->bs;
267: return(0);
268: }
273: int MatDestroy_SeqBAIJ(Mat A)
274: {
275: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
276: int ierr;
279: #if defined(PETSC_USE_LOG)
280: PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
281: #endif
282: PetscFree(a->a);
283: if (!a->singlemalloc) {
284: PetscFree(a->i);
285: PetscFree(a->j);
286: }
287: if (a->row) {
288: ISDestroy(a->row);
289: }
290: if (a->col) {
291: ISDestroy(a->col);
292: }
293: if (a->diag) {PetscFree(a->diag);}
294: if (a->ilen) {PetscFree(a->ilen);}
295: if (a->imax) {PetscFree(a->imax);}
296: if (a->solve_work) {PetscFree(a->solve_work);}
297: if (a->mult_work) {PetscFree(a->mult_work);}
298: if (a->icol) {ISDestroy(a->icol);}
299: if (a->saved_values) {PetscFree(a->saved_values);}
300: #if defined(PETSC_USE_MAT_SINGLE)
301: if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
302: #endif
303: if (a->xtoy) {PetscFree(a->xtoy);}
305: PetscFree(a);
306: return(0);
307: }
311: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
312: {
313: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
316: switch (op) {
317: case MAT_ROW_ORIENTED:
318: a->roworiented = PETSC_TRUE;
319: break;
320: case MAT_COLUMN_ORIENTED:
321: a->roworiented = PETSC_FALSE;
322: break;
323: case MAT_COLUMNS_SORTED:
324: a->sorted = PETSC_TRUE;
325: break;
326: case MAT_COLUMNS_UNSORTED:
327: a->sorted = PETSC_FALSE;
328: break;
329: case MAT_KEEP_ZEROED_ROWS:
330: a->keepzeroedrows = PETSC_TRUE;
331: break;
332: case MAT_NO_NEW_NONZERO_LOCATIONS:
333: a->nonew = 1;
334: break;
335: case MAT_NEW_NONZERO_LOCATION_ERR:
336: a->nonew = -1;
337: break;
338: case MAT_NEW_NONZERO_ALLOCATION_ERR:
339: a->nonew = -2;
340: break;
341: case MAT_YES_NEW_NONZERO_LOCATIONS:
342: a->nonew = 0;
343: break;
344: case MAT_ROWS_SORTED:
345: case MAT_ROWS_UNSORTED:
346: case MAT_YES_NEW_DIAGONALS:
347: case MAT_IGNORE_OFF_PROC_ENTRIES:
348: case MAT_USE_HASH_TABLE:
349: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
350: break;
351: case MAT_NO_NEW_DIAGONALS:
352: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
353: default:
354: SETERRQ(PETSC_ERR_SUP,"unknown option");
355: }
356: return(0);
357: }
361: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
362: {
363: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
364: int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
365: MatScalar *aa,*aa_i;
366: PetscScalar *v_i;
369: bs = a->bs;
370: ai = a->i;
371: aj = a->j;
372: aa = a->a;
373: bs2 = a->bs2;
374:
375: if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
376:
377: bn = row/bs; /* Block number */
378: bp = row % bs; /* Block Position */
379: M = ai[bn+1] - ai[bn];
380: *nz = bs*M;
381:
382: if (v) {
383: *v = 0;
384: if (*nz) {
385: PetscMalloc((*nz)*sizeof(PetscScalar),v);
386: for (i=0; i<M; i++) { /* for each block in the block row */
387: v_i = *v + i*bs;
388: aa_i = aa + bs2*(ai[bn] + i);
389: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
390: }
391: }
392: }
394: if (idx) {
395: *idx = 0;
396: if (*nz) {
397: PetscMalloc((*nz)*sizeof(int),idx);
398: for (i=0; i<M; i++) { /* for each block in the block row */
399: idx_i = *idx + i*bs;
400: itmp = bs*aj[ai[bn] + i];
401: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
402: }
403: }
404: }
405: return(0);
406: }
410: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
411: {
415: if (idx) {if (*idx) {PetscFree(*idx);}}
416: if (v) {if (*v) {PetscFree(*v);}}
417: return(0);
418: }
422: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
423: {
424: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
425: Mat C;
426: int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
427: int *rows,*cols,bs2=a->bs2;
428: PetscScalar *array;
431: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
432: PetscMalloc((1+nbs)*sizeof(int),&col);
433: PetscMemzero(col,(1+nbs)*sizeof(int));
435: #if defined(PETSC_USE_MAT_SINGLE)
436: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
437: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
438: #else
439: array = a->a;
440: #endif
442: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
443: MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
444: PetscFree(col);
445: PetscMalloc(2*bs*sizeof(int),&rows);
446: cols = rows + bs;
447: for (i=0; i<mbs; i++) {
448: cols[0] = i*bs;
449: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
450: len = ai[i+1] - ai[i];
451: for (j=0; j<len; j++) {
452: rows[0] = (*aj++)*bs;
453: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
454: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
455: array += bs2;
456: }
457: }
458: PetscFree(rows);
459: #if defined(PETSC_USE_MAT_SINGLE)
460: PetscFree(array);
461: #endif
462:
463: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
464: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
465:
466: if (B) {
467: *B = C;
468: } else {
469: MatHeaderCopy(A,C);
470: }
471: return(0);
472: }
476: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
477: {
478: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
479: int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
480: PetscScalar *aa;
481: FILE *file;
484: PetscViewerBinaryGetDescriptor(viewer,&fd);
485: PetscMalloc((4+A->m)*sizeof(int),&col_lens);
486: col_lens[0] = MAT_FILE_COOKIE;
488: col_lens[1] = A->m;
489: col_lens[2] = A->n;
490: col_lens[3] = a->nz*bs2;
492: /* store lengths of each row and write (including header) to file */
493: count = 0;
494: for (i=0; i<a->mbs; i++) {
495: for (j=0; j<bs; j++) {
496: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
497: }
498: }
499: PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
500: PetscFree(col_lens);
502: /* store column indices (zero start index) */
503: PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
504: count = 0;
505: for (i=0; i<a->mbs; i++) {
506: for (j=0; j<bs; j++) {
507: for (k=a->i[i]; k<a->i[i+1]; k++) {
508: for (l=0; l<bs; l++) {
509: jj[count++] = bs*a->j[k] + l;
510: }
511: }
512: }
513: }
514: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
515: PetscFree(jj);
517: /* store nonzero values */
518: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
519: count = 0;
520: for (i=0; i<a->mbs; i++) {
521: for (j=0; j<bs; j++) {
522: for (k=a->i[i]; k<a->i[i+1]; k++) {
523: for (l=0; l<bs; l++) {
524: aa[count++] = a->a[bs2*k + l*bs + j];
525: }
526: }
527: }
528: }
529: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
530: PetscFree(aa);
532: PetscViewerBinaryGetInfoPointer(viewer,&file);
533: if (file) {
534: fprintf(file,"-matload_block_size %d\n",a->bs);
535: }
536: return(0);
537: }
541: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
542: {
543: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
544: int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
545: PetscViewerFormat format;
548: PetscViewerGetFormat(viewer,&format);
549: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
550: PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);
551: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
552: Mat aij;
553: MatConvert(A,MATSEQAIJ,&aij);
554: MatView(aij,viewer);
555: MatDestroy(aij);
556: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
557: return(0);
558: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
559: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
560: for (i=0; i<a->mbs; i++) {
561: for (j=0; j<bs; j++) {
562: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
563: for (k=a->i[i]; k<a->i[i+1]; k++) {
564: for (l=0; l<bs; l++) {
565: #if defined(PETSC_USE_COMPLEX)
566: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
567: PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
568: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
569: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
570: PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
571: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
572: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
573: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
574: }
575: #else
576: if (a->a[bs2*k + l*bs + j] != 0.0) {
577: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
578: }
579: #endif
580: }
581: }
582: PetscViewerASCIIPrintf(viewer,"\n");
583: }
584: }
585: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
586: } else {
587: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
588: for (i=0; i<a->mbs; i++) {
589: for (j=0; j<bs; j++) {
590: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
591: for (k=a->i[i]; k<a->i[i+1]; k++) {
592: for (l=0; l<bs; l++) {
593: #if defined(PETSC_USE_COMPLEX)
594: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
595: PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
596: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
597: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
598: PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
599: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
600: } else {
601: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
602: }
603: #else
604: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
605: #endif
606: }
607: }
608: PetscViewerASCIIPrintf(viewer,"\n");
609: }
610: }
611: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
612: }
613: PetscViewerFlush(viewer);
614: return(0);
615: }
619: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
620: {
621: Mat A = (Mat) Aa;
622: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
623: int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
624: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
625: MatScalar *aa;
626: PetscViewer viewer;
630: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
631: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
633: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
635: /* loop over matrix elements drawing boxes */
636: color = PETSC_DRAW_BLUE;
637: for (i=0,row=0; i<mbs; i++,row+=bs) {
638: for (j=a->i[i]; j<a->i[i+1]; j++) {
639: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
640: x_l = a->j[j]*bs; x_r = x_l + 1.0;
641: aa = a->a + j*bs2;
642: for (k=0; k<bs; k++) {
643: for (l=0; l<bs; l++) {
644: if (PetscRealPart(*aa++) >= 0.) continue;
645: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
646: }
647: }
648: }
649: }
650: color = PETSC_DRAW_CYAN;
651: for (i=0,row=0; i<mbs; i++,row+=bs) {
652: for (j=a->i[i]; j<a->i[i+1]; j++) {
653: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
654: x_l = a->j[j]*bs; x_r = x_l + 1.0;
655: aa = a->a + j*bs2;
656: for (k=0; k<bs; k++) {
657: for (l=0; l<bs; l++) {
658: if (PetscRealPart(*aa++) != 0.) continue;
659: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
660: }
661: }
662: }
663: }
665: color = PETSC_DRAW_RED;
666: for (i=0,row=0; i<mbs; i++,row+=bs) {
667: for (j=a->i[i]; j<a->i[i+1]; j++) {
668: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
669: x_l = a->j[j]*bs; x_r = x_l + 1.0;
670: aa = a->a + j*bs2;
671: for (k=0; k<bs; k++) {
672: for (l=0; l<bs; l++) {
673: if (PetscRealPart(*aa++) <= 0.) continue;
674: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
675: }
676: }
677: }
678: }
679: return(0);
680: }
684: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
685: {
686: int ierr;
687: PetscReal xl,yl,xr,yr,w,h;
688: PetscDraw draw;
689: PetscTruth isnull;
693: PetscViewerDrawGetDraw(viewer,0,&draw);
694: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
696: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
697: xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
698: xr += w; yr += h; xl = -w; yl = -h;
699: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
700: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
701: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
702: return(0);
703: }
707: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
708: {
709: int ierr;
710: PetscTruth issocket,isascii,isbinary,isdraw;
713: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
714: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
715: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
716: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
717: if (issocket) {
718: SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
719: } else if (isascii){
720: MatView_SeqBAIJ_ASCII(A,viewer);
721: } else if (isbinary) {
722: MatView_SeqBAIJ_Binary(A,viewer);
723: } else if (isdraw) {
724: MatView_SeqBAIJ_Draw(A,viewer);
725: } else {
726: SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
727: }
728: return(0);
729: }
734: int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
735: {
736: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
737: int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
738: int *ai = a->i,*ailen = a->ilen;
739: int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
740: MatScalar *ap,*aa = a->a,zero = 0.0;
743: for (k=0; k<m; k++) { /* loop over rows */
744: row = im[k]; brow = row/bs;
745: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
746: if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
747: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
748: nrow = ailen[brow];
749: for (l=0; l<n; l++) { /* loop over columns */
750: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
751: if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
752: col = in[l] ;
753: bcol = col/bs;
754: cidx = col%bs;
755: ridx = row%bs;
756: high = nrow;
757: low = 0; /* assume unsorted */
758: while (high-low > 5) {
759: t = (low+high)/2;
760: if (rp[t] > bcol) high = t;
761: else low = t;
762: }
763: for (i=low; i<high; i++) {
764: if (rp[i] > bcol) break;
765: if (rp[i] == bcol) {
766: *v++ = ap[bs2*i+bs*cidx+ridx];
767: goto finished;
768: }
769: }
770: *v++ = zero;
771: finished:;
772: }
773: }
774: return(0);
775: }
777: #if defined(PETSC_USE_MAT_SINGLE)
780: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
781: {
782: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
783: int ierr,i,N = m*n*b->bs2;
784: MatScalar *vsingle;
787: if (N > b->setvalueslen) {
788: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
789: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
790: b->setvalueslen = N;
791: }
792: vsingle = b->setvaluescopy;
793: for (i=0; i<N; i++) {
794: vsingle[i] = v[i];
795: }
796: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
797: return(0);
798: }
799: #endif
804: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
805: {
806: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
807: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
808: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
809: int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
810: PetscTruth roworiented=a->roworiented;
811: const MatScalar *value = v;
812: MatScalar *ap,*aa = a->a,*bap;
815: if (roworiented) {
816: stepval = (n-1)*bs;
817: } else {
818: stepval = (m-1)*bs;
819: }
820: for (k=0; k<m; k++) { /* loop over added rows */
821: row = im[k];
822: if (row < 0) continue;
823: #if defined(PETSC_USE_BOPT_g)
824: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
825: #endif
826: rp = aj + ai[row];
827: ap = aa + bs2*ai[row];
828: rmax = imax[row];
829: nrow = ailen[row];
830: low = 0;
831: for (l=0; l<n; l++) { /* loop over added columns */
832: if (in[l] < 0) continue;
833: #if defined(PETSC_USE_BOPT_g)
834: if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
835: #endif
836: col = in[l];
837: if (roworiented) {
838: value = v + k*(stepval+bs)*bs + l*bs;
839: } else {
840: value = v + l*(stepval+bs)*bs + k*bs;
841: }
842: if (!sorted) low = 0; high = nrow;
843: while (high-low > 7) {
844: t = (low+high)/2;
845: if (rp[t] > col) high = t;
846: else low = t;
847: }
848: for (i=low; i<high; i++) {
849: if (rp[i] > col) break;
850: if (rp[i] == col) {
851: bap = ap + bs2*i;
852: if (roworiented) {
853: if (is == ADD_VALUES) {
854: for (ii=0; ii<bs; ii++,value+=stepval) {
855: for (jj=ii; jj<bs2; jj+=bs) {
856: bap[jj] += *value++;
857: }
858: }
859: } else {
860: for (ii=0; ii<bs; ii++,value+=stepval) {
861: for (jj=ii; jj<bs2; jj+=bs) {
862: bap[jj] = *value++;
863: }
864: }
865: }
866: } else {
867: if (is == ADD_VALUES) {
868: for (ii=0; ii<bs; ii++,value+=stepval) {
869: for (jj=0; jj<bs; jj++) {
870: *bap++ += *value++;
871: }
872: }
873: } else {
874: for (ii=0; ii<bs; ii++,value+=stepval) {
875: for (jj=0; jj<bs; jj++) {
876: *bap++ = *value++;
877: }
878: }
879: }
880: }
881: goto noinsert2;
882: }
883: }
884: if (nonew == 1) goto noinsert2;
885: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
886: if (nrow >= rmax) {
887: /* there is no extra room in row, therefore enlarge */
888: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
889: MatScalar *new_a;
891: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
893: /* malloc new storage space */
894: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
895: PetscMalloc(len,&new_a);
896: new_j = (int*)(new_a + bs2*new_nz);
897: new_i = new_j + new_nz;
899: /* copy over old data into new slots */
900: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
901: for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
902: PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
903: len = (new_nz - CHUNKSIZE - ai[row] - nrow);
904: PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
905: PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
906: PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
907: PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
908: /* free up old matrix storage */
909: PetscFree(a->a);
910: if (!a->singlemalloc) {
911: PetscFree(a->i);
912: PetscFree(a->j);
913: }
914: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
915: a->singlemalloc = PETSC_TRUE;
917: rp = aj + ai[row]; ap = aa + bs2*ai[row];
918: rmax = imax[row] = imax[row] + CHUNKSIZE;
919: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
920: a->maxnz += bs2*CHUNKSIZE;
921: a->reallocs++;
922: a->nz++;
923: }
924: N = nrow++ - 1;
925: /* shift up all the later entries in this row */
926: for (ii=N; ii>=i; ii--) {
927: rp[ii+1] = rp[ii];
928: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
929: }
930: if (N >= i) {
931: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
932: }
933: rp[i] = col;
934: bap = ap + bs2*i;
935: if (roworiented) {
936: for (ii=0; ii<bs; ii++,value+=stepval) {
937: for (jj=ii; jj<bs2; jj+=bs) {
938: bap[jj] = *value++;
939: }
940: }
941: } else {
942: for (ii=0; ii<bs; ii++,value+=stepval) {
943: for (jj=0; jj<bs; jj++) {
944: *bap++ = *value++;
945: }
946: }
947: }
948: noinsert2:;
949: low = i;
950: }
951: ailen[row] = nrow;
952: }
953: return(0);
954: }
958: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
959: {
960: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
961: int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
962: int m = A->m,*ip,N,*ailen = a->ilen;
963: int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
964: MatScalar *aa = a->a,*ap;
967: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
969: if (m) rmax = ailen[0];
970: for (i=1; i<mbs; i++) {
971: /* move each row back by the amount of empty slots (fshift) before it*/
972: fshift += imax[i-1] - ailen[i-1];
973: rmax = PetscMax(rmax,ailen[i]);
974: if (fshift) {
975: ip = aj + ai[i]; ap = aa + bs2*ai[i];
976: N = ailen[i];
977: for (j=0; j<N; j++) {
978: ip[j-fshift] = ip[j];
979: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
980: }
981: }
982: ai[i] = ai[i-1] + ailen[i-1];
983: }
984: if (mbs) {
985: fshift += imax[mbs-1] - ailen[mbs-1];
986: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
987: }
988: /* reset ilen and imax for each row */
989: for (i=0; i<mbs; i++) {
990: ailen[i] = imax[i] = ai[i+1] - ai[i];
991: }
992: a->nz = ai[mbs];
994: /* diagonals may have moved, so kill the diagonal pointers */
995: if (fshift && a->diag) {
996: PetscFree(a->diag);
997: PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
998: a->diag = 0;
999: }
1000: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1001: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1002: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1003: a->reallocs = 0;
1004: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1006: return(0);
1007: }
1011: /*
1012: This function returns an array of flags which indicate the locations of contiguous
1013: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1014: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1015: Assume: sizes should be long enough to hold all the values.
1016: */
1019: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1020: {
1021: int i,j,k,row;
1022: PetscTruth flg;
1025: for (i=0,j=0; i<n; j++) {
1026: row = idx[i];
1027: if (row%bs!=0) { /* Not the begining of a block */
1028: sizes[j] = 1;
1029: i++;
1030: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1031: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1032: i++;
1033: } else { /* Begining of the block, so check if the complete block exists */
1034: flg = PETSC_TRUE;
1035: for (k=1; k<bs; k++) {
1036: if (row+k != idx[i+k]) { /* break in the block */
1037: flg = PETSC_FALSE;
1038: break;
1039: }
1040: }
1041: if (flg == PETSC_TRUE) { /* No break in the bs */
1042: sizes[j] = bs;
1043: i+= bs;
1044: } else {
1045: sizes[j] = 1;
1046: i++;
1047: }
1048: }
1049: }
1050: *bs_max = j;
1051: return(0);
1052: }
1053:
1056: int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1057: {
1058: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1059: int ierr,i,j,k,count,is_n,*is_idx,*rows;
1060: int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1061: PetscScalar zero = 0.0;
1062: MatScalar *aa;
1065: /* Make a copy of the IS and sort it */
1066: ISGetLocalSize(is,&is_n);
1067: ISGetIndices(is,&is_idx);
1069: /* allocate memory for rows,sizes */
1070: PetscMalloc((3*is_n+1)*sizeof(int),&rows);
1071: sizes = rows + is_n;
1073: /* copy IS values to rows, and sort them */
1074: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1075: PetscSortInt(is_n,rows);
1076: if (baij->keepzeroedrows) {
1077: for (i=0; i<is_n; i++) { sizes[i] = 1; }
1078: bs_max = is_n;
1079: } else {
1080: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1081: }
1082: ISRestoreIndices(is,&is_idx);
1084: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1085: row = rows[j];
1086: if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1087: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1088: aa = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1089: if (sizes[i] == bs && !baij->keepzeroedrows) {
1090: if (diag) {
1091: if (baij->ilen[row/bs] > 0) {
1092: baij->ilen[row/bs] = 1;
1093: baij->j[baij->i[row/bs]] = row/bs;
1094: PetscMemzero(aa,count*bs*sizeof(MatScalar));
1095: }
1096: /* Now insert all the diagonal values for this bs */
1097: for (k=0; k<bs; k++) {
1098: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
1099: }
1100: } else { /* (!diag) */
1101: baij->ilen[row/bs] = 0;
1102: } /* end (!diag) */
1103: } else { /* (sizes[i] != bs) */
1104: #if defined (PETSC_USE_DEBUG)
1105: if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1106: #endif
1107: for (k=0; k<count; k++) {
1108: aa[0] = zero;
1109: aa += bs;
1110: }
1111: if (diag) {
1112: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
1113: }
1114: }
1115: }
1117: PetscFree(rows);
1118: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1119: return(0);
1120: }
1124: int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1125: {
1126: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1127: int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1128: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1129: int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1130: int ridx,cidx,bs2=a->bs2,ierr;
1131: PetscTruth roworiented=a->roworiented;
1132: MatScalar *ap,value,*aa=a->a,*bap;
1135: for (k=0; k<m; k++) { /* loop over added rows */
1136: row = im[k]; brow = row/bs;
1137: if (row < 0) continue;
1138: #if defined(PETSC_USE_BOPT_g)
1139: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1140: #endif
1141: rp = aj + ai[brow];
1142: ap = aa + bs2*ai[brow];
1143: rmax = imax[brow];
1144: nrow = ailen[brow];
1145: low = 0;
1146: for (l=0; l<n; l++) { /* loop over added columns */
1147: if (in[l] < 0) continue;
1148: #if defined(PETSC_USE_BOPT_g)
1149: if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1150: #endif
1151: col = in[l]; bcol = col/bs;
1152: ridx = row % bs; cidx = col % bs;
1153: if (roworiented) {
1154: value = v[l + k*n];
1155: } else {
1156: value = v[k + l*m];
1157: }
1158: if (!sorted) low = 0; high = nrow;
1159: while (high-low > 7) {
1160: t = (low+high)/2;
1161: if (rp[t] > bcol) high = t;
1162: else low = t;
1163: }
1164: for (i=low; i<high; i++) {
1165: if (rp[i] > bcol) break;
1166: if (rp[i] == bcol) {
1167: bap = ap + bs2*i + bs*cidx + ridx;
1168: if (is == ADD_VALUES) *bap += value;
1169: else *bap = value;
1170: goto noinsert1;
1171: }
1172: }
1173: if (nonew == 1) goto noinsert1;
1174: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1175: if (nrow >= rmax) {
1176: /* there is no extra room in row, therefore enlarge */
1177: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1178: MatScalar *new_a;
1180: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1182: /* Malloc new storage space */
1183: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1184: PetscMalloc(len,&new_a);
1185: new_j = (int*)(new_a + bs2*new_nz);
1186: new_i = new_j + new_nz;
1188: /* copy over old data into new slots */
1189: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1190: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1191: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1192: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1193: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1194: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1195: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1196: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1197: /* free up old matrix storage */
1198: PetscFree(a->a);
1199: if (!a->singlemalloc) {
1200: PetscFree(a->i);
1201: PetscFree(a->j);
1202: }
1203: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1204: a->singlemalloc = PETSC_TRUE;
1206: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1207: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1208: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1209: a->maxnz += bs2*CHUNKSIZE;
1210: a->reallocs++;
1211: a->nz++;
1212: }
1213: N = nrow++ - 1;
1214: /* shift up all the later entries in this row */
1215: for (ii=N; ii>=i; ii--) {
1216: rp[ii+1] = rp[ii];
1217: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1218: }
1219: if (N>=i) {
1220: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1221: }
1222: rp[i] = bcol;
1223: ap[bs2*i + bs*cidx + ridx] = value;
1224: noinsert1:;
1225: low = i;
1226: }
1227: ailen[brow] = nrow;
1228: }
1229: return(0);
1230: }
1235: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1236: {
1237: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1238: Mat outA;
1239: int ierr;
1240: PetscTruth row_identity,col_identity;
1243: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1244: ISIdentity(row,&row_identity);
1245: ISIdentity(col,&col_identity);
1246: if (!row_identity || !col_identity) {
1247: SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1248: }
1250: outA = inA;
1251: inA->factor = FACTOR_LU;
1253: if (!a->diag) {
1254: MatMarkDiagonal_SeqBAIJ(inA);
1255: }
1257: a->row = row;
1258: a->col = col;
1259: PetscObjectReference((PetscObject)row);
1260: PetscObjectReference((PetscObject)col);
1261:
1262: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1263: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1264: PetscLogObjectParent(inA,a->icol);
1265:
1266: /*
1267: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1268: for ILU(0) factorization with natural ordering
1269: */
1270: if (a->bs < 8) {
1271: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1272: } else {
1273: if (!a->solve_work) {
1274: PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1275: PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1276: }
1277: }
1279: MatLUFactorNumeric(inA,&outA);
1281: return(0);
1282: }
1285: int MatPrintHelp_SeqBAIJ(Mat A)
1286: {
1287: static PetscTruth called = PETSC_FALSE;
1288: MPI_Comm comm = A->comm;
1289: int ierr;
1292: if (called) {return(0);} else called = PETSC_TRUE;
1293: (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1294: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");
1295: return(0);
1296: }
1298: EXTERN_C_BEGIN
1301: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1302: {
1303: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1304: int i,nz,nbs;
1307: nz = baij->maxnz/baij->bs2;
1308: nbs = baij->nbs;
1309: for (i=0; i<nz; i++) {
1310: baij->j[i] = indices[i];
1311: }
1312: baij->nz = nz;
1313: for (i=0; i<nbs; i++) {
1314: baij->ilen[i] = baij->imax[i];
1315: }
1317: return(0);
1318: }
1319: EXTERN_C_END
1323: /*@
1324: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1325: in the matrix.
1327: Input Parameters:
1328: + mat - the SeqBAIJ matrix
1329: - indices - the column indices
1331: Level: advanced
1333: Notes:
1334: This can be called if you have precomputed the nonzero structure of the
1335: matrix and want to provide it to the matrix object to improve the performance
1336: of the MatSetValues() operation.
1338: You MUST have set the correct numbers of nonzeros per row in the call to
1339: MatCreateSeqBAIJ().
1341: MUST be called before any calls to MatSetValues();
1343: @*/
1344: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1345: {
1346: int ierr,(*f)(Mat,int *);
1350: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1351: if (f) {
1352: (*f)(mat,indices);
1353: } else {
1354: SETERRQ(1,"Wrong type of matrix to set column indices");
1355: }
1356: return(0);
1357: }
1361: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1362: {
1363: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1364: int ierr,i,j,n,row,bs,*ai,*aj,mbs;
1365: PetscReal atmp;
1366: PetscScalar *x,zero = 0.0;
1367: MatScalar *aa;
1368: int ncols,brow,krow,kcol;
1371: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1372: bs = a->bs;
1373: aa = a->a;
1374: ai = a->i;
1375: aj = a->j;
1376: mbs = a->mbs;
1378: VecSet(&zero,v);
1379: VecGetArray(v,&x);
1380: VecGetLocalSize(v,&n);
1381: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1382: for (i=0; i<mbs; i++) {
1383: ncols = ai[1] - ai[0]; ai++;
1384: brow = bs*i;
1385: for (j=0; j<ncols; j++){
1386: /* bcol = bs*(*aj); */
1387: for (kcol=0; kcol<bs; kcol++){
1388: for (krow=0; krow<bs; krow++){
1389: atmp = PetscAbsScalar(*aa); aa++;
1390: row = brow + krow; /* row index */
1391: /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1392: if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1393: }
1394: }
1395: aj++;
1396: }
1397: }
1398: VecRestoreArray(v,&x);
1399: return(0);
1400: }
1404: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1405: {
1406: int ierr;
1409: MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1410: return(0);
1411: }
1415: int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1416: {
1417: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1419: *array = a->a;
1420: return(0);
1421: }
1425: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1426: {
1428: return(0);
1429: }
1431: #include petscblaslapack.h
1434: int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1435: {
1436: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1437: int ierr,one=1,i,bs=y->bs,j,bs2;
1440: if (str == SAME_NONZERO_PATTERN) {
1441: BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1442: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1443: if (y->xtoy && y->XtoY != X) {
1444: PetscFree(y->xtoy);
1445: MatDestroy(y->XtoY);
1446: }
1447: if (!y->xtoy) { /* get xtoy */
1448: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1449: y->XtoY = X;
1450: }
1451: bs2 = bs*bs;
1452: for (i=0; i<x->nz; i++) {
1453: j = 0;
1454: while (j < bs2){
1455: y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1456: j++;
1457: }
1458: }
1459: PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1460: } else {
1461: MatAXPY_Basic(a,X,Y,str);
1462: }
1463: return(0);
1464: }
1466: /* -------------------------------------------------------------------*/
1467: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1468: MatGetRow_SeqBAIJ,
1469: MatRestoreRow_SeqBAIJ,
1470: MatMult_SeqBAIJ_N,
1471: /* 4*/ MatMultAdd_SeqBAIJ_N,
1472: MatMultTranspose_SeqBAIJ,
1473: MatMultTransposeAdd_SeqBAIJ,
1474: MatSolve_SeqBAIJ_N,
1475: 0,
1476: 0,
1477: /*10*/ 0,
1478: MatLUFactor_SeqBAIJ,
1479: 0,
1480: 0,
1481: MatTranspose_SeqBAIJ,
1482: /*15*/ MatGetInfo_SeqBAIJ,
1483: MatEqual_SeqBAIJ,
1484: MatGetDiagonal_SeqBAIJ,
1485: MatDiagonalScale_SeqBAIJ,
1486: MatNorm_SeqBAIJ,
1487: /*20*/ 0,
1488: MatAssemblyEnd_SeqBAIJ,
1489: 0,
1490: MatSetOption_SeqBAIJ,
1491: MatZeroEntries_SeqBAIJ,
1492: /*25*/ MatZeroRows_SeqBAIJ,
1493: MatLUFactorSymbolic_SeqBAIJ,
1494: MatLUFactorNumeric_SeqBAIJ_N,
1495: 0,
1496: 0,
1497: /*30*/ MatSetUpPreallocation_SeqBAIJ,
1498: MatILUFactorSymbolic_SeqBAIJ,
1499: 0,
1500: MatGetArray_SeqBAIJ,
1501: MatRestoreArray_SeqBAIJ,
1502: /*35*/ MatDuplicate_SeqBAIJ,
1503: 0,
1504: 0,
1505: MatILUFactor_SeqBAIJ,
1506: 0,
1507: /*40*/ MatAXPY_SeqBAIJ,
1508: MatGetSubMatrices_SeqBAIJ,
1509: MatIncreaseOverlap_SeqBAIJ,
1510: MatGetValues_SeqBAIJ,
1511: 0,
1512: /*45*/ MatPrintHelp_SeqBAIJ,
1513: MatScale_SeqBAIJ,
1514: 0,
1515: 0,
1516: 0,
1517: /*50*/ MatGetBlockSize_SeqBAIJ,
1518: MatGetRowIJ_SeqBAIJ,
1519: MatRestoreRowIJ_SeqBAIJ,
1520: 0,
1521: 0,
1522: /*55*/ 0,
1523: 0,
1524: 0,
1525: 0,
1526: MatSetValuesBlocked_SeqBAIJ,
1527: /*60*/ MatGetSubMatrix_SeqBAIJ,
1528: MatDestroy_SeqBAIJ,
1529: MatView_SeqBAIJ,
1530: MatGetPetscMaps_Petsc,
1531: 0,
1532: /*65*/ 0,
1533: 0,
1534: 0,
1535: 0,
1536: 0,
1537: /*70*/ MatGetRowMax_SeqBAIJ,
1538: MatConvert_Basic,
1539: 0,
1540: 0,
1541: 0,
1542: /*75*/ 0,
1543: 0,
1544: 0,
1545: 0,
1546: 0,
1547: /*80*/ 0,
1548: 0,
1549: 0,
1550: 0,
1551: 0,
1552: /*85*/ MatLoad_SeqBAIJ
1553: };
1555: EXTERN_C_BEGIN
1558: int MatStoreValues_SeqBAIJ(Mat mat)
1559: {
1560: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1561: int nz = aij->i[mat->m]*aij->bs*aij->bs2;
1562: int ierr;
1565: if (aij->nonew != 1) {
1566: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1567: }
1569: /* allocate space for values if not already there */
1570: if (!aij->saved_values) {
1571: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1572: }
1574: /* copy values over */
1575: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1576: return(0);
1577: }
1578: EXTERN_C_END
1580: EXTERN_C_BEGIN
1583: int MatRetrieveValues_SeqBAIJ(Mat mat)
1584: {
1585: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1586: int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1589: if (aij->nonew != 1) {
1590: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1591: }
1592: if (!aij->saved_values) {
1593: SETERRQ(1,"Must call MatStoreValues(A);first");
1594: }
1596: /* copy values over */
1597: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1598: return(0);
1599: }
1600: EXTERN_C_END
1602: EXTERN_C_BEGIN
1603: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1604: EXTERN_C_END
1606: EXTERN_C_BEGIN
1609: int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1610: {
1611: Mat_SeqBAIJ *b;
1612: int i,len,ierr,mbs,nbs,bs2,newbs = bs;
1613: PetscTruth flg;
1617: B->preallocated = PETSC_TRUE;
1618: PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1619: if (nnz && newbs != bs) {
1620: SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1621: }
1622: bs = newbs;
1624: mbs = B->m/bs;
1625: nbs = B->n/bs;
1626: bs2 = bs*bs;
1628: if (mbs*bs!=B->m || nbs*bs!=B->n) {
1629: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1630: }
1632: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1633: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1634: if (nnz) {
1635: for (i=0; i<mbs; i++) {
1636: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1637: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1638: }
1639: }
1641: b = (Mat_SeqBAIJ*)B->data;
1642: PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1643: B->ops->solve = MatSolve_SeqBAIJ_Update;
1644: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
1645: if (!flg) {
1646: switch (bs) {
1647: case 1:
1648: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1649: B->ops->mult = MatMult_SeqBAIJ_1;
1650: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
1651: break;
1652: case 2:
1653: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1654: B->ops->mult = MatMult_SeqBAIJ_2;
1655: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
1656: break;
1657: case 3:
1658: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1659: B->ops->mult = MatMult_SeqBAIJ_3;
1660: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
1661: break;
1662: case 4:
1663: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1664: B->ops->mult = MatMult_SeqBAIJ_4;
1665: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
1666: break;
1667: case 5:
1668: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1669: B->ops->mult = MatMult_SeqBAIJ_5;
1670: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
1671: break;
1672: case 6:
1673: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1674: B->ops->mult = MatMult_SeqBAIJ_6;
1675: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
1676: break;
1677: case 7:
1678: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1679: B->ops->mult = MatMult_SeqBAIJ_7;
1680: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
1681: break;
1682: default:
1683: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1684: B->ops->mult = MatMult_SeqBAIJ_N;
1685: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
1686: break;
1687: }
1688: }
1689: b->bs = bs;
1690: b->mbs = mbs;
1691: b->nbs = nbs;
1692: PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1693: if (!nnz) {
1694: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1695: else if (nz <= 0) nz = 1;
1696: for (i=0; i<mbs; i++) b->imax[i] = nz;
1697: nz = nz*mbs;
1698: } else {
1699: nz = 0;
1700: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1701: }
1703: /* allocate the matrix space */
1704: len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1705: PetscMalloc(len,&b->a);
1706: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1707: b->j = (int*)(b->a + nz*bs2);
1708: PetscMemzero(b->j,nz*sizeof(int));
1709: b->i = b->j + nz;
1710: b->singlemalloc = PETSC_TRUE;
1712: b->i[0] = 0;
1713: for (i=1; i<mbs+1; i++) {
1714: b->i[i] = b->i[i-1] + b->imax[i-1];
1715: }
1717: /* b->ilen will count nonzeros in each block row so far. */
1718: PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1719: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1720: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1722: b->bs = bs;
1723: b->bs2 = bs2;
1724: b->mbs = mbs;
1725: b->nz = 0;
1726: b->maxnz = nz*bs2;
1727: B->info.nz_unneeded = (PetscReal)b->maxnz;
1728: return(0);
1729: }
1730: EXTERN_C_END
1732: /*MC
1733: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
1734: block sparse compressed row format.
1736: Options Database Keys:
1737: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
1739: Level: beginner
1741: .seealso: MatCreateSeqBAIJ
1742: M*/
1744: EXTERN_C_BEGIN
1747: int MatCreate_SeqBAIJ(Mat B)
1748: {
1749: int ierr,size;
1750: Mat_SeqBAIJ *b;
1753: MPI_Comm_size(B->comm,&size);
1754: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1756: B->m = B->M = PetscMax(B->m,B->M);
1757: B->n = B->N = PetscMax(B->n,B->N);
1758: PetscNew(Mat_SeqBAIJ,&b);
1759: B->data = (void*)b;
1760: PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1761: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1762: B->factor = 0;
1763: B->lupivotthreshold = 1.0;
1764: B->mapping = 0;
1765: b->row = 0;
1766: b->col = 0;
1767: b->icol = 0;
1768: b->reallocs = 0;
1769: b->saved_values = 0;
1770: #if defined(PETSC_USE_MAT_SINGLE)
1771: b->setvalueslen = 0;
1772: b->setvaluescopy = PETSC_NULL;
1773: #endif
1775: PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1776: PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);
1778: b->sorted = PETSC_FALSE;
1779: b->roworiented = PETSC_TRUE;
1780: b->nonew = 0;
1781: b->diag = 0;
1782: b->solve_work = 0;
1783: b->mult_work = 0;
1784: B->spptr = 0;
1785: B->info.nz_unneeded = (PetscReal)b->maxnz;
1786: b->keepzeroedrows = PETSC_FALSE;
1787: b->xtoy = 0;
1788: b->XtoY = 0;
1790: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1791: "MatStoreValues_SeqBAIJ",
1792: MatStoreValues_SeqBAIJ);
1793: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1794: "MatRetrieveValues_SeqBAIJ",
1795: MatRetrieveValues_SeqBAIJ);
1796: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1797: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1798: MatSeqBAIJSetColumnIndices_SeqBAIJ);
1799: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1800: "MatConvert_SeqBAIJ_SeqAIJ",
1801: MatConvert_SeqBAIJ_SeqAIJ);
1802: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1803: "MatSeqBAIJSetPreallocation_SeqBAIJ",
1804: MatSeqBAIJSetPreallocation_SeqBAIJ);
1805: return(0);
1806: }
1807: EXTERN_C_END
1811: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1812: {
1813: Mat C;
1814: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1815: int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1818: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1820: *B = 0;
1821: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1822: MatSetType(C,MATSEQBAIJ);
1823: c = (Mat_SeqBAIJ*)C->data;
1825: c->bs = a->bs;
1826: c->bs2 = a->bs2;
1827: c->mbs = a->mbs;
1828: c->nbs = a->nbs;
1829: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1831: PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1832: PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1833: for (i=0; i<mbs; i++) {
1834: c->imax[i] = a->imax[i];
1835: c->ilen[i] = a->ilen[i];
1836: }
1838: /* allocate the matrix space */
1839: c->singlemalloc = PETSC_TRUE;
1840: len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1841: PetscMalloc(len,&c->a);
1842: c->j = (int*)(c->a + nz*bs2);
1843: c->i = c->j + nz;
1844: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1845: if (mbs > 0) {
1846: PetscMemcpy(c->j,a->j,nz*sizeof(int));
1847: if (cpvalues == MAT_COPY_VALUES) {
1848: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1849: } else {
1850: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1851: }
1852: }
1854: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1855: c->sorted = a->sorted;
1856: c->roworiented = a->roworiented;
1857: c->nonew = a->nonew;
1859: if (a->diag) {
1860: PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1861: PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1862: for (i=0; i<mbs; i++) {
1863: c->diag[i] = a->diag[i];
1864: }
1865: } else c->diag = 0;
1866: c->nz = a->nz;
1867: c->maxnz = a->maxnz;
1868: c->solve_work = 0;
1869: C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */
1870: c->mult_work = 0;
1871: C->preallocated = PETSC_TRUE;
1872: C->assembled = PETSC_TRUE;
1873: *B = C;
1874: PetscFListDuplicate(A->qlist,&C->qlist);
1875: return(0);
1876: }
1880: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1881: {
1882: Mat_SeqBAIJ *a;
1883: Mat B;
1884: int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1885: int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1886: int kmax,jcount,block,idx,point,nzcountb,extra_rows;
1887: int *masked,nmask,tmp,bs2,ishift;
1888: PetscScalar *aa;
1889: MPI_Comm comm = ((PetscObject)viewer)->comm;
1892: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1893: bs2 = bs*bs;
1895: MPI_Comm_size(comm,&size);
1896: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1897: PetscViewerBinaryGetDescriptor(viewer,&fd);
1898: PetscBinaryRead(fd,header,4,PETSC_INT);
1899: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1900: M = header[1]; N = header[2]; nz = header[3];
1902: if (header[3] < 0) {
1903: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1904: }
1906: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1908: /*
1909: This code adds extra rows to make sure the number of rows is
1910: divisible by the blocksize
1911: */
1912: mbs = M/bs;
1913: extra_rows = bs - M + bs*(mbs);
1914: if (extra_rows == bs) extra_rows = 0;
1915: else mbs++;
1916: if (extra_rows) {
1917: PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1918: }
1920: /* read in row lengths */
1921: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1922: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1923: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1925: /* read in column indices */
1926: PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1927: PetscBinaryRead(fd,jj,nz,PETSC_INT);
1928: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1930: /* loop over row lengths determining block row lengths */
1931: PetscMalloc(mbs*sizeof(int),&browlengths);
1932: PetscMemzero(browlengths,mbs*sizeof(int));
1933: PetscMalloc(2*mbs*sizeof(int),&mask);
1934: PetscMemzero(mask,mbs*sizeof(int));
1935: masked = mask + mbs;
1936: rowcount = 0; nzcount = 0;
1937: for (i=0; i<mbs; i++) {
1938: nmask = 0;
1939: for (j=0; j<bs; j++) {
1940: kmax = rowlengths[rowcount];
1941: for (k=0; k<kmax; k++) {
1942: tmp = jj[nzcount++]/bs;
1943: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1944: }
1945: rowcount++;
1946: }
1947: browlengths[i] += nmask;
1948: /* zero out the mask elements we set */
1949: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1950: }
1952: /* create our matrix */
1953: MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
1954: MatSetType(B,type);
1955: MatSeqBAIJSetPreallocation(B,bs,0,browlengths);
1956: a = (Mat_SeqBAIJ*)B->data;
1958: /* set matrix "i" values */
1959: a->i[0] = 0;
1960: for (i=1; i<= mbs; i++) {
1961: a->i[i] = a->i[i-1] + browlengths[i-1];
1962: a->ilen[i-1] = browlengths[i-1];
1963: }
1964: a->nz = 0;
1965: for (i=0; i<mbs; i++) a->nz += browlengths[i];
1967: /* read in nonzero values */
1968: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1969: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1970: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1972: /* set "a" and "j" values into matrix */
1973: nzcount = 0; jcount = 0;
1974: for (i=0; i<mbs; i++) {
1975: nzcountb = nzcount;
1976: nmask = 0;
1977: for (j=0; j<bs; j++) {
1978: kmax = rowlengths[i*bs+j];
1979: for (k=0; k<kmax; k++) {
1980: tmp = jj[nzcount++]/bs;
1981: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1982: }
1983: }
1984: /* sort the masked values */
1985: PetscSortInt(nmask,masked);
1987: /* set "j" values into matrix */
1988: maskcount = 1;
1989: for (j=0; j<nmask; j++) {
1990: a->j[jcount++] = masked[j];
1991: mask[masked[j]] = maskcount++;
1992: }
1993: /* set "a" values into matrix */
1994: ishift = bs2*a->i[i];
1995: for (j=0; j<bs; j++) {
1996: kmax = rowlengths[i*bs+j];
1997: for (k=0; k<kmax; k++) {
1998: tmp = jj[nzcountb]/bs ;
1999: block = mask[tmp] - 1;
2000: point = jj[nzcountb] - bs*tmp;
2001: idx = ishift + bs2*block + j + bs*point;
2002: a->a[idx] = (MatScalar)aa[nzcountb++];
2003: }
2004: }
2005: /* zero out the mask elements we set */
2006: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2007: }
2008: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2010: PetscFree(rowlengths);
2011: PetscFree(browlengths);
2012: PetscFree(aa);
2013: PetscFree(jj);
2014: PetscFree(mask);
2016: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2017: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2018: MatView_Private(B);
2020: *A = B;
2021: return(0);
2022: }
2026: /*@C
2027: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2028: compressed row) format. For good matrix assembly performance the
2029: user should preallocate the matrix storage by setting the parameter nz
2030: (or the array nnz). By setting these parameters accurately, performance
2031: during matrix assembly can be increased by more than a factor of 50.
2033: Collective on MPI_Comm
2035: Input Parameters:
2036: + comm - MPI communicator, set to PETSC_COMM_SELF
2037: . bs - size of block
2038: . m - number of rows
2039: . n - number of columns
2040: . nz - number of nonzero blocks per block row (same for all rows)
2041: - nnz - array containing the number of nonzero blocks in the various block rows
2042: (possibly different for each block row) or PETSC_NULL
2044: Output Parameter:
2045: . A - the matrix
2047: Options Database Keys:
2048: . -mat_no_unroll - uses code that does not unroll the loops in the
2049: block calculations (much slower)
2050: . -mat_block_size - size of the blocks to use
2052: Level: intermediate
2054: Notes:
2055: A nonzero block is any block that as 1 or more nonzeros in it
2057: The block AIJ format is fully compatible with standard Fortran 77
2058: storage. That is, the stored row and column indices can begin at
2059: either one (as in Fortran) or zero. See the users' manual for details.
2061: Specify the preallocated storage with either nz or nnz (not both).
2062: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2063: allocation. For additional details, see the users manual chapter on
2064: matrices.
2066: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2067: @*/
2068: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2069: {
2071:
2073: MatCreate(comm,m,n,m,n,A);
2074: MatSetType(*A,MATSEQBAIJ);
2075: MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
2076: return(0);
2077: }
2081: /*@C
2082: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2083: per row in the matrix. For good matrix assembly performance the
2084: user should preallocate the matrix storage by setting the parameter nz
2085: (or the array nnz). By setting these parameters accurately, performance
2086: during matrix assembly can be increased by more than a factor of 50.
2088: Collective on MPI_Comm
2090: Input Parameters:
2091: + A - the matrix
2092: . bs - size of block
2093: . nz - number of block nonzeros per block row (same for all rows)
2094: - nnz - array containing the number of block nonzeros in the various block rows
2095: (possibly different for each block row) or PETSC_NULL
2097: Options Database Keys:
2098: . -mat_no_unroll - uses code that does not unroll the loops in the
2099: block calculations (much slower)
2100: . -mat_block_size - size of the blocks to use
2102: Level: intermediate
2104: Notes:
2105: The block AIJ format is fully compatible with standard Fortran 77
2106: storage. That is, the stored row and column indices can begin at
2107: either one (as in Fortran) or zero. See the users' manual for details.
2109: Specify the preallocated storage with either nz or nnz (not both).
2110: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2111: allocation. For additional details, see the users manual chapter on
2112: matrices.
2114: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2115: @*/
2116: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2117: {
2118: int ierr,(*f)(Mat,int,int,const int[]);
2121: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2122: if (f) {
2123: (*f)(B,bs,nz,nnz);
2124: }
2125: return(0);
2126: }