Actual source code: aijnode.c
1: /*
2: This file provides high performance routines for the AIJ (compressed row)
3: format by taking advantage of rows with identical nonzero structure (I-nodes).
4: */
5: #include src/mat/impls/aij/seq/aij.h
7: EXTERN PetscErrorCode Mat_AIJ_CheckInode(Mat,PetscTruth);
8: EXTERN PetscErrorCode MatSolve_SeqAIJ_Inode(Mat,Vec,Vec);
9: EXTERN PetscErrorCode MatLUFactorNumeric_SeqAIJ_Inode(Mat,Mat *);
11: EXTERN PetscErrorCode MatMult_SeqAIJ(Mat,Vec,Vec);
12: EXTERN PetscErrorCode MatMultAdd_SeqAIJ(Mat,Vec,Vec,Vec);
13: EXTERN PetscErrorCode MatSolve_SeqAIJ(Mat,Vec,Vec);
14: EXTERN PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat,Mat*);
15: EXTERN PetscErrorCode MatGetRowIJ_SeqAIJ(Mat,PetscInt,PetscTruth,PetscInt*,PetscInt*[],PetscInt*[],PetscTruth*);
16: EXTERN PetscErrorCode MatRestoreRowIJ_SeqAIJ(Mat,PetscInt,PetscTruth,PetscInt*,PetscInt*[],PetscInt*[],PetscTruth*);
17: EXTERN PetscErrorCode MatGetColumnIJ_SeqAIJ(Mat,PetscInt,PetscTruth,PetscInt*,PetscInt*[],PetscInt*[],PetscTruth*);
18: EXTERN PetscErrorCode MatRestoreColumnIJ_SeqAIJ(Mat,PetscInt,PetscTruth,PetscInt*,PetscInt*[],PetscInt*[],PetscTruth*);
22: static PetscErrorCode Mat_AIJ_CreateColInode(Mat A,PetscInt* size,PetscInt ** ns)
23: {
24: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
26: PetscInt i,count,m,n,min_mn,*ns_row,*ns_col;
29: n = A->n;
30: m = A->m;
31: ns_row = a->inode.size;
32:
33: min_mn = (m < n) ? m : n;
34: if (!ns) {
35: for (count=0,i=0; count<min_mn; count+=ns_row[i],i++);
36: for(; count+1 < n; count++,i++);
37: if (count < n) {
38: i++;
39: }
40: *size = i;
41: return(0);
42: }
43: PetscMalloc((n+1)*sizeof(PetscInt),&ns_col);
44:
45: /* Use the same row structure wherever feasible. */
46: for (count=0,i=0; count<min_mn; count+=ns_row[i],i++) {
47: ns_col[i] = ns_row[i];
48: }
50: /* if m < n; pad up the remainder with inode_limit */
51: for(; count+1 < n; count++,i++) {
52: ns_col[i] = 1;
53: }
54: /* The last node is the odd ball. padd it up with the remaining rows; */
55: if (count < n) {
56: ns_col[i] = n - count;
57: i++;
58: } else if (count > n) {
59: /* Adjust for the over estimation */
60: ns_col[i-1] += n - count;
61: }
62: *size = i;
63: *ns = ns_col;
64: return(0);
65: }
68: /*
69: This builds symmetric version of nonzero structure,
70: */
73: static PetscErrorCode MatGetRowIJ_SeqAIJ_Inode_Symmetric(Mat A,PetscInt *iia[],PetscInt *jja[],PetscInt ishift,PetscInt oshift)
74: {
75: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
77: PetscInt *work,*ia,*ja,*j,nz,nslim_row,nslim_col,m,row,col,*jmax,n;
78: PetscInt *tns,*tvc,*ns_row = a->inode.size,*ns_col,nsz,i1,i2,*ai= a->i,*aj = a->j;
81: nslim_row = a->inode.node_count;
82: m = A->m;
83: n = A->n;
84: if (m != n) SETERRQ(PETSC_ERR_SUP,"MatGetRowIJ_SeqAIJ_Inode_Symmetric: Matrix shoul be square");
85:
86: /* Use the row_inode as column_inode */
87: nslim_col = nslim_row;
88: ns_col = ns_row;
90: /* allocate space for reformated inode structure */
91: PetscMalloc((nslim_col+1)*sizeof(PetscInt),&tns);
92: PetscMalloc((n+1)*sizeof(PetscInt),&tvc);
93: for (i1=0,tns[0]=0; i1<nslim_col; ++i1) tns[i1+1] = tns[i1]+ ns_row[i1];
95: for (i1=0,col=0; i1<nslim_col; ++i1){
96: nsz = ns_col[i1];
97: for (i2=0; i2<nsz; ++i2,++col)
98: tvc[col] = i1;
99: }
100: /* allocate space for row pointers */
101: PetscMalloc((nslim_row+1)*sizeof(PetscInt),&ia);
102: *iia = ia;
103: PetscMemzero(ia,(nslim_row+1)*sizeof(PetscInt));
104: PetscMalloc((nslim_row+1)*sizeof(PetscInt),&work);
106: /* determine the number of columns in each row */
107: ia[0] = oshift;
108: for (i1=0,row=0 ; i1<nslim_row; row+=ns_row[i1],i1++) {
110: j = aj + ai[row] + ishift;
111: jmax = aj + ai[row+1] + ishift;
112: i2 = 0;
113: col = *j++ + ishift;
114: i2 = tvc[col];
115: while (i2<i1 && j<jmax) { /* 1.[-xx-d-xx--] 2.[-xx-------],off-diagonal elemets */
116: ia[i1+1]++;
117: ia[i2+1]++;
118: i2++; /* Start col of next node */
119: while(((col=*j+ishift)<tns[i2]) && (j<jmax)) ++j;
120: i2 = tvc[col];
121: }
122: if(i2 == i1) ia[i2+1]++; /* now the diagonal element */
123: }
125: /* shift ia[i] to point to next row */
126: for (i1=1; i1<nslim_row+1; i1++) {
127: row = ia[i1-1];
128: ia[i1] += row;
129: work[i1-1] = row - oshift;
130: }
132: /* allocate space for column pointers */
133: nz = ia[nslim_row] + (!ishift);
134: PetscMalloc(nz*sizeof(PetscInt),&ja);
135: *jja = ja;
137: /* loop over lower triangular part putting into ja */
138: for (i1=0,row=0; i1<nslim_row; row += ns_row[i1],i1++) {
139: j = aj + ai[row] + ishift;
140: jmax = aj + ai[row+1] + ishift;
141: i2 = 0; /* Col inode index */
142: col = *j++ + ishift;
143: i2 = tvc[col];
144: while (i2<i1 && j<jmax) {
145: ja[work[i2]++] = i1 + oshift;
146: ja[work[i1]++] = i2 + oshift;
147: ++i2;
148: while(((col=*j+ishift)< tns[i2])&&(j<jmax)) ++j; /* Skip rest col indices in this node */
149: i2 = tvc[col];
150: }
151: if (i2 == i1) ja[work[i1]++] = i2 + oshift;
153: }
154: PetscFree(work);
155: PetscFree(tns);
156: PetscFree(tvc);
157: return(0);
158: }
160: /*
161: This builds nonsymmetric version of nonzero structure,
162: */
165: static PetscErrorCode MatGetRowIJ_SeqAIJ_Inode_Nonsymmetric(Mat A,PetscInt *iia[],PetscInt *jja[],PetscInt ishift,PetscInt oshift)
166: {
167: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
169: PetscInt *work,*ia,*ja,*j,nz,nslim_row,n,row,col,*ns_col,nslim_col;
170: PetscInt *tns,*tvc,*ns_row = a->inode.size,nsz,i1,i2,*ai= a->i,*aj = a->j;
173: nslim_row = a->inode.node_count;
174: n = A->n;
176: /* Create The column_inode for this matrix */
177: Mat_AIJ_CreateColInode(A,&nslim_col,&ns_col);
178:
179: /* allocate space for reformated column_inode structure */
180: PetscMalloc((nslim_col +1)*sizeof(PetscInt),&tns);
181: PetscMalloc((n +1)*sizeof(PetscInt),&tvc);
182: for (i1=0,tns[0]=0; i1<nslim_col; ++i1) tns[i1+1] = tns[i1] + ns_col[i1];
184: for (i1=0,col=0; i1<nslim_col; ++i1){
185: nsz = ns_col[i1];
186: for (i2=0; i2<nsz; ++i2,++col)
187: tvc[col] = i1;
188: }
189: /* allocate space for row pointers */
190: PetscMalloc((nslim_row+1)*sizeof(PetscInt),&ia);
191: *iia = ia;
192: PetscMemzero(ia,(nslim_row+1)*sizeof(PetscInt));
193: PetscMalloc((nslim_row+1)*sizeof(PetscInt),&work);
195: /* determine the number of columns in each row */
196: ia[0] = oshift;
197: for (i1=0,row=0; i1<nslim_row; row+=ns_row[i1],i1++) {
198: j = aj + ai[row] + ishift;
199: col = *j++ + ishift;
200: i2 = tvc[col];
201: nz = ai[row+1] - ai[row];
202: while (nz-- > 0) { /* off-diagonal elemets */
203: ia[i1+1]++;
204: i2++; /* Start col of next node */
205: while (((col = *j++ + ishift) < tns[i2]) && nz > 0) {nz--;}
206: i2 = tvc[col];
207: }
208: }
210: /* shift ia[i] to point to next row */
211: for (i1=1; i1<nslim_row+1; i1++) {
212: row = ia[i1-1];
213: ia[i1] += row;
214: work[i1-1] = row - oshift;
215: }
217: /* allocate space for column pointers */
218: nz = ia[nslim_row] + (!ishift);
219: PetscMalloc(nz*sizeof(PetscInt),&ja);
220: *jja = ja;
222: /* loop over matrix putting into ja */
223: for (i1=0,row=0; i1<nslim_row; row+=ns_row[i1],i1++) {
224: j = aj + ai[row] + ishift;
225: i2 = 0; /* Col inode index */
226: col = *j++ + ishift;
227: i2 = tvc[col];
228: nz = ai[row+1] - ai[row];
229: while (nz-- > 0) {
230: ja[work[i1]++] = i2 + oshift;
231: ++i2;
232: while(((col = *j++ + ishift) < tns[i2]) && nz > 0) {nz--;}
233: i2 = tvc[col];
234: }
235: }
236: PetscFree(ns_col);
237: PetscFree(work);
238: PetscFree(tns);
239: PetscFree(tvc);
240: return(0);
241: }
245: static PetscErrorCode MatGetRowIJ_SeqAIJ_Inode(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
246: {
247: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
251: *n = a->inode.node_count;
252: if (!ia) return(0);
254: if (symmetric) {
255: MatGetRowIJ_SeqAIJ_Inode_Symmetric(A,ia,ja,0,oshift);
256: } else {
257: MatGetRowIJ_SeqAIJ_Inode_Nonsymmetric(A,ia,ja,0,oshift);
258: }
259: return(0);
260: }
264: static PetscErrorCode MatRestoreRowIJ_SeqAIJ_Inode(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
265: {
269: if (!ia) return(0);
270: PetscFree(*ia);
271: PetscFree(*ja);
272: return(0);
273: }
275: /* ----------------------------------------------------------- */
279: static PetscErrorCode MatGetColumnIJ_SeqAIJ_Inode_Nonsymmetric(Mat A,PetscInt *iia[],PetscInt *jja[],PetscInt ishift,PetscInt oshift)
280: {
281: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
283: PetscInt *work,*ia,*ja,*j,nz,nslim_row, n,row,col,*ns_col,nslim_col;
284: PetscInt *tns,*tvc,*ns_row = a->inode.size,nsz,i1,i2,*ai= a->i,*aj = a->j;
287: nslim_row = a->inode.node_count;
288: n = A->n;
290: /* Create The column_inode for this matrix */
291: Mat_AIJ_CreateColInode(A,&nslim_col,&ns_col);
292:
293: /* allocate space for reformated column_inode structure */
294: PetscMalloc((nslim_col + 1)*sizeof(PetscInt),&tns);
295: PetscMalloc((n + 1)*sizeof(PetscInt),&tvc);
296: for (i1=0,tns[0]=0; i1<nslim_col; ++i1) tns[i1+1] = tns[i1] + ns_col[i1];
298: for (i1=0,col=0; i1<nslim_col; ++i1){
299: nsz = ns_col[i1];
300: for (i2=0; i2<nsz; ++i2,++col)
301: tvc[col] = i1;
302: }
303: /* allocate space for column pointers */
304: PetscMalloc((nslim_col+1)*sizeof(PetscInt),&ia);
305: *iia = ia;
306: PetscMemzero(ia,(nslim_col+1)*sizeof(PetscInt));
307: PetscMalloc((nslim_col+1)*sizeof(PetscInt),&work);
309: /* determine the number of columns in each row */
310: ia[0] = oshift;
311: for (i1=0,row=0; i1<nslim_row; row+=ns_row[i1],i1++) {
312: j = aj + ai[row] + ishift;
313: col = *j++ + ishift;
314: i2 = tvc[col];
315: nz = ai[row+1] - ai[row];
316: while (nz-- > 0) { /* off-diagonal elemets */
317: /* ia[i1+1]++; */
318: ia[i2+1]++;
319: i2++;
320: while (((col = *j++ + ishift) < tns[i2]) && nz > 0) {nz--;}
321: i2 = tvc[col];
322: }
323: }
325: /* shift ia[i] to point to next col */
326: for (i1=1; i1<nslim_col+1; i1++) {
327: col = ia[i1-1];
328: ia[i1] += col;
329: work[i1-1] = col - oshift;
330: }
332: /* allocate space for column pointers */
333: nz = ia[nslim_col] + (!ishift);
334: PetscMalloc(nz*sizeof(PetscInt),&ja);
335: *jja = ja;
337: /* loop over matrix putting into ja */
338: for (i1=0,row=0; i1<nslim_row; row+=ns_row[i1],i1++) {
339: j = aj + ai[row] + ishift;
340: i2 = 0; /* Col inode index */
341: col = *j++ + ishift;
342: i2 = tvc[col];
343: nz = ai[row+1] - ai[row];
344: while (nz-- > 0) {
345: /* ja[work[i1]++] = i2 + oshift; */
346: ja[work[i2]++] = i1 + oshift;
347: i2++;
348: while(((col = *j++ + ishift) < tns[i2]) && nz > 0) {nz--;}
349: i2 = tvc[col];
350: }
351: }
352: PetscFree(ns_col);
353: PetscFree(work);
354: PetscFree(tns);
355: PetscFree(tvc);
356: return(0);
357: }
361: static PetscErrorCode MatGetColumnIJ_SeqAIJ_Inode(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
362: {
366: Mat_AIJ_CreateColInode(A,n,PETSC_NULL);
367: if (!ia) return(0);
369: if (symmetric) {
370: /* Since the indices are symmetric it does'nt matter */
371: MatGetRowIJ_SeqAIJ_Inode_Symmetric(A,ia,ja,0,oshift);
372: } else {
373: MatGetColumnIJ_SeqAIJ_Inode_Nonsymmetric(A,ia,ja,0,oshift);
374: }
375: return(0);
376: }
380: static PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Inode(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
381: {
385: if (!ia) return(0);
386: PetscFree(*ia);
387: PetscFree(*ja);
388: return(0);
389: }
391: /* ----------------------------------------------------------- */
395: static PetscErrorCode MatMult_SeqAIJ_Inode(Mat A,Vec xx,Vec yy)
396: {
397: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
398: PetscScalar sum1,sum2,sum3,sum4,sum5,tmp0,tmp1;
399: PetscScalar *v1,*v2,*v3,*v4,*v5,*x,*y;
401: PetscInt *idx,i1,i2,n,i,row,node_max,*ns,*ii,nsz,sz;
402:
403: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
404: #pragma disjoint(*x,*y,*v1,*v2,*v3,*v4,*v5)
405: #endif
408: if (!a->inode.size) SETERRQ(PETSC_ERR_COR,"Missing Inode Structure");
409: node_max = a->inode.node_count;
410: ns = a->inode.size; /* Node Size array */
411: VecGetArray(xx,&x);
412: VecGetArray(yy,&y);
413: idx = a->j;
414: v1 = a->a;
415: ii = a->i;
417: for (i = 0,row = 0; i< node_max; ++i){
418: nsz = ns[i];
419: n = ii[1] - ii[0];
420: ii += nsz;
421: sz = n; /* No of non zeros in this row */
422: /* Switch on the size of Node */
423: switch (nsz){ /* Each loop in 'case' is unrolled */
424: case 1 :
425: sum1 = 0;
426:
427: for(n = 0; n< sz-1; n+=2) {
428: i1 = idx[0]; /* The instructions are ordered to */
429: i2 = idx[1]; /* make the compiler's job easy */
430: idx += 2;
431: tmp0 = x[i1];
432: tmp1 = x[i2];
433: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
434: }
435:
436: if (n == sz-1){ /* Take care of the last nonzero */
437: tmp0 = x[*idx++];
438: sum1 += *v1++ * tmp0;
439: }
440: y[row++]=sum1;
441: break;
442: case 2:
443: sum1 = 0;
444: sum2 = 0;
445: v2 = v1 + n;
446:
447: for (n = 0; n< sz-1; n+=2) {
448: i1 = idx[0];
449: i2 = idx[1];
450: idx += 2;
451: tmp0 = x[i1];
452: tmp1 = x[i2];
453: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
454: sum2 += v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
455: }
456: if (n == sz-1){
457: tmp0 = x[*idx++];
458: sum1 += *v1++ * tmp0;
459: sum2 += *v2++ * tmp0;
460: }
461: y[row++]=sum1;
462: y[row++]=sum2;
463: v1 =v2; /* Since the next block to be processed starts there*/
464: idx +=sz;
465: break;
466: case 3:
467: sum1 = 0;
468: sum2 = 0;
469: sum3 = 0;
470: v2 = v1 + n;
471: v3 = v2 + n;
472:
473: for (n = 0; n< sz-1; n+=2) {
474: i1 = idx[0];
475: i2 = idx[1];
476: idx += 2;
477: tmp0 = x[i1];
478: tmp1 = x[i2];
479: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
480: sum2 += v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
481: sum3 += v3[0] * tmp0 + v3[1] * tmp1; v3 += 2;
482: }
483: if (n == sz-1){
484: tmp0 = x[*idx++];
485: sum1 += *v1++ * tmp0;
486: sum2 += *v2++ * tmp0;
487: sum3 += *v3++ * tmp0;
488: }
489: y[row++]=sum1;
490: y[row++]=sum2;
491: y[row++]=sum3;
492: v1 =v3; /* Since the next block to be processed starts there*/
493: idx +=2*sz;
494: break;
495: case 4:
496: sum1 = 0;
497: sum2 = 0;
498: sum3 = 0;
499: sum4 = 0;
500: v2 = v1 + n;
501: v3 = v2 + n;
502: v4 = v3 + n;
503:
504: for (n = 0; n< sz-1; n+=2) {
505: i1 = idx[0];
506: i2 = idx[1];
507: idx += 2;
508: tmp0 = x[i1];
509: tmp1 = x[i2];
510: sum1 += v1[0] * tmp0 + v1[1] *tmp1; v1 += 2;
511: sum2 += v2[0] * tmp0 + v2[1] *tmp1; v2 += 2;
512: sum3 += v3[0] * tmp0 + v3[1] *tmp1; v3 += 2;
513: sum4 += v4[0] * tmp0 + v4[1] *tmp1; v4 += 2;
514: }
515: if (n == sz-1){
516: tmp0 = x[*idx++];
517: sum1 += *v1++ * tmp0;
518: sum2 += *v2++ * tmp0;
519: sum3 += *v3++ * tmp0;
520: sum4 += *v4++ * tmp0;
521: }
522: y[row++]=sum1;
523: y[row++]=sum2;
524: y[row++]=sum3;
525: y[row++]=sum4;
526: v1 =v4; /* Since the next block to be processed starts there*/
527: idx +=3*sz;
528: break;
529: case 5:
530: sum1 = 0;
531: sum2 = 0;
532: sum3 = 0;
533: sum4 = 0;
534: sum5 = 0;
535: v2 = v1 + n;
536: v3 = v2 + n;
537: v4 = v3 + n;
538: v5 = v4 + n;
539:
540: for (n = 0; n<sz-1; n+=2) {
541: i1 = idx[0];
542: i2 = idx[1];
543: idx += 2;
544: tmp0 = x[i1];
545: tmp1 = x[i2];
546: sum1 += v1[0] * tmp0 + v1[1] *tmp1; v1 += 2;
547: sum2 += v2[0] * tmp0 + v2[1] *tmp1; v2 += 2;
548: sum3 += v3[0] * tmp0 + v3[1] *tmp1; v3 += 2;
549: sum4 += v4[0] * tmp0 + v4[1] *tmp1; v4 += 2;
550: sum5 += v5[0] * tmp0 + v5[1] *tmp1; v5 += 2;
551: }
552: if (n == sz-1){
553: tmp0 = x[*idx++];
554: sum1 += *v1++ * tmp0;
555: sum2 += *v2++ * tmp0;
556: sum3 += *v3++ * tmp0;
557: sum4 += *v4++ * tmp0;
558: sum5 += *v5++ * tmp0;
559: }
560: y[row++]=sum1;
561: y[row++]=sum2;
562: y[row++]=sum3;
563: y[row++]=sum4;
564: y[row++]=sum5;
565: v1 =v5; /* Since the next block to be processed starts there */
566: idx +=4*sz;
567: break;
568: default :
569: SETERRQ(PETSC_ERR_COR,"Node size not yet supported");
570: }
571: }
572: VecRestoreArray(xx,&x);
573: VecRestoreArray(yy,&y);
574: PetscLogFlops(2*a->nz - A->m);
575: return(0);
576: }
577: /* ----------------------------------------------------------- */
578: /* Almost same code as the MatMult_SeqAij_Inode() */
581: static PetscErrorCode MatMultAdd_SeqAIJ_Inode(Mat A,Vec xx,Vec zz,Vec yy)
582: {
583: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
584: PetscScalar sum1,sum2,sum3,sum4,sum5,tmp0,tmp1;
585: PetscScalar *v1,*v2,*v3,*v4,*v5,*x,*y,*z,*zt;
587: PetscInt *idx,i1,i2,n,i,row,node_max,*ns,*ii,nsz,sz;
588:
590: if (!a->inode.size) SETERRQ(PETSC_ERR_COR,"Missing Inode Structure");
591: node_max = a->inode.node_count;
592: ns = a->inode.size; /* Node Size array */
593: VecGetArray(xx,&x);
594: VecGetArray(yy,&y);
595: if (zz != yy) {
596: VecGetArray(zz,&z);
597: } else {
598: z = y;
599: }
600: zt = z;
602: idx = a->j;
603: v1 = a->a;
604: ii = a->i;
606: for (i = 0,row = 0; i< node_max; ++i){
607: nsz = ns[i];
608: n = ii[1] - ii[0];
609: ii += nsz;
610: sz = n; /* No of non zeros in this row */
611: /* Switch on the size of Node */
612: switch (nsz){ /* Each loop in 'case' is unrolled */
613: case 1 :
614: sum1 = *zt++;
615:
616: for(n = 0; n< sz-1; n+=2) {
617: i1 = idx[0]; /* The instructions are ordered to */
618: i2 = idx[1]; /* make the compiler's job easy */
619: idx += 2;
620: tmp0 = x[i1];
621: tmp1 = x[i2];
622: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
623: }
624:
625: if(n == sz-1){ /* Take care of the last nonzero */
626: tmp0 = x[*idx++];
627: sum1 += *v1++ * tmp0;
628: }
629: y[row++]=sum1;
630: break;
631: case 2:
632: sum1 = *zt++;
633: sum2 = *zt++;
634: v2 = v1 + n;
635:
636: for(n = 0; n< sz-1; n+=2) {
637: i1 = idx[0];
638: i2 = idx[1];
639: idx += 2;
640: tmp0 = x[i1];
641: tmp1 = x[i2];
642: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
643: sum2 += v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
644: }
645: if(n == sz-1){
646: tmp0 = x[*idx++];
647: sum1 += *v1++ * tmp0;
648: sum2 += *v2++ * tmp0;
649: }
650: y[row++]=sum1;
651: y[row++]=sum2;
652: v1 =v2; /* Since the next block to be processed starts there*/
653: idx +=sz;
654: break;
655: case 3:
656: sum1 = *zt++;
657: sum2 = *zt++;
658: sum3 = *zt++;
659: v2 = v1 + n;
660: v3 = v2 + n;
661:
662: for (n = 0; n< sz-1; n+=2) {
663: i1 = idx[0];
664: i2 = idx[1];
665: idx += 2;
666: tmp0 = x[i1];
667: tmp1 = x[i2];
668: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
669: sum2 += v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
670: sum3 += v3[0] * tmp0 + v3[1] * tmp1; v3 += 2;
671: }
672: if (n == sz-1){
673: tmp0 = x[*idx++];
674: sum1 += *v1++ * tmp0;
675: sum2 += *v2++ * tmp0;
676: sum3 += *v3++ * tmp0;
677: }
678: y[row++]=sum1;
679: y[row++]=sum2;
680: y[row++]=sum3;
681: v1 =v3; /* Since the next block to be processed starts there*/
682: idx +=2*sz;
683: break;
684: case 4:
685: sum1 = *zt++;
686: sum2 = *zt++;
687: sum3 = *zt++;
688: sum4 = *zt++;
689: v2 = v1 + n;
690: v3 = v2 + n;
691: v4 = v3 + n;
692:
693: for (n = 0; n< sz-1; n+=2) {
694: i1 = idx[0];
695: i2 = idx[1];
696: idx += 2;
697: tmp0 = x[i1];
698: tmp1 = x[i2];
699: sum1 += v1[0] * tmp0 + v1[1] *tmp1; v1 += 2;
700: sum2 += v2[0] * tmp0 + v2[1] *tmp1; v2 += 2;
701: sum3 += v3[0] * tmp0 + v3[1] *tmp1; v3 += 2;
702: sum4 += v4[0] * tmp0 + v4[1] *tmp1; v4 += 2;
703: }
704: if (n == sz-1){
705: tmp0 = x[*idx++];
706: sum1 += *v1++ * tmp0;
707: sum2 += *v2++ * tmp0;
708: sum3 += *v3++ * tmp0;
709: sum4 += *v4++ * tmp0;
710: }
711: y[row++]=sum1;
712: y[row++]=sum2;
713: y[row++]=sum3;
714: y[row++]=sum4;
715: v1 =v4; /* Since the next block to be processed starts there*/
716: idx +=3*sz;
717: break;
718: case 5:
719: sum1 = *zt++;
720: sum2 = *zt++;
721: sum3 = *zt++;
722: sum4 = *zt++;
723: sum5 = *zt++;
724: v2 = v1 + n;
725: v3 = v2 + n;
726: v4 = v3 + n;
727: v5 = v4 + n;
728:
729: for (n = 0; n<sz-1; n+=2) {
730: i1 = idx[0];
731: i2 = idx[1];
732: idx += 2;
733: tmp0 = x[i1];
734: tmp1 = x[i2];
735: sum1 += v1[0] * tmp0 + v1[1] *tmp1; v1 += 2;
736: sum2 += v2[0] * tmp0 + v2[1] *tmp1; v2 += 2;
737: sum3 += v3[0] * tmp0 + v3[1] *tmp1; v3 += 2;
738: sum4 += v4[0] * tmp0 + v4[1] *tmp1; v4 += 2;
739: sum5 += v5[0] * tmp0 + v5[1] *tmp1; v5 += 2;
740: }
741: if(n == sz-1){
742: tmp0 = x[*idx++];
743: sum1 += *v1++ * tmp0;
744: sum2 += *v2++ * tmp0;
745: sum3 += *v3++ * tmp0;
746: sum4 += *v4++ * tmp0;
747: sum5 += *v5++ * tmp0;
748: }
749: y[row++]=sum1;
750: y[row++]=sum2;
751: y[row++]=sum3;
752: y[row++]=sum4;
753: y[row++]=sum5;
754: v1 =v5; /* Since the next block to be processed starts there */
755: idx +=4*sz;
756: break;
757: default :
758: SETERRQ(PETSC_ERR_COR,"Node size not yet supported");
759: }
760: }
761: VecRestoreArray(xx,&x);
762: VecRestoreArray(yy,&y);
763: if (zz != yy) {
764: VecRestoreArray(zz,&z);
765: }
766: PetscLogFlops(2*a->nz);
767: return(0);
768: }
769: /* ----------------------------------------------------------- */
770: EXTERN PetscErrorCode MatColoringPatch_SeqAIJ_Inode(Mat,PetscInt,PetscInt,ISColoringValue[],ISColoring *);
772: /*
773: samestructure indicates that the matrix has not changed its nonzero structure so we
774: do not need to recompute the inodes
775: */
778: PetscErrorCode Mat_AIJ_CheckInode(Mat A,PetscTruth samestructure)
779: {
780: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
782: PetscInt i,j,m,nzx,nzy,*idx,*idy,*ns,*ii,node_count,blk_size;
783: PetscTruth flag,flg;
786: if (samestructure && a->inode.checked) return(0);
788: a->inode.checked = PETSC_TRUE;
790: /* Notes: We set a->inode.limit=5 in MatCreateSeqAIJ(). */
791: if (!a->inode.use) {PetscLogInfo(A,"Mat_AIJ_CheckInode: Not using Inode routines due to MatSetOption(MAT_DO_NOT_USE_INODES\n"); return(0);}
792: PetscOptionsHasName(A->prefix,"-mat_aij_no_inode",&flg);
793: if (flg) {PetscLogInfo(A,"Mat_AIJ_CheckInode: Not using Inode routines due to -mat_aij_no_inode\n");return(0);}
794: PetscOptionsHasName(A->prefix,"-mat_no_unroll",&flg);
795: if (flg) {PetscLogInfo(A,"Mat_AIJ_CheckInode: Not using Inode routines due to -mat_no_unroll\n");return(0);}
796: PetscOptionsGetInt(A->prefix,"-mat_aij_inode_limit",&a->inode.limit,PETSC_NULL);
797: if (a->inode.limit > a->inode.max_limit) a->inode.limit = a->inode.max_limit;
798: m = A->m;
799: if (a->inode.size) {ns = a->inode.size;}
800: else {PetscMalloc((m+1)*sizeof(PetscInt),&ns);}
802: i = 0;
803: node_count = 0;
804: idx = a->j;
805: ii = a->i;
806: while (i < m){ /* For each row */
807: nzx = ii[i+1] - ii[i]; /* Number of nonzeros */
808: /* Limits the number of elements in a node to 'a->inode.limit' */
809: for (j=i+1,idy=idx,blk_size=1; j<m && blk_size <a->inode.limit; ++j,++blk_size) {
810: nzy = ii[j+1] - ii[j]; /* Same number of nonzeros */
811: if (nzy != nzx) break;
812: idy += nzx; /* Same nonzero pattern */
813: PetscMemcmp(idx,idy,nzx*sizeof(PetscInt),&flag);
814: if (flag == PETSC_FALSE) break;
815: }
816: ns[node_count++] = blk_size;
817: idx += blk_size*nzx;
818: i = j;
819: }
820: /* If not enough inodes found,, do not use inode version of the routines */
821: if (!a->inode.size && m && node_count > 0.9*m) {
822: PetscFree(ns);
823: A->ops->mult = MatMult_SeqAIJ;
824: A->ops->multadd = MatMultAdd_SeqAIJ;
825: A->ops->solve = MatSolve_SeqAIJ;
826: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
827: A->ops->getrowij = MatGetRowIJ_SeqAIJ;
828: A->ops->restorerowij = MatRestoreRowIJ_SeqAIJ;
829: A->ops->getcolumnij = MatGetColumnIJ_SeqAIJ;
830: A->ops->restorecolumnij = MatRestoreColumnIJ_SeqAIJ;
831: A->ops->coloringpatch = 0;
832: a->inode.node_count = 0;
833: a->inode.size = 0;
834: PetscLogInfo(A,"Mat_AIJ_CheckInode: Found %D nodes out of %D rows. Not using Inode routines\n",node_count,m);
835: } else {
836: A->ops->mult = MatMult_SeqAIJ_Inode;
837: A->ops->multadd = MatMultAdd_SeqAIJ_Inode;
838: A->ops->solve = MatSolve_SeqAIJ_Inode;
839: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
840: A->ops->getrowij = MatGetRowIJ_SeqAIJ_Inode;
841: A->ops->restorerowij = MatRestoreRowIJ_SeqAIJ_Inode;
842: A->ops->getcolumnij = MatGetColumnIJ_SeqAIJ_Inode;
843: A->ops->restorecolumnij = MatRestoreColumnIJ_SeqAIJ_Inode;
844: A->ops->coloringpatch = MatColoringPatch_SeqAIJ_Inode;
845: a->inode.node_count = node_count;
846: a->inode.size = ns;
847: PetscLogInfo(A,"Mat_AIJ_CheckInode: Found %D nodes of %D. Limit used: %D. Using Inode routines\n",node_count,m,a->inode.limit);
848: }
849: return(0);
850: }
852: /* ----------------------------------------------------------- */
855: PetscErrorCode MatSolve_SeqAIJ_Inode(Mat A,Vec bb,Vec xx)
856: {
857: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
858: IS iscol = a->col,isrow = a->row;
860: PetscInt *r,*c,i,j,n = A->m,*ai = a->i,nz,*a_j = a->j;
861: PetscInt node_max,*ns,row,nsz,aii,*vi,*ad,*aj,i0,i1,*rout,*cout;
862: PetscScalar *x,*b,*a_a = a->a,*tmp,*tmps,*aa,tmp0,tmp1;
863: PetscScalar sum1,sum2,sum3,sum4,sum5,*v1,*v2,*v3,*v4,*v5;
866: if (A->factor!=FACTOR_LU) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unfactored matrix");
867: if (!a->inode.size) SETERRQ(PETSC_ERR_COR,"Missing Inode Structure");
868: node_max = a->inode.node_count;
869: ns = a->inode.size; /* Node Size array */
871: VecGetArray(bb,&b);
872: VecGetArray(xx,&x);
873: tmp = a->solve_work;
874:
875: ISGetIndices(isrow,&rout); r = rout;
876: ISGetIndices(iscol,&cout); c = cout + (n-1);
877:
878: /* forward solve the lower triangular */
879: tmps = tmp ;
880: aa = a_a ;
881: aj = a_j ;
882: ad = a->diag;
884: for (i = 0,row = 0; i< node_max; ++i){
885: nsz = ns[i];
886: aii = ai[row];
887: v1 = aa + aii;
888: vi = aj + aii;
889: nz = ad[row]- aii;
890:
891: switch (nsz){ /* Each loop in 'case' is unrolled */
892: case 1 :
893: sum1 = b[*r++];
894: /* while (nz--) sum1 -= *v1++ *tmps[*vi++];*/
895: for(j=0; j<nz-1; j+=2){
896: i0 = vi[0];
897: i1 = vi[1];
898: vi +=2;
899: tmp0 = tmps[i0];
900: tmp1 = tmps[i1];
901: sum1 -= v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
902: }
903: if(j == nz-1){
904: tmp0 = tmps[*vi++];
905: sum1 -= *v1++ *tmp0;
906: }
907: tmp[row ++]=sum1;
908: break;
909: case 2:
910: sum1 = b[*r++];
911: sum2 = b[*r++];
912: v2 = aa + ai[row+1];
914: for(j=0; j<nz-1; j+=2){
915: i0 = vi[0];
916: i1 = vi[1];
917: vi +=2;
918: tmp0 = tmps[i0];
919: tmp1 = tmps[i1];
920: sum1 -= v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
921: sum2 -= v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
922: }
923: if(j == nz-1){
924: tmp0 = tmps[*vi++];
925: sum1 -= *v1++ *tmp0;
926: sum2 -= *v2++ *tmp0;
927: }
928: sum2 -= *v2++ * sum1;
929: tmp[row ++]=sum1;
930: tmp[row ++]=sum2;
931: break;
932: case 3:
933: sum1 = b[*r++];
934: sum2 = b[*r++];
935: sum3 = b[*r++];
936: v2 = aa + ai[row+1];
937: v3 = aa + ai[row+2];
938:
939: for (j=0; j<nz-1; j+=2){
940: i0 = vi[0];
941: i1 = vi[1];
942: vi +=2;
943: tmp0 = tmps[i0];
944: tmp1 = tmps[i1];
945: sum1 -= v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
946: sum2 -= v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
947: sum3 -= v3[0] * tmp0 + v3[1] * tmp1; v3 += 2;
948: }
949: if (j == nz-1){
950: tmp0 = tmps[*vi++];
951: sum1 -= *v1++ *tmp0;
952: sum2 -= *v2++ *tmp0;
953: sum3 -= *v3++ *tmp0;
954: }
955: sum2 -= *v2++ * sum1;
956: sum3 -= *v3++ * sum1;
957: sum3 -= *v3++ * sum2;
958: tmp[row ++]=sum1;
959: tmp[row ++]=sum2;
960: tmp[row ++]=sum3;
961: break;
962:
963: case 4:
964: sum1 = b[*r++];
965: sum2 = b[*r++];
966: sum3 = b[*r++];
967: sum4 = b[*r++];
968: v2 = aa + ai[row+1];
969: v3 = aa + ai[row+2];
970: v4 = aa + ai[row+3];
971:
972: for (j=0; j<nz-1; j+=2){
973: i0 = vi[0];
974: i1 = vi[1];
975: vi +=2;
976: tmp0 = tmps[i0];
977: tmp1 = tmps[i1];
978: sum1 -= v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
979: sum2 -= v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
980: sum3 -= v3[0] * tmp0 + v3[1] * tmp1; v3 += 2;
981: sum4 -= v4[0] * tmp0 + v4[1] * tmp1; v4 += 2;
982: }
983: if (j == nz-1){
984: tmp0 = tmps[*vi++];
985: sum1 -= *v1++ *tmp0;
986: sum2 -= *v2++ *tmp0;
987: sum3 -= *v3++ *tmp0;
988: sum4 -= *v4++ *tmp0;
989: }
990: sum2 -= *v2++ * sum1;
991: sum3 -= *v3++ * sum1;
992: sum4 -= *v4++ * sum1;
993: sum3 -= *v3++ * sum2;
994: sum4 -= *v4++ * sum2;
995: sum4 -= *v4++ * sum3;
996:
997: tmp[row ++]=sum1;
998: tmp[row ++]=sum2;
999: tmp[row ++]=sum3;
1000: tmp[row ++]=sum4;
1001: break;
1002: case 5:
1003: sum1 = b[*r++];
1004: sum2 = b[*r++];
1005: sum3 = b[*r++];
1006: sum4 = b[*r++];
1007: sum5 = b[*r++];
1008: v2 = aa + ai[row+1];
1009: v3 = aa + ai[row+2];
1010: v4 = aa + ai[row+3];
1011: v5 = aa + ai[row+4];
1012:
1013: for (j=0; j<nz-1; j+=2){
1014: i0 = vi[0];
1015: i1 = vi[1];
1016: vi +=2;
1017: tmp0 = tmps[i0];
1018: tmp1 = tmps[i1];
1019: sum1 -= v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
1020: sum2 -= v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
1021: sum3 -= v3[0] * tmp0 + v3[1] * tmp1; v3 += 2;
1022: sum4 -= v4[0] * tmp0 + v4[1] * tmp1; v4 += 2;
1023: sum5 -= v5[0] * tmp0 + v5[1] * tmp1; v5 += 2;
1024: }
1025: if (j == nz-1){
1026: tmp0 = tmps[*vi++];
1027: sum1 -= *v1++ *tmp0;
1028: sum2 -= *v2++ *tmp0;
1029: sum3 -= *v3++ *tmp0;
1030: sum4 -= *v4++ *tmp0;
1031: sum5 -= *v5++ *tmp0;
1032: }
1034: sum2 -= *v2++ * sum1;
1035: sum3 -= *v3++ * sum1;
1036: sum4 -= *v4++ * sum1;
1037: sum5 -= *v5++ * sum1;
1038: sum3 -= *v3++ * sum2;
1039: sum4 -= *v4++ * sum2;
1040: sum5 -= *v5++ * sum2;
1041: sum4 -= *v4++ * sum3;
1042: sum5 -= *v5++ * sum3;
1043: sum5 -= *v5++ * sum4;
1044:
1045: tmp[row ++]=sum1;
1046: tmp[row ++]=sum2;
1047: tmp[row ++]=sum3;
1048: tmp[row ++]=sum4;
1049: tmp[row ++]=sum5;
1050: break;
1051: default:
1052: SETERRQ(PETSC_ERR_COR,"Node size not yet supported \n");
1053: }
1054: }
1055: /* backward solve the upper triangular */
1056: for (i=node_max -1 ,row = n-1 ; i>=0; i--){
1057: nsz = ns[i];
1058: aii = ai[row+1] -1;
1059: v1 = aa + aii;
1060: vi = aj + aii;
1061: nz = aii- ad[row];
1062: switch (nsz){ /* Each loop in 'case' is unrolled */
1063: case 1 :
1064: sum1 = tmp[row];
1066: for(j=nz ; j>1; j-=2){
1067: i0 = vi[0];
1068: i1 = vi[-1];
1069: vi -=2;
1070: tmp0 = tmps[i0];
1071: tmp1 = tmps[i1];
1072: sum1 -= v1[0] * tmp0 + v1[-1] * tmp1; v1 -= 2;
1073: }
1074: if (j==1){
1075: tmp0 = tmps[*vi--];
1076: sum1 -= *v1-- * tmp0;
1077: }
1078: x[*c--] = tmp[row] = sum1*a_a[ad[row]]; row--;
1079: break;
1080: case 2 :
1081: sum1 = tmp[row];
1082: sum2 = tmp[row -1];
1083: v2 = aa + ai[row]-1;
1084: for (j=nz ; j>1; j-=2){
1085: i0 = vi[0];
1086: i1 = vi[-1];
1087: vi -=2;
1088: tmp0 = tmps[i0];
1089: tmp1 = tmps[i1];
1090: sum1 -= v1[0] * tmp0 + v1[-1] * tmp1; v1 -= 2;
1091: sum2 -= v2[0] * tmp0 + v2[-1] * tmp1; v2 -= 2;
1092: }
1093: if (j==1){
1094: tmp0 = tmps[*vi--];
1095: sum1 -= *v1-- * tmp0;
1096: sum2 -= *v2-- * tmp0;
1097: }
1098:
1099: tmp0 = x[*c--] = tmp[row] = sum1*a_a[ad[row]]; row--;
1100: sum2 -= *v2-- * tmp0;
1101: x[*c--] = tmp[row] = sum2*a_a[ad[row]]; row--;
1102: break;
1103: case 3 :
1104: sum1 = tmp[row];
1105: sum2 = tmp[row -1];
1106: sum3 = tmp[row -2];
1107: v2 = aa + ai[row]-1;
1108: v3 = aa + ai[row -1]-1;
1109: for (j=nz ; j>1; j-=2){
1110: i0 = vi[0];
1111: i1 = vi[-1];
1112: vi -=2;
1113: tmp0 = tmps[i0];
1114: tmp1 = tmps[i1];
1115: sum1 -= v1[0] * tmp0 + v1[-1] * tmp1; v1 -= 2;
1116: sum2 -= v2[0] * tmp0 + v2[-1] * tmp1; v2 -= 2;
1117: sum3 -= v3[0] * tmp0 + v3[-1] * tmp1; v3 -= 2;
1118: }
1119: if (j==1){
1120: tmp0 = tmps[*vi--];
1121: sum1 -= *v1-- * tmp0;
1122: sum2 -= *v2-- * tmp0;
1123: sum3 -= *v3-- * tmp0;
1124: }
1125: tmp0 = x[*c--] = tmp[row] = sum1*a_a[ad[row]]; row--;
1126: sum2 -= *v2-- * tmp0;
1127: sum3 -= *v3-- * tmp0;
1128: tmp0 = x[*c--] = tmp[row] = sum2*a_a[ad[row]]; row--;
1129: sum3 -= *v3-- * tmp0;
1130: x[*c--] = tmp[row] = sum3*a_a[ad[row]]; row--;
1131:
1132: break;
1133: case 4 :
1134: sum1 = tmp[row];
1135: sum2 = tmp[row -1];
1136: sum3 = tmp[row -2];
1137: sum4 = tmp[row -3];
1138: v2 = aa + ai[row]-1;
1139: v3 = aa + ai[row -1]-1;
1140: v4 = aa + ai[row -2]-1;
1142: for (j=nz ; j>1; j-=2){
1143: i0 = vi[0];
1144: i1 = vi[-1];
1145: vi -=2;
1146: tmp0 = tmps[i0];
1147: tmp1 = tmps[i1];
1148: sum1 -= v1[0] * tmp0 + v1[-1] * tmp1; v1 -= 2;
1149: sum2 -= v2[0] * tmp0 + v2[-1] * tmp1; v2 -= 2;
1150: sum3 -= v3[0] * tmp0 + v3[-1] * tmp1; v3 -= 2;
1151: sum4 -= v4[0] * tmp0 + v4[-1] * tmp1; v4 -= 2;
1152: }
1153: if (j==1){
1154: tmp0 = tmps[*vi--];
1155: sum1 -= *v1-- * tmp0;
1156: sum2 -= *v2-- * tmp0;
1157: sum3 -= *v3-- * tmp0;
1158: sum4 -= *v4-- * tmp0;
1159: }
1161: tmp0 = x[*c--] = tmp[row] = sum1*a_a[ad[row]]; row--;
1162: sum2 -= *v2-- * tmp0;
1163: sum3 -= *v3-- * tmp0;
1164: sum4 -= *v4-- * tmp0;
1165: tmp0 = x[*c--] = tmp[row] = sum2*a_a[ad[row]]; row--;
1166: sum3 -= *v3-- * tmp0;
1167: sum4 -= *v4-- * tmp0;
1168: tmp0 = x[*c--] = tmp[row] = sum3*a_a[ad[row]]; row--;
1169: sum4 -= *v4-- * tmp0;
1170: x[*c--] = tmp[row] = sum4*a_a[ad[row]]; row--;
1171: break;
1172: case 5 :
1173: sum1 = tmp[row];
1174: sum2 = tmp[row -1];
1175: sum3 = tmp[row -2];
1176: sum4 = tmp[row -3];
1177: sum5 = tmp[row -4];
1178: v2 = aa + ai[row]-1;
1179: v3 = aa + ai[row -1]-1;
1180: v4 = aa + ai[row -2]-1;
1181: v5 = aa + ai[row -3]-1;
1182: for (j=nz ; j>1; j-=2){
1183: i0 = vi[0];
1184: i1 = vi[-1];
1185: vi -=2;
1186: tmp0 = tmps[i0];
1187: tmp1 = tmps[i1];
1188: sum1 -= v1[0] * tmp0 + v1[-1] * tmp1; v1 -= 2;
1189: sum2 -= v2[0] * tmp0 + v2[-1] * tmp1; v2 -= 2;
1190: sum3 -= v3[0] * tmp0 + v3[-1] * tmp1; v3 -= 2;
1191: sum4 -= v4[0] * tmp0 + v4[-1] * tmp1; v4 -= 2;
1192: sum5 -= v5[0] * tmp0 + v5[-1] * tmp1; v5 -= 2;
1193: }
1194: if (j==1){
1195: tmp0 = tmps[*vi--];
1196: sum1 -= *v1-- * tmp0;
1197: sum2 -= *v2-- * tmp0;
1198: sum3 -= *v3-- * tmp0;
1199: sum4 -= *v4-- * tmp0;
1200: sum5 -= *v5-- * tmp0;
1201: }
1203: tmp0 = x[*c--] = tmp[row] = sum1*a_a[ad[row]]; row--;
1204: sum2 -= *v2-- * tmp0;
1205: sum3 -= *v3-- * tmp0;
1206: sum4 -= *v4-- * tmp0;
1207: sum5 -= *v5-- * tmp0;
1208: tmp0 = x[*c--] = tmp[row] = sum2*a_a[ad[row]]; row--;
1209: sum3 -= *v3-- * tmp0;
1210: sum4 -= *v4-- * tmp0;
1211: sum5 -= *v5-- * tmp0;
1212: tmp0 = x[*c--] = tmp[row] = sum3*a_a[ad[row]]; row--;
1213: sum4 -= *v4-- * tmp0;
1214: sum5 -= *v5-- * tmp0;
1215: tmp0 = x[*c--] = tmp[row] = sum4*a_a[ad[row]]; row--;
1216: sum5 -= *v5-- * tmp0;
1217: x[*c--] = tmp[row] = sum5*a_a[ad[row]]; row--;
1218: break;
1219: default:
1220: SETERRQ(PETSC_ERR_COR,"Node size not yet supported \n");
1221: }
1222: }
1223: ISRestoreIndices(isrow,&rout);
1224: ISRestoreIndices(iscol,&cout);
1225: VecRestoreArray(bb,&b);
1226: VecRestoreArray(xx,&x);
1227: PetscLogFlops(2*a->nz - A->n);
1228: return(0);
1229: }
1234: PetscErrorCode MatLUFactorNumeric_SeqAIJ_Inode(Mat A,Mat *B)
1235: {
1236: Mat C = *B;
1237: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
1238: IS iscol = b->col,isrow = b->row,isicol = b->icol;
1240: PetscInt *r,*ic,*c,n = A->m,*bi = b->i;
1241: PetscInt *bj = b->j,*nbj=b->j +1,*ajtmp,*bjtmp,nz,row,prow;
1242: PetscInt *ics,i,j,idx,*ai = a->i,*aj = a->j,*bd = b->diag,node_max,nsz;
1243: PetscInt *ns,*tmp_vec1,*tmp_vec2,*nsmap,*pj,ndamp = 0;
1244: PetscScalar *rtmp1,*rtmp2,*rtmp3,*v1,*v2,*v3,*pc1,*pc2,*pc3,mul1,mul2,mul3;
1245: PetscScalar tmp,*ba = b->a,*aa = a->a,*pv,*rtmps1,*rtmps2,*rtmps3;
1246: PetscReal damping = b->lu_damping,zeropivot = b->lu_zeropivot;
1247: PetscTruth damp;
1250: ISGetIndices(isrow,&r);
1251: ISGetIndices(iscol,&c);
1252: ISGetIndices(isicol,&ic);
1253: PetscMalloc((3*n+1)*sizeof(PetscScalar),&rtmp1);
1254: PetscMemzero(rtmp1,(3*n+1)*sizeof(PetscScalar));
1255: ics = ic ; rtmps1 = rtmp1 ;
1256: rtmp2 = rtmp1 + n; rtmps2 = rtmp2 ;
1257: rtmp3 = rtmp2 + n; rtmps3 = rtmp3 ;
1258:
1259: node_max = a->inode.node_count;
1260: ns = a->inode.size ;
1261: if (!ns){
1262: SETERRQ(PETSC_ERR_PLIB,"Matrix without inode information");
1263: }
1265: /* If max inode size > 3, split it into two inodes.*/
1266: /* also map the inode sizes according to the ordering */
1267: PetscMalloc((n+1)* sizeof(PetscInt),&tmp_vec1);
1268: for (i=0,j=0; i<node_max; ++i,++j){
1269: if (ns[i]>3) {
1270: tmp_vec1[j] = ns[i]/2; /* Assuming ns[i] < =5 */
1271: ++j;
1272: tmp_vec1[j] = ns[i] - tmp_vec1[j-1];
1273: } else {
1274: tmp_vec1[j] = ns[i];
1275: }
1276: }
1277: /* Use the correct node_max */
1278: node_max = j;
1280: /* Now reorder the inode info based on mat re-ordering info */
1281: /* First create a row -> inode_size_array_index map */
1282: PetscMalloc(n*sizeof(PetscInt)+1,&nsmap);
1283: PetscMalloc(node_max*sizeof(PetscInt)+1,&tmp_vec2);
1284: for (i=0,row=0; i<node_max; i++) {
1285: nsz = tmp_vec1[i];
1286: for (j=0; j<nsz; j++,row++) {
1287: nsmap[row] = i;
1288: }
1289: }
1290: /* Using nsmap, create a reordered ns structure */
1291: for (i=0,j=0; i< node_max; i++) {
1292: nsz = tmp_vec1[nsmap[r[j]]]; /* here the reordered row_no is in r[] */
1293: tmp_vec2[i] = nsz;
1294: j += nsz;
1295: }
1296: PetscFree(nsmap);
1297: PetscFree(tmp_vec1);
1298: /* Now use the correct ns */
1299: ns = tmp_vec2;
1302: do {
1303: damp = PETSC_FALSE;
1304: /* Now loop over each block-row, and do the factorization */
1305: for (i=0,row=0; i<node_max; i++) {
1306: nsz = ns[i];
1307: nz = bi[row+1] - bi[row];
1308: bjtmp = bj + bi[row];
1309:
1310: switch (nsz){
1311: case 1:
1312: for (j=0; j<nz; j++){
1313: idx = bjtmp[j];
1314: rtmps1[idx] = 0.0;
1315: }
1316:
1317: /* load in initial (unfactored row) */
1318: idx = r[row];
1319: nz = ai[idx+1] - ai[idx];
1320: ajtmp = aj + ai[idx];
1321: v1 = aa + ai[idx];
1323: for (j=0; j<nz; j++) {
1324: idx = ics[ajtmp[j]];
1325: rtmp1[idx] = v1[j];
1326: if (ndamp && ajtmp[j] == r[row]) {
1327: rtmp1[idx] += damping;
1328: }
1329: }
1330: prow = *bjtmp++ ;
1331: while (prow < row) {
1332: pc1 = rtmp1 + prow;
1333: if (*pc1 != 0.0){
1334: pv = ba + bd[prow];
1335: pj = nbj + bd[prow];
1336: mul1 = *pc1 * *pv++;
1337: *pc1 = mul1;
1338: nz = bi[prow+1] - bd[prow] - 1;
1339: PetscLogFlops(2*nz);
1340: for (j=0; j<nz; j++) {
1341: tmp = pv[j];
1342: idx = pj[j];
1343: rtmps1[idx] -= mul1 * tmp;
1344: }
1345: }
1346: prow = *bjtmp++ ;
1347: }
1348: nz = bi[row+1] - bi[row];
1349: pj = bj + bi[row];
1350: pc1 = ba + bi[row];
1351: if (PetscAbsScalar(rtmp1[row]) < zeropivot) {
1352: if (damping) {
1353: if (ndamp) damping *= 2.0;
1354: damp = PETSC_TRUE;
1355: ndamp++;
1356: goto endofwhile;
1357: } else {
1358: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",row,PetscAbsScalar(rtmp1[row]),zeropivot);
1359: }
1360: }
1361: rtmp1[row] = 1.0/rtmp1[row];
1362: for (j=0; j<nz; j++) {
1363: idx = pj[j];
1364: pc1[j] = rtmps1[idx];
1365: }
1366: break;
1367:
1368: case 2:
1369: for (j=0; j<nz; j++) {
1370: idx = bjtmp[j];
1371: rtmps1[idx] = 0.0;
1372: rtmps2[idx] = 0.0;
1373: }
1374:
1375: /* load in initial (unfactored row) */
1376: idx = r[row];
1377: nz = ai[idx+1] - ai[idx];
1378: ajtmp = aj + ai[idx];
1379: v1 = aa + ai[idx];
1380: v2 = aa + ai[idx+1];
1381:
1382: for (j=0; j<nz; j++) {
1383: idx = ics[ajtmp[j]];
1384: rtmp1[idx] = v1[j];
1385: rtmp2[idx] = v2[j];
1386: if (ndamp && ajtmp[j] == r[row]) {
1387: rtmp1[idx] += damping;
1388: }
1389: if (ndamp && ajtmp[j] == r[row+1]) {
1390: rtmp2[idx] += damping;
1391: }
1392: }
1393: prow = *bjtmp++ ;
1394: while (prow < row) {
1395: pc1 = rtmp1 + prow;
1396: pc2 = rtmp2 + prow;
1397: if (*pc1 != 0.0 || *pc2 != 0.0){
1398: pv = ba + bd[prow];
1399: pj = nbj + bd[prow];
1400: mul1 = *pc1 * *pv;
1401: mul2 = *pc2 * *pv;
1402: ++pv;
1403: *pc1 = mul1;
1404: *pc2 = mul2;
1405:
1406: nz = bi[prow+1] - bd[prow] - 1;
1407: PetscLogFlops(2*2*nz);
1408: for (j=0; j<nz; j++) {
1409: tmp = pv[j];
1410: idx = pj[j];
1411: rtmps1[idx] -= mul1 * tmp;
1412: rtmps2[idx] -= mul2 * tmp;
1413: }
1414: }
1415: prow = *bjtmp++ ;
1416: }
1417: /* Now take care of the odd element*/
1418: pc1 = rtmp1 + prow;
1419: pc2 = rtmp2 + prow;
1420: if (*pc2 != 0.0){
1421: pj = nbj + bd[prow];
1422: if (PetscAbsScalar(*pc1) < zeropivot) {
1423: if (damping) {
1424: if (ndamp) damping *= 2.0;
1425: damp = PETSC_TRUE;
1426: ndamp++;
1427: goto endofwhile;
1428: } else {
1429: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",prow,PetscAbsScalar(*pc1),zeropivot);
1430: }
1431: }
1432: mul2 = (*pc2)/(*pc1); /* since diag is not yet inverted.*/
1433: *pc2 = mul2;
1434: nz = bi[prow+1] - bd[prow] - 1;
1435: PetscLogFlops(2*nz);
1436: for (j=0; j<nz; j++) {
1437: idx = pj[j] ;
1438: tmp = rtmp1[idx];
1439: rtmp2[idx] -= mul2 * tmp;
1440: }
1441: }
1442:
1443: nz = bi[row+1] - bi[row];
1444: pj = bj + bi[row];
1445: pc1 = ba + bi[row];
1446: pc2 = ba + bi[row+1];
1447: if (PetscAbsScalar(rtmp1[row]) < zeropivot || PetscAbsScalar(rtmp2[row+1]) < zeropivot) {
1448: if (damping) {
1449: if (ndamp) damping *= 2.0;
1450: damp = PETSC_TRUE;
1451: ndamp++;
1452: goto endofwhile;
1453: } else if (PetscAbsScalar(rtmp1[row]) < zeropivot) {
1454: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",row,PetscAbsScalar(rtmp1[row]),zeropivot);
1455: } else {
1456: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",row+1,PetscAbsScalar(rtmp2[row+1]),zeropivot);
1457: }
1458: }
1459: rtmp1[row] = 1.0/rtmp1[row];
1460: rtmp2[row+1] = 1.0/rtmp2[row+1];
1461: for (j=0; j<nz; j++) {
1462: idx = pj[j];
1463: pc1[j] = rtmps1[idx];
1464: pc2[j] = rtmps2[idx];
1465: }
1466: break;
1468: case 3:
1469: for (j=0; j<nz; j++) {
1470: idx = bjtmp[j];
1471: rtmps1[idx] = 0.0;
1472: rtmps2[idx] = 0.0;
1473: rtmps3[idx] = 0.0;
1474: }
1475: /* copy the nonzeros for the 3 rows from sparse representation to dense in rtmp*[] */
1476: idx = r[row];
1477: nz = ai[idx+1] - ai[idx];
1478: ajtmp = aj + ai[idx];
1479: v1 = aa + ai[idx];
1480: v2 = aa + ai[idx+1];
1481: v3 = aa + ai[idx+2];
1482: for (j=0; j<nz; j++) {
1483: idx = ics[ajtmp[j]];
1484: rtmp1[idx] = v1[j];
1485: rtmp2[idx] = v2[j];
1486: rtmp3[idx] = v3[j];
1487: if (ndamp && ajtmp[j] == r[row]) {
1488: rtmp1[idx] += damping;
1489: }
1490: if (ndamp && ajtmp[j] == r[row+1]) {
1491: rtmp2[idx] += damping;
1492: }
1493: if (ndamp && ajtmp[j] == r[row+2]) {
1494: rtmp3[idx] += damping;
1495: }
1496: }
1497: /* loop over all pivot row blocks above this row block */
1498: prow = *bjtmp++ ;
1499: while (prow < row) {
1500: pc1 = rtmp1 + prow;
1501: pc2 = rtmp2 + prow;
1502: pc3 = rtmp3 + prow;
1503: if (*pc1 != 0.0 || *pc2 != 0.0 || *pc3 !=0.0){
1504: pv = ba + bd[prow];
1505: pj = nbj + bd[prow];
1506: mul1 = *pc1 * *pv;
1507: mul2 = *pc2 * *pv;
1508: mul3 = *pc3 * *pv;
1509: ++pv;
1510: *pc1 = mul1;
1511: *pc2 = mul2;
1512: *pc3 = mul3;
1513:
1514: nz = bi[prow+1] - bd[prow] - 1;
1515: PetscLogFlops(3*2*nz);
1516: /* update this row based on pivot row */
1517: for (j=0; j<nz; j++) {
1518: tmp = pv[j];
1519: idx = pj[j];
1520: rtmps1[idx] -= mul1 * tmp;
1521: rtmps2[idx] -= mul2 * tmp;
1522: rtmps3[idx] -= mul3 * tmp;
1523: }
1524: }
1525: prow = *bjtmp++ ;
1526: }
1527: /* Now take care of diagonal block in this set of rows */
1528: pc1 = rtmp1 + prow;
1529: pc2 = rtmp2 + prow;
1530: pc3 = rtmp3 + prow;
1531: if (*pc2 != 0.0 || *pc3 != 0.0){
1532: pj = nbj + bd[prow];
1533: if (PetscAbsScalar(*pc1) < zeropivot) {
1534: if (damping) {
1535: if (ndamp) damping *= 2.0;
1536: damp = PETSC_TRUE;
1537: ndamp++;
1538: goto endofwhile;
1539: } else {
1540: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",prow,PetscAbsScalar(*pc1),zeropivot);
1541: }
1542: }
1543: mul2 = (*pc2)/(*pc1);
1544: mul3 = (*pc3)/(*pc1);
1545: *pc2 = mul2;
1546: *pc3 = mul3;
1547: nz = bi[prow+1] - bd[prow] - 1;
1548: PetscLogFlops(2*2*nz);
1549: for (j=0; j<nz; j++) {
1550: idx = pj[j] ;
1551: tmp = rtmp1[idx];
1552: rtmp2[idx] -= mul2 * tmp;
1553: rtmp3[idx] -= mul3 * tmp;
1554: }
1555: }
1556: ++prow;
1557: pc2 = rtmp2 + prow;
1558: pc3 = rtmp3 + prow;
1559: if (*pc3 != 0.0){
1560: pj = nbj + bd[prow];
1561: pj = nbj + bd[prow];
1562: if (PetscAbsScalar(*pc2) < zeropivot) {
1563: if (damping) {
1564: if (ndamp) damping *= 2.0;
1565: damp = PETSC_TRUE;
1566: ndamp++;
1567: goto endofwhile;
1568: } else {
1569: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",prow,PetscAbsScalar(*pc2),zeropivot);
1570: }
1571: }
1572: mul3 = (*pc3)/(*pc2);
1573: *pc3 = mul3;
1574: nz = bi[prow+1] - bd[prow] - 1;
1575: PetscLogFlops(2*2*nz);
1576: for (j=0; j<nz; j++) {
1577: idx = pj[j] ;
1578: tmp = rtmp2[idx];
1579: rtmp3[idx] -= mul3 * tmp;
1580: }
1581: }
1582: nz = bi[row+1] - bi[row];
1583: pj = bj + bi[row];
1584: pc1 = ba + bi[row];
1585: pc2 = ba + bi[row+1];
1586: pc3 = ba + bi[row+2];
1587: if (PetscAbsScalar(rtmp1[row]) < zeropivot || PetscAbsScalar(rtmp2[row+1]) < zeropivot || PetscAbsScalar(rtmp3[row+2]) < zeropivot) {
1588: if (damping) {
1589: if (ndamp) damping *= 2.0;
1590: damp = PETSC_TRUE;
1591: ndamp++;
1592: goto endofwhile;
1593: } else if (PetscAbsScalar(rtmp1[row]) < zeropivot) {
1594: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",row,PetscAbsScalar(rtmp1[row]),zeropivot);
1595: } else if (PetscAbsScalar(rtmp2[row+1]) < zeropivot) {
1596: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",row+1,PetscAbsScalar(rtmp2[row+1]),zeropivot);
1597: } else {
1598: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g",row+2,PetscAbsScalar(rtmp3[row+2]),zeropivot);
1599: }
1600: }
1601: rtmp1[row] = 1.0/rtmp1[row];
1602: rtmp2[row+1] = 1.0/rtmp2[row+1];
1603: rtmp3[row+2] = 1.0/rtmp3[row+2];
1604: /* copy row entries from dense representation to sparse */
1605: for (j=0; j<nz; j++) {
1606: idx = pj[j];
1607: pc1[j] = rtmps1[idx];
1608: pc2[j] = rtmps2[idx];
1609: pc3[j] = rtmps3[idx];
1610: }
1611: break;
1613: default:
1614: SETERRQ(PETSC_ERR_COR,"Node size not yet supported \n");
1615: }
1616: row += nsz; /* Update the row */
1617: }
1618: endofwhile:;
1619: } while (damp);
1620: PetscFree(rtmp1);
1621: PetscFree(tmp_vec2);
1622: ISRestoreIndices(isicol,&ic);
1623: ISRestoreIndices(isrow,&r);
1624: ISRestoreIndices(iscol,&c);
1625: C->factor = FACTOR_LU;
1626: C->assembled = PETSC_TRUE;
1627: if (ndamp || b->lu_damping) {
1628: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ_Inode: number of damping tries %D damping value %g\n",ndamp,damping);
1629: }
1630: PetscLogFlops(C->n);
1631: return(0);
1632: }
1634: /*
1635: This is really ugly. if inodes are used this replaces the
1636: permutations with ones that correspond to rows/cols of the matrix
1637: rather then inode blocks
1638: */
1641: PetscErrorCode MatAdjustForInodes(Mat A,IS *rperm,IS *cperm)
1642: {
1643: PetscErrorCode ierr,(*f)(Mat,IS*,IS*);
1646: PetscObjectQueryFunction((PetscObject)A,"MatAdjustForInodes_C",(void (**)(void))&f);
1647: if (f) {
1648: (*f)(A,rperm,cperm);
1649: }
1650: return(0);
1651: }
1656: PetscErrorCode MatAdjustForInodes_SeqAIJ(Mat A,IS *rperm,IS *cperm)
1657: {
1658: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1660: PetscInt m = A->m,n = A->n,i,j,*ridx,*cidx,nslim_row = a->inode.node_count;
1661: PetscInt row,col,*permr,*permc,*ns_row = a->inode.size,*tns,start_val,end_val,indx;
1662: PetscInt nslim_col,*ns_col;
1663: IS ris = *rperm,cis = *cperm;
1666: if (!a->inode.size) return(0); /* no inodes so return */
1667: if (a->inode.node_count == m) return(0); /* all inodes are of size 1 */
1669: Mat_AIJ_CreateColInode(A,&nslim_col,&ns_col);
1670: PetscMalloc((((nslim_row>nslim_col)?nslim_row:nslim_col)+1)*sizeof(PetscInt),&tns);
1671: PetscMalloc((m+n+1)*sizeof(PetscInt),&permr);
1672: permc = permr + m;
1674: ISGetIndices(ris,&ridx);
1675: ISGetIndices(cis,&cidx);
1677: /* Form the inode structure for the rows of permuted matric using inv perm*/
1678: for (i=0,tns[0]=0; i<nslim_row; ++i) tns[i+1] = tns[i] + ns_row[i];
1680: /* Construct the permutations for rows*/
1681: for (i=0,row = 0; i<nslim_row; ++i){
1682: indx = ridx[i];
1683: start_val = tns[indx];
1684: end_val = tns[indx + 1];
1685: for (j=start_val; j<end_val; ++j,++row) permr[row]= j;
1686: }
1688: /* Form the inode structure for the columns of permuted matrix using inv perm*/
1689: for (i=0,tns[0]=0; i<nslim_col; ++i) tns[i+1] = tns[i] + ns_col[i];
1691: /* Construct permutations for columns */
1692: for (i=0,col=0; i<nslim_col; ++i){
1693: indx = cidx[i];
1694: start_val = tns[indx];
1695: end_val = tns[indx + 1];
1696: for (j = start_val; j<end_val; ++j,++col) permc[col]= j;
1697: }
1699: ISCreateGeneral(PETSC_COMM_SELF,n,permr,rperm);
1700: ISSetPermutation(*rperm);
1701: ISCreateGeneral(PETSC_COMM_SELF,n,permc,cperm);
1702: ISSetPermutation(*cperm);
1703:
1704: ISRestoreIndices(ris,&ridx);
1705: ISRestoreIndices(cis,&cidx);
1707: PetscFree(ns_col);
1708: PetscFree(permr);
1709: ISDestroy(cis);
1710: ISDestroy(ris);
1711: PetscFree(tns);
1712: return(0);
1713: }
1718: /*@C
1719: MatSeqAIJGetInodeSizes - Returns the inode information of the SeqAIJ matrix.
1721: Collective on Mat
1723: Input Parameter:
1724: . A - the SeqAIJ matrix.
1726: Output Parameter:
1727: + node_count - no of inodes present in the matrix.
1728: . sizes - an array of size node_count,with sizes of each inode.
1729: - limit - the max size used to generate the inodes.
1731: Level: advanced
1733: Notes: This routine returns some internal storage information
1734: of the matrix, it is intended to be used by advanced users.
1735: It should be called after the matrix is assembled.
1736: The contents of the sizes[] array should not be changed.
1738: .keywords: matrix, seqaij, get, inode
1740: .seealso: MatGetInfo()
1741: @*/
1742: PetscErrorCode MatSeqAIJGetInodeSizes(Mat A,PetscInt *node_count,PetscInt *sizes[],PetscInt *limit)
1743: {
1744: PetscErrorCode ierr,(*f)(Mat,PetscInt*,PetscInt*[],PetscInt*);
1747: PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJGetInodeSizes_C",(void (**)(void))&f);
1748: if (f) {
1749: (*f)(A,node_count,sizes,limit);
1750: }
1751: return(0);
1752: }
1757: PetscErrorCode MatSeqAIJGetInodeSizes_SeqAIJ(Mat A,PetscInt *node_count,PetscInt *sizes[],PetscInt *limit)
1758: {
1759: Mat_SeqAIJ *a;
1762: a = (Mat_SeqAIJ*)A->data;
1763: *node_count = a->inode.node_count;
1764: *sizes = a->inode.size;
1765: *limit = a->inode.limit;
1766: return(0);
1767: }