Actual source code: baijfact3.c

  1: /*$Id: baijfact3.c,v 1.4 2001/03/23 23:22:07 balay Exp $*/
  2: /*
  3:     Factorization code for BAIJ format. 
  4: */
 5:  #include src/mat/impls/baij/seq/baij.h
 6:  #include src/vec/vecimpl.h
 7:  #include src/inline/ilu.h

  9: /*
 10:     The symbolic factorization code is identical to that for AIJ format,
 11:   except for very small changes since this is now a SeqBAIJ datastructure.
 12:   NOT good code reuse.
 13: */
 14: int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B)
 15: {
 16:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
 17:   IS          isicol;
 18:   int         *r,*ic,ierr,i,n = a->mbs,*ai = a->i,*aj = a->j;
 19:   int         *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = a->bs,bs2=a->bs2;
 20:   int         *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
 21:   PetscReal   f = 1.0;

 26:   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");

 28:   if (!isrow) {
 29:     ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&isrow);
 30:   }
 31:   if (!iscol) {
 32:     ISCreateStride(PETSC_COMM_SELF,A->M,0,1,&iscol);
 33:   }
 34:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
 35:   ISGetIndices(isrow,&r);
 36:   ISGetIndices(isicol,&ic);

 38:   if (info) f = info->fill;
 39:   /* get new row pointers */
 40:   ierr     = PetscMalloc((n+1)*sizeof(int),&ainew);
 41:   ainew[0] = 0;
 42:   /* don't know how many column pointers are needed so estimate */
 43:   jmax     = (int)(f*ai[n] + 1);
 44:   ierr     = PetscMalloc((jmax)*sizeof(int),&ajnew);
 45:   /* fill is a linked list of nonzeros in active row */
 46:   ierr     = PetscMalloc((2*n+1)*sizeof(int),&fill);
 47:   im       = fill + n + 1;
 48:   /* idnew is location of diagonal in factor */
 49:   ierr     = PetscMalloc((n+1)*sizeof(int),&idnew);
 50:   idnew[0] = 0;

 52:   for (i=0; i<n; i++) {
 53:     /* first copy previous fill into linked list */
 54:     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
 55:     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
 56:     ajtmp   = aj + ai[r[i]];
 57:     fill[n] = n;
 58:     while (nz--) {
 59:       fm  = n;
 60:       idx = ic[*ajtmp++];
 61:       do {
 62:         m  = fm;
 63:         fm = fill[m];
 64:       } while (fm < idx);
 65:       fill[m]   = idx;
 66:       fill[idx] = fm;
 67:     }
 68:     row = fill[n];
 69:     while (row < i) {
 70:       ajtmp = ajnew + idnew[row] + 1;
 71:       nzbd  = 1 + idnew[row] - ainew[row];
 72:       nz    = im[row] - nzbd;
 73:       fm    = row;
 74:       while (nz-- > 0) {
 75:         idx = *ajtmp++;
 76:         nzbd++;
 77:         if (idx == i) im[row] = nzbd;
 78:         do {
 79:           m  = fm;
 80:           fm = fill[m];
 81:         } while (fm < idx);
 82:         if (fm != idx) {
 83:           fill[m]   = idx;
 84:           fill[idx] = fm;
 85:           fm        = idx;
 86:           nnz++;
 87:         }
 88:       }
 89:       row = fill[row];
 90:     }
 91:     /* copy new filled row into permanent storage */
 92:     ainew[i+1] = ainew[i] + nnz;
 93:     if (ainew[i+1] > jmax) {

 95:       /* estimate how much additional space we will need */
 96:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
 97:       /* just double the memory each time */
 98:       int maxadd = jmax;
 99:       /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */
100:       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
101:       jmax += maxadd;

103:       /* allocate a longer ajnew */
104:       ierr  = PetscMalloc(jmax*sizeof(int),&ajtmp);
105:       ierr  = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(int));
106:       ierr  = PetscFree(ajnew);
107:       ajnew = ajtmp;
108:       realloc++; /* count how many times we realloc */
109:     }
110:     ajtmp = ajnew + ainew[i];
111:     fm    = fill[n];
112:     nzi   = 0;
113:     im[i] = nnz;
114:     while (nnz--) {
115:       if (fm < i) nzi++;
116:       *ajtmp++ = fm;
117:       fm       = fill[fm];
118:     }
119:     idnew[i] = ainew[i] + nzi;
120:   }

122:   if (ai[n] != 0) {
123:     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
124:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
125:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Run with -pc_lu_fill %g or use n",af);
126:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:PCLUSetFill(pc,%g);n",af);
127:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:for best performance.n");
128:   } else {
129:      PetscLogInfo(A,"MatLUFactorSymbolic_SeqBAIJ:Empty matrix.n");
130:   }

132:   ISRestoreIndices(isrow,&r);
133:   ISRestoreIndices(isicol,&ic);

135:   PetscFree(fill);

137:   /* put together the new matrix */
138:   MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B);
139:   PetscLogObjectParent(*B,isicol);
140:   b = (Mat_SeqBAIJ*)(*B)->data;
141:   PetscFree(b->imax);
142:   b->singlemalloc = PETSC_FALSE;
143:   /* the next line frees the default space generated by the Create() */
144:   PetscFree(b->a);
145:   PetscFree(b->ilen);
146:   ierr          = PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2,&b->a);
147:   b->j          = ajnew;
148:   b->i          = ainew;
149:   b->diag       = idnew;
150:   b->ilen       = 0;
151:   b->imax       = 0;
152:   b->row        = isrow;
153:   b->col        = iscol;
154:   ierr          = PetscObjectReference((PetscObject)isrow);
155:   ierr          = PetscObjectReference((PetscObject)iscol);
156:   b->icol       = isicol;
157:   PetscMalloc((bs*n+bs)*sizeof(Scalar),&b->solve_work);
158:   /* In b structure:  Free imax, ilen, old a, old j.  
159:      Allocate idnew, solve_work, new a, new j */
160:   PetscLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(MatScalar)));
161:   b->maxnz = b->nz = ainew[n];
162: 
163:   (*B)->factor                 = FACTOR_LU;
164:   (*B)->info.factor_mallocs    = realloc;
165:   (*B)->info.fill_ratio_given  = f;
166:   if (ai[n] != 0) {
167:     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
168:   } else {
169:     (*B)->info.fill_ratio_needed = 0.0;
170:   }
171:   return(0);
172: }