fathom/moab-docs/MsqHessian_8cpp_source.html

00001 /* *****************************************************************
00002     MESQUITE -- The Mesh Quality Improvement Toolkit
00003
00004     Copyright 2004 Sandia Corporation and Argonne National
00005     Laboratory.  Under the terms of Contract DE-AC04-94AL85000
00006     with Sandia Corporation, the U.S. Government retains certain
00007     rights in this software.
00008
00009     This library is free software; you can redistribute it and/or
00010     modify it under the terms of the GNU Lesser General Public
00011     License as published by the Free Software Foundation; either
00012     version 2.1 of the License, or (at your option) any later version.
00013
00014     This library is distributed in the hope that it will be useful,
00015     but WITHOUT ANY WARRANTY; without even the implied warranty of
00016     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017     Lesser General Public License for more details.
00018
00019     You should have received a copy of the GNU Lesser General Public License
00020     (lgpl.txt) along with this library; if not, write to the Free Software
00021     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022
00023     [email protected], [email protected], [email protected],
00024     [email protected], [email protected], [email protected]
00025
00026   ***************************************************************** */
00027 // -*- Mode : c++; tab-width: 3; c-tab-always-indent: t; indent-tabs-mode: nil; c-basic-offset: 3
00028 // -*-
00029 //
00030 //    AUTHOR: Todd Munson <[email protected]>
00031 //       ORG: Argonne National Laboratory
00032 //    E-MAIL: [email protected]
00033 //
00034 // ORIG-DATE:  2-Jan-03 at 11:02:19 by Thomas Leurent
00035 //  LAST-MOD: 26-Nov-03 at 15:47:42 by Thomas Leurent
00036 //
00037 // DESCRIPTION:
00038 // ============
00039 /*! \file MsqHessian.cpp
00040
00041   \author Thomas Leurent
00042
00043 */
00044
00045 #include "MsqHessian.hpp"
00046 #include "MsqTimer.hpp"
00047
00048 #include <cmath>
00049 #include <iostream>
00050
00051 namespace MBMesquite
00052 {
00053
00054 MsqHessian::MsqHessian()
00055     : mEntries( 0 ), mRowStart( 0 ), mColIndex( 0 ), mSize( 0 ), mPreconditioner( 0 ), precondArraySize( 0 ), mR( 0 ),
00056       mZ( 0 ), mP( 0 ), mW( 0 ), cgArraySizes( 0 ), maxCGiter( 50 )
00057 {
00058 }
00059
00060 MsqHessian::~MsqHessian()
00061 {
00062     clear();
00063 }
00064
00065 void MsqHessian::clear()
00066 {
00067     mSize = precondArraySize = cgArraySizes = 0;
00068
00069     delete[] mEntries;
00070     mEntries = 0;
00071     delete[] mRowStart;
00072     mRowStart = 0;
00073     delete[] mColIndex;
00074     mColIndex = 0;
00075
00076     delete[] mPreconditioner;
00077     mPreconditioner = 0;
00078
00079     delete[] mR;
00080     mR = 0;
00081     delete[] mZ;
00082     mZ = 0;
00083     delete[] mP;
00084     mP = 0;
00085     delete[] mW;
00086     mW = 0;
00087 }
00088
00089 /*! \brief creates a sparse structure for a Hessian, based on the
00090   connectivity information contained in the PatchData.
00091   Only the upper triangular part of the Hessian is stored. */
00092 void MsqHessian::initialize( PatchData& pd, MsqError& err )
00093 {
00094     MSQ_FUNCTION_TIMER( "MsqHession::initialize" );
00095     delete[] mEntries;
00096     delete[] mRowStart;
00097     delete[] mColIndex;
00098
00099     size_t num_vertices = pd.num_free_vertices();
00100     size_t num_elements = pd.num_elements();
00101     size_t const* vtx_list;
00102     size_t e, r, rs, re, c, cs, ce, nz, nnz, nve, i, j;
00103     MsqMeshEntity* patchElemArray = pd.get_element_array( err );MSQ_CHKERR( err );
00104
00105     if( num_vertices == 0 )
00106     {
00107         MSQ_SETERR( err )( "No vertices in PatchData", MsqError::INVALID_ARG );
00108         return;
00109     }
00110
00111     mSize = num_vertices;
00112
00113     // Calculate the offsets for a CSC representation of the accumulation
00114     // pattern.
00115
00116     size_t* col_start = new size_t[num_vertices + 1];
00117     // mAccumElemStart = new size_t[num_elements+1];
00118     // mAccumElemStart[0] = 0;
00119
00120     for( i = 0; i < num_vertices; ++i )
00121     {
00122         col_start[i] = 0;
00123     }
00124
00125     for( e = 0; e < num_elements; ++e )
00126     {
00127         nve      = patchElemArray[e].node_count();
00128         vtx_list = patchElemArray[e].get_vertex_index_array();
00129         int nfe  = 0;
00130
00131         for( i = 0; i < nve; ++i )
00132         {
00133             r = vtx_list[i];
00134             if( r < num_vertices ) ++nfe;
00135
00136             for( j = i; j < nve; ++j )
00137             {
00138                 c = vtx_list[j];
00139
00140                 if( r <= c )
00141                 {
00142                     if( c < num_vertices ) ++col_start[c];
00143                 }
00144                 else
00145                 {
00146                     if( r < num_vertices ) ++col_start[r];
00147                 }
00148             }
00149         }
00150         // mAccumElemStart[e+1] = mAccumElemStart[e] + (nfe+1)*nfe/2;
00151     }
00152
00153     nz = 0;
00154     for( i = 0; i < num_vertices; ++i )
00155     {
00156         j            = col_start[i];
00157         col_start[i] = nz;
00158         nz += j;
00159     }
00160     col_start[i] = nz;
00161
00162     // Finished putting matrix into CSC representation
00163
00164     int* row_instr    = new int[5 * nz];
00165     size_t* row_index = new size_t[nz];
00166
00167     nz = 0;
00168     for( e = 0; e < num_elements; ++e )
00169     {
00170         nve      = patchElemArray[e].node_count();
00171         vtx_list = patchElemArray[e].get_vertex_index_array();
00172
00173         for( i = 0; i < nve; ++i )
00174         {
00175             r = vtx_list[i];
00176
00177             for( j = i; j < nve; ++j )
00178             {
00179                 c = vtx_list[j];
00180
00181                 if( r <= c )
00182                 {
00183                     if( c < num_vertices )
00184                     {
00185                         row_index[col_start[c]] = r;
00186                         row_instr[col_start[c]] = nz++;
00187                         ++col_start[c];
00188                     }
00189                 }
00190                 else
00191                 {
00192                     if( r < num_vertices )
00193                     {
00194                         row_index[col_start[r]] = c;
00195                         // can't use -nz, but can negate row_instr[col_start[r]]
00196                         row_instr[col_start[r]] = nz++;
00197                         row_instr[col_start[r]] = -row_instr[col_start[r]];
00198                         ++col_start[r];
00199                     }
00200                 }
00201             }
00202         }
00203     }
00204
00205     for( i = num_vertices - 1; i > 0; --i )
00206     {
00207         col_start[i + 1] = col_start[i];
00208     }
00209     col_start[1] = col_start[0];
00210     col_start[0] = 0;
00211
00212     //   cout << "col_start: ";
00213     //   for (int t=0; t<num_vertices+1; ++t)
00214     //     cout << col_start[t] << " ";
00215     //   cout << endl;
00216     //   cout << "row_index: ";
00217     //   for (int t=0; t<nz; ++t)
00218     //     cout << row_index[t] << " ";
00219     //   cout << endl;
00220     //   cout << "row_instr: ";
00221     //   for (int t=0; t<nz; ++t)
00222     //     cout << row_instr[t] << " ";
00223     //   cout << endl;
00224
00225     // Convert CSC to CSR
00226     // First calculate the offsets in the row
00227
00228     size_t* row_start = new size_t[num_vertices + 1];
00229
00230     for( i = 0; i < num_vertices; ++i )
00231     {
00232         row_start[i] = 0;
00233     }
00234
00235     for( i = 0; i < nz; ++i )
00236     {
00237         ++row_start[row_index[i]];
00238     }
00239
00240     nz = 0;
00241     for( i = 0; i < num_vertices; ++i )
00242     {
00243         j            = row_start[i];
00244         row_start[i] = nz;
00245         nz += j;
00246     }
00247     row_start[i] = nz;
00248
00249     // Now calculate the pattern
00250
00251     size_t* col_index = new size_t[nz];
00252     int* col_instr    = new int[nz];
00253
00254     for( i = 0; i < num_vertices; ++i )
00255     {
00256         cs = col_start[i];
00257         ce = col_start[i + 1];
00258
00259         while( cs < ce )
00260         {
00261             r = row_index[cs];
00262
00263             col_index[row_start[r]] = i;
00264             col_instr[row_start[r]] = row_instr[cs];
00265
00266             ++row_start[r];
00267             ++cs;
00268         }
00269     }
00270
00271     for( i = num_vertices - 1; i > 0; --i )
00272     {
00273         row_start[i + 1] = row_start[i];
00274     }
00275     row_start[1] = row_start[0];
00276     row_start[0] = 0;
00277
00278     delete[] row_index;
00279
00280     // Now that the matrix is CSR
00281     // Column indices for each row are sorted
00282
00283     // Compaction -- count the number of nonzeros
00284     mRowStart = col_start;  // don't need to reallocate
00285     // mAccumulation = row_instr;   // don't need to reallocate
00286     delete[] row_instr;
00287
00288     for( i = 0; i <= num_vertices; ++i )
00289     {
00290         mRowStart[i] = 0;
00291     }
00292
00293     nnz = 0;
00294     for( i = 0; i < num_vertices; ++i )
00295     {
00296         rs = row_start[i];
00297         re = row_start[i + 1];
00298
00299         c = num_vertices;
00300         while( rs < re )
00301         {
00302             if( c != col_index[rs] )
00303             {
00304                 // This is an unseen nonzero
00305
00306                 c = col_index[rs];
00307                 ++mRowStart[i];
00308                 ++nnz;
00309             }
00310
00311             // if (col_instr[rs] >= 0) {
00312             //  mAccumulation[col_instr[rs]] = nnz - 1;
00313             //}
00314             // else {
00315             //  mAccumulation[-col_instr[rs]] = 1 - nnz;
00316             //}
00317
00318             ++rs;
00319         }
00320     }
00321
00322     nnz = 0;
00323     for( i = 0; i < num_vertices; ++i )
00324     {
00325         j            = mRowStart[i];
00326         mRowStart[i] = nnz;
00327         nnz += j;
00328     }
00329     mRowStart[i] = nnz;
00330
00331     delete[] col_instr;
00332
00333     // Fill in the compacted hessian matrix
00334
00335     mColIndex = new size_t[nnz];
00336
00337     for( i = 0; i < num_vertices; ++i )
00338     {
00339         rs = row_start[i];
00340         re = row_start[i + 1];
00341
00342         c = num_vertices;
00343         while( rs < re )
00344         {
00345             if( c != col_index[rs] )
00346             {
00347                 // This is an unseen nonzero
00348
00349                 c                       = col_index[rs];
00350                 mColIndex[mRowStart[i]] = c;
00351                 mRowStart[i]++;
00352             }
00353             ++rs;
00354         }
00355     }
00356
00357     for( i = num_vertices - 1; i > 0; --i )
00358     {
00359         mRowStart[i + 1] = mRowStart[i];
00360     }
00361     mRowStart[1] = mRowStart[0];
00362     mRowStart[0] = 0;
00363
00364     delete[] row_start;
00365     delete[] col_index;
00366
00367     mEntries = new Matrix3D[nnz];  // On Solaris, no initializer allowed for new of an array
00368     for( i = 0; i < nnz; ++i )
00369         mEntries[i] = 0.;  // so we initialize all entries manually.
00370
00371     // origin_pd = &pd;
00372
00373     return;
00374 }
00375
00376 void MsqHessian::initialize( const MsqHessian& other )
00377 {
00378     if( !other.mSize )
00379     {
00380         delete[] mEntries;
00381         delete[] mRowStart;
00382         delete[] mColIndex;
00383         mEntries  = 0;
00384         mRowStart = 0;
00385         mColIndex = 0;
00386         mSize     = 0;
00387         return;
00388     }
00389
00390     if( mSize != other.mSize || mRowStart[mSize] != other.mRowStart[mSize] )
00391     {
00392         delete[] mEntries;
00393         delete[] mRowStart;
00394         delete[] mColIndex;
00395
00396         mSize = other.mSize;
00397
00398         mRowStart = new size_t[mSize + 1];
00399         mEntries  = new Matrix3D[other.mRowStart[mSize]];
00400         mColIndex = new size_t[other.mRowStart[mSize]];
00401     }
00402
00403     memcpy( mRowStart, other.mRowStart, sizeof( size_t ) * ( mSize + 1 ) );
00404     memcpy( mColIndex, other.mColIndex, sizeof( size_t ) * mRowStart[mSize] );
00405 }
00406
00407 void MsqHessian::add( const MsqHessian& other )
00408 {
00409     assert( mSize == other.mSize );
00410     assert( !memcmp( mRowStart, other.mRowStart, sizeof( size_t ) * ( mSize + 1 ) ) );
00411     assert( !memcmp( mColIndex, other.mColIndex, sizeof( size_t ) * mRowStart[mSize] ) );
00412     for( unsigned i = 0; i < mRowStart[mSize]; ++i )
00413         mEntries[i] += other.mEntries[i];
00414 }
00415
00416 /*! \param diag is an STL vector of size MsqHessian::size() . */
00417 void MsqHessian::get_diagonal_blocks( std::vector< Matrix3D >& diag, MsqError& /*err*/ ) const
00418 {
00419     // make sure we have enough memory, so that no reallocation is needed later.
00420     if( diag.size() != size() )
00421     {
00422         diag.reserve( size() );
00423     }
00424
00425     for( size_t i = 0; i < size(); ++i )
00426     {
00427         diag[i] = mEntries[mRowStart[i]];
00428     }
00429 }
00430
00431 /*! compute a preconditioner used in the preconditioned conjugate gradient
00432   algebraic solver. In fact, this computes \f$ M^{-1} \f$ .
00433 */
00434 void MsqHessian::compute_preconditioner( MsqError& /*err*/ )
00435 {
00436     // reallocates arrays if size of the Hessian has changed too much.
00437     if( mSize > precondArraySize || mSize < precondArraySize / 10 )
00438     {
00439         delete[] mPreconditioner;
00440         mPreconditioner = new Matrix3D[mSize];
00441     }
00442
00443     Matrix3D* diag_block;
00444     double sum, tmp;
00445     size_t m;
00446     // For each diagonal block, the (inverted) preconditioner is
00447     // the inverse of the sum of the diagonal entries.
00448     for( m = 0; m < mSize; ++m )
00449     {
00450         diag_block = mEntries + mRowStart[m];  // Gets block at position m,m .
00451
00452 #if !DIAGONAL_PRECONDITIONER
00453         // calculate LDL^T factorization of the diagonal block
00454         //  L = [1 pre[0][1] pre[0][2]]
00455         //      [0 1         pre[1][2]]
00456         //      [0 0         1        ]
00457         //  inv(D) = [pre[0][0] 0         0        ]
00458         //           [0         pre[1][1] 0        ]
00459         //           [0         0         pre[2][2]]
00460
00461         // If the first method of calculating a preconditioner fails,
00462         // use the diagonal method.
00463         bool use_diag = false;
00464
00465         if( fabs( ( *diag_block )[0][0] ) < DBL_EPSILON )
00466         {
00467             use_diag = true;
00468         }
00469         else
00470         {
00471             mPreconditioner[m][0][0] = 1.0 / ( *diag_block )[0][0];
00472             mPreconditioner[m][0][1] = ( *diag_block )[0][1] * mPreconditioner[m][0][0];
00473             mPreconditioner[m][0][2] = ( *diag_block )[0][2] * mPreconditioner[m][0][0];
00474             sum                      = ( ( *diag_block )[1][1] - ( *diag_block )[0][1] * mPreconditioner[m][0][1] );
00475             if( fabs( sum ) <= DBL_EPSILON )
00476                 use_diag = true;
00477             else
00478             {
00479                 mPreconditioner[m][1][1] = 1.0 / sum;
00480
00481                 tmp = ( *diag_block )[1][2] - ( *diag_block )[0][2] * mPreconditioner[m][0][1];
00482
00483                 mPreconditioner[m][1][2] = mPreconditioner[m][1][1] * tmp;
00484
00485                 sum = ( ( *diag_block )[2][2] - ( *diag_block )[0][2] * mPreconditioner[m][0][2] -
00486                         mPreconditioner[m][1][2] * tmp );
00487
00488                 if( fabs( sum ) <= DBL_EPSILON )
00489                     use_diag = true;
00490                 else
00491                     mPreconditioner[m][2][2] = 1.0 / sum;
00492             }
00493         }
00494         if( use_diag )
00495 #endif
00496         {
00497             // Either this is a fixed vertex or the diagonal block is not
00498             // invertible.  Switch to the diagonal preconditioner in this
00499             // case.
00500
00501             sum = ( *diag_block )[0][0] + ( *diag_block )[1][1] + ( *diag_block )[2][2];
00502             if( fabs( sum ) > DBL_EPSILON )
00503                 sum = 1 / sum;
00504             else
00505                 sum = 0.0;
00506
00507             mPreconditioner[m][0][0] = sum;
00508             mPreconditioner[m][0][1] = 0.0;
00509             mPreconditioner[m][0][2] = 0.0;
00510             mPreconditioner[m][1][1] = sum;
00511             mPreconditioner[m][1][2] = 0.0;
00512             mPreconditioner[m][2][2] = sum;
00513         }
00514     }
00515 }
00516
00517 /*! uses the preconditionned conjugate gradient algebraic solver
00518   to find d in \f$ H * d = -g \f$ .
00519   \param x : the solution, usually the descent direction d.
00520   \param b : -b will be the right hand side. Usually b is the gradient.
00521 */
00522 void MsqHessian::cg_solver( Vector3D x[], Vector3D b[], MsqError& err )
00523 {
00524     MSQ_FUNCTION_TIMER( "MsqHessian::cg_solver" );
00525
00526     // reallocates arrays if size of the Hessian has changed too much.
00527     if( mSize > cgArraySizes || mSize < cgArraySizes / 10 )
00528     {
00529         delete[] mR;
00530         delete[] mZ;
00531         delete[] mP;
00532         delete[] mW;
00533         mR           = new Vector3D[mSize];
00534         mZ           = new Vector3D[mSize];
00535         mP           = new Vector3D[mSize];
00536         mW           = new Vector3D[mSize];
00537         cgArraySizes = mSize;
00538     }
00539
00540     size_t i;
00541     double alpha_, alpha, beta;
00542     double cg_tol = 1e-2;  // 1e-2 will give a reasonably good solution (~1%).
00543     double norm_g = length( b, mSize );
00544     double norm_r = norm_g;
00545     double rzm1;  // r^T_{k-1} z_{k-1}
00546     double rzm2;  // r^T_{k-2} z_{k-2}
00547
00548     this->compute_preconditioner( err );MSQ_CHKERR( err );  // get M^{-1} for diagonal blocks
00549
00550     for( i = 0; i < mSize; ++i )
00551         x[i] = 0.;
00552     for( i = 0; i < mSize; ++i )
00553         mR[i] = -b[i];  // r = -b because x_0 = 0 and we solve H*x = -b
00554     norm_g *= cg_tol;
00555
00556     this->apply_preconditioner( mZ, mR, err );  // solve Mz = r (computes z = M^-1 r)
00557     for( i = 0; i < mSize; ++i )
00558         mP[i] = mZ[i];              // p_1 = z_0
00559     rzm1 = inner( mZ, mR, mSize );  // inner product r_{k-1}^T z_{k-1}
00560
00561     size_t cg_iter = 0;
00562     while( ( norm_r > norm_g ) && ( cg_iter < maxCGiter ) )
00563     {
00564         ++cg_iter;
00565
00566         axpy( mW, mSize, *this, mP, mSize, 0, 0, err );  // w = A * p_k
00567
00568         alpha_ = inner( mP, mW, mSize );  // alpha_ = p_k^T A p_k
00569         if( alpha_ <= 0.0 )
00570         {
00571             if( 1 == cg_iter )
00572             {
00573                 for( i = 0; i < mSize; ++i )
00574                     x[i] += mP[i];  // x_{k+1} = x_k + p_{k+1}
00575             }
00576             break;  // Newton goes on with this direction of negative curvature
00577         }
00578
00579         alpha = rzm1 / alpha_;
00580
00581         for( i = 0; i < mSize; ++i )
00582             x[i] += alpha * mP[i];  // x_{k+1} = x_k + alpha_{k+1} p_{k+1}
00583         for( i = 0; i < mSize; ++i )
00584             mR[i] -= alpha * mW[i];  // r_{k+1} = r_k - alpha_{k+1} A p_{k+1}
00585         norm_r = length( mR, mSize );
00586
00587         this->apply_preconditioner( mZ, mR, err );  // solve Mz = r (computes z = M^-1 r)
00588
00589         rzm2 = rzm1;
00590         rzm1 = inner( mZ, mR, mSize );  // inner product r_{k-1}^T z_{k-1}
00591         beta = rzm1 / rzm2;
00592         for( i = 0; i < mSize; ++i )
00593             mP[i] = mZ[i] + beta * mP[i];  // p_k = z_{k-1} + Beta_k * p_{k-1}
00594     }
00595 }
00596
00597 void MsqHessian::product( Vector3D* v, const Vector3D* x ) const
00598 {
00599     // zero output array
00600     memset( v, 0, size() * sizeof( *v ) );
00601
00602     // for each row
00603     const Matrix3D* m_iter = mEntries;
00604     const size_t* c_iter   = mColIndex;
00605     for( size_t r = 0; r < size(); ++r )
00606     {
00607
00608         // diagonal entry
00609         plusEqAx( v[r], *m_iter, x[r] );
00610         ++m_iter;
00611         ++c_iter;
00612
00613         // off-diagonal entires
00614         for( size_t c = mRowStart[r] + 1; c != mRowStart[r + 1]; ++c )
00615         {
00616             plusEqAx( v[r], *m_iter, x[*c_iter] );
00617             plusEqTransAx( v[*c_iter], *m_iter, x[r] );
00618             ++m_iter;
00619             ++c_iter;
00620         }
00621     }
00622 }
00623
00624 /* ------------------ I/O ----------------- */
00625
00626 //! Prints out the MsqHessian blocks.
00627 std::ostream& operator<<( std::ostream& s, const MsqHessian& h )
00628 {
00629     size_t i, j;
00630     s << "MsqHessian of size: " << h.mSize << "x" << h.mSize << "\n";
00631     for( i = 0; i < h.mSize; ++i )
00632     {
00633         s << " ROW " << i << " ------------------------\n";
00634         for( j = h.mRowStart[i]; j < h.mRowStart[i + 1]; ++j )
00635         {
00636             s << "   column " << h.mColIndex[j] << " ----\n";
00637             s << h.mEntries[j];
00638         }
00639     }
00640     return s;
00641 }
00642
00643 }  // namespace MBMesquite