// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2009 Gael Guennebaud <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
#define EIGEN_GENERAL_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;

/* Specialization for a row-major destination matrix => simple transposition of the product */
template<
  typename Index,
  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
{
  typedef gebp_traits<RhsScalar,LhsScalar> Traits;

  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols, Index depth,
    const LhsScalar* lhs, Index lhsStride,
    const RhsScalar* rhs, Index rhsStride,
    ResScalar* res, Index resStride,
    ResScalar alpha,
    level3_blocking<RhsScalar,LhsScalar>& blocking,
    GemmParallelInfo<Index>* info = 0)
  {
    // transpose the product such that the result is column major
    general_matrix_matrix_product<Index,
      RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
      LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
      ColMajor>
    ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
  }
};

/* Specialization for a col-major destination matrix
 * => Blocking algorithm following Goto's paper */
template<
  typename Index,
  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{

  typedef gebp_traits<LhsScalar,RhsScalar> Traits;

  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  static void run(Index rows, Index cols, Index depth,
    const LhsScalar* _lhs, Index lhsStride,
    const RhsScalar* _rhs, Index rhsStride,
    ResScalar* _res, Index resStride,
    ResScalar alpha,
    level3_blocking<LhsScalar,RhsScalar>& blocking,
    GemmParallelInfo<Index>* info = 0)
  {
    typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
    typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
    LhsMapper lhs(_lhs,lhsStride);
    RhsMapper rhs(_rhs,rhsStride);
    ResMapper res(_res, resStride);

    Index kc = blocking.kc();                   // cache block size along the K direction
    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
    Index nc = (std::min)(cols,blocking.nc());  // cache block size along the N direction

    gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
    gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;

#ifdef EIGEN_HAS_OPENMP
    if(info)
    {
      // this is the parallel version!
      Index tid = omp_get_thread_num();
      Index threads = omp_get_num_threads();

      LhsScalar* blockA = blocking.blockA();
      eigen_internal_assert(blockA!=0);

      std::size_t sizeB = kc*nc;
      ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);

      // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
      for(Index k=0; k<depth; k+=kc)
      {
        const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'

        // In order to reduce the chance that a thread has to wait for the other,
        // let's start by packing B'.
        pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc, nc);

        // Pack A_k to A' in a parallel fashion:
        // each thread packs the sub block A_k,i to A'_i where i is the thread id.

        // However, before copying to A'_i, we have to make sure that no other thread is still using it,
        // i.e., we test that info[tid].users equals 0.
        // Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
        while(info[tid].users!=0) {}
        info[tid].users += threads;

        pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);

        // Notify the other threads that the part A'_i is ready to go.
        info[tid].sync = k;

        // Computes C_i += A' * B' per A'_i
        for(Index shift=0; shift<threads; ++shift)
        {
          Index i = (tid+shift)%threads;

          // At this point we have to make sure that A'_i has been updated by the thread i,
          // we use testAndSetOrdered to mimic a volatile access.
          // However, no need to wait for the B' part which has been updated by the current thread!
          if (shift>0) {
            while(info[i].sync!=k) {
            }
          }

          gebp(res.getSubMapper(info[i].lhs_start, 0), blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
        }

        // Then keep going as usual with the remaining B'
        for(Index j=nc; j<cols; j+=nc)
        {
          const Index actual_nc = (std::min)(j+nc,cols)-j;

          // pack B_k,j to B'
          pack_rhs(blockB, rhs.getSubMapper(k,j), actual_kc, actual_nc);

          // C_j += A' * B'
          gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
        }

        // Release all the sub blocks A'_i of A' for the current thread,
        // i.e., we simply decrement the number of users by 1
        for(Index i=0; i<threads; ++i)
#pragma omp atomic
          info[i].users -= 1;
      }
    }
    else
#endif // EIGEN_HAS_OPENMP
    {
      EIGEN_UNUSED_VARIABLE(info);

      // this is the sequential version!
      std::size_t sizeA = kc*mc;
      std::size_t sizeB = kc*nc;

      ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
      ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());

      const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;

      // For each horizontal panel of the rhs, and corresponding panel of the lhs...
      for(Index i2=0; i2<rows; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,rows)-i2;

        for(Index k2=0; k2<depth; k2+=kc)
        {
          const Index actual_kc = (std::min)(k2+kc,depth)-k2;

          // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
          // => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
          // Note that this panel will be read as many times as the number of blocks in the rhs's
          // horizontal panel which is, in practice, a very low number.
          pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);

          // For each kc x nc block of the rhs's horizontal panel...
          for(Index j2=0; j2<cols; j2+=nc)
          {
            const Index actual_nc = (std::min)(j2+nc,cols)-j2;

            // We pack the rhs's block into a sequential chunk of memory (L2 caching)
            // Note that this block will be read a very high number of times, which is equal to the number of
            // micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
            if((!pack_rhs_once) || i2==0)
              pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);

            // Everything is packed, we can now call the panel * block kernel:
            gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
          }
        }
      }
    }
  }

};

/*********************************************************************************
*  Specialization of generic_product_impl for "large" GEMM, i.e.,
*  implementation of the high level wrapper to general_matrix_matrix_product
**********************************************************************************/

template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct gemm_functor
{
  gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
    : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
  {}

  void initParallelSession(Index num_threads) const
  {
    m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
    m_blocking.allocateA();
  }

  void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
  {
    if(cols==-1)
      cols = m_rhs.cols();

    Gemm::run(rows, cols, m_lhs.cols(),
              &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
              &m_rhs.coeffRef(0,col), m_rhs.outerStride(),
              (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
              m_actualAlpha, m_blocking, info);
  }

  typedef typename Gemm::Traits Traits;

protected:
  const Lhs& m_lhs;
  const Rhs& m_rhs;
  Dest& m_dest;
  Scalar m_actualAlpha;
  BlockingType& m_blocking;
};

template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
         bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;

template<typename _LhsScalar, typename _RhsScalar>
class level3_blocking
{
    typedef _LhsScalar LhsScalar;
    typedef _RhsScalar RhsScalar;

  protected:
    LhsScalar* m_blockA;
    RhsScalar* m_blockB;

    Index m_mc;
    Index m_nc;
    Index m_kc;

  public:

    level3_blocking()
      : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
    {}

    inline Index mc() const { return m_mc; }
    inline Index nc() const { return m_nc; }
    inline Index kc() const { return m_kc; }

    inline LhsScalar* blockA() { return m_blockA; }
    inline RhsScalar* blockB() { return m_blockB; }
};

template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>
  : public level3_blocking<
      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
{
    enum {
      Transpose = StorageOrder==RowMajor,
      ActualRows = Transpose ? MaxCols : MaxRows,
      ActualCols = Transpose ? MaxRows : MaxCols
    };
    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
    typedef gebp_traits<LhsScalar,RhsScalar> Traits;
    enum {
      SizeA = ActualRows * MaxDepth,
      SizeB = ActualCols * MaxDepth
    };

#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
    EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
    EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
#else
    EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
    EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
#endif

  public:

    gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
    {
      this->m_mc = ActualRows;
      this->m_nc = ActualCols;
      this->m_kc = MaxDepth;
#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
      this->m_blockA = m_staticA;
      this->m_blockB = m_staticB;
#else
      this->m_blockA = reinterpret_cast<LhsScalar*>((std::size_t(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
      this->m_blockB = reinterpret_cast<RhsScalar*>((std::size_t(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
#endif
    }

    void initParallel(Index, Index, Index, Index)
    {}

    inline void allocateA() {}
    inline void allocateB() {}
    inline void allocateAll() {}
};

template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
  : public level3_blocking<
      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
{
    enum {
      Transpose = StorageOrder==RowMajor
    };
    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
    typedef gebp_traits<LhsScalar,RhsScalar> Traits;

    Index m_sizeA;
    Index m_sizeB;

  public:

    gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
    {
      this->m_mc = Transpose ? cols : rows;
      this->m_nc = Transpose ? rows : cols;
      this->m_kc = depth;

      if(l3_blocking)
      {
        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
      }
      else  // no l3 blocking
      {
        Index n = this->m_nc;
        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
      }

      m_sizeA = this->m_mc * this->m_kc;
      m_sizeB = this->m_kc * this->m_nc;
    }

    void initParallel(Index rows, Index cols, Index depth, Index num_threads)
    {
      this->m_mc = Transpose ? cols : rows;
      this->m_nc = Transpose ? rows : cols;
      this->m_kc = depth;

      eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
      Index m = this->m_mc;
      computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
      m_sizeA = this->m_mc * this->m_kc;
      m_sizeB = this->m_kc * this->m_nc;
    }

    void allocateA()
    {
      if(this->m_blockA==0)
        this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
    }

    void allocateB()
    {
      if(this->m_blockB==0)
        this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
    }

    void allocateAll()
    {
      allocateA();
      allocateB();
    }

    ~gemm_blocking_space()
    {
      aligned_delete(this->m_blockA, m_sizeA);
      aligned_delete(this->m_blockB, m_sizeB);
    }
};

} // end namespace internal

namespace internal {

template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
  typedef typename Lhs::Scalar LhsScalar;
  typedef typename Rhs::Scalar RhsScalar;

  typedef internal::blas_traits<Lhs> LhsBlasTraits;
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
  typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;

  typedef internal::blas_traits<Rhs> RhsBlasTraits;
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
  typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;

  enum {
    MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
  };

  typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;

  template<typename Dst>
  static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
      lazyproduct::evalTo(dst, lhs, rhs);
    else
    {
      dst.setZero();
      scaleAndAddTo(dst, lhs, rhs, Scalar(1));
    }
  }

  template<typename Dst>
  static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
      lazyproduct::addTo(dst, lhs, rhs);
    else
      scaleAndAddTo(dst,lhs, rhs, Scalar(1));
  }

  template<typename Dst>
  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
      lazyproduct::subTo(dst, lhs, rhs);
    else
      scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
  }

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
  {
    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
    if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
      return;

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
                               * RhsBlasTraits::extractScalarFactor(a_rhs);

    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
            Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;

    typedef internal::gemm_functor<
      Scalar, Index,
      internal::general_matrix_matrix_product<
        Index,
        LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
        RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
        (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
      ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;

    BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
    internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
        (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit);
  }
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_GENERAL_MATRIX_MATRIX_H
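For reference, the blocked GEMM path implemented above is what a plain product of large, dynamic-size matrices resolves to: generic_product_impl with the GemmProduct tag forwards to scaleAndAddTo, which sets up a gemm_blocking_space and dispatches to general_matrix_matrix_product. A minimal usage sketch follows; the matrix sizes are arbitrary illustrations, chosen large enough to avoid the coefficient-based lazy-product shortcut taken for very small operands.

#include <Eigen/Dense>

int main()
{
  using namespace Eigen;

  // Large dynamic-size operands: the product is evaluated through the
  // cache-blocked GEMM implementation in this file rather than the
  // coefficient-based lazy product used for tiny matrices.
  MatrixXd A = MatrixXd::Random(512, 256);
  MatrixXd B = MatrixXd::Random(256, 384);
  MatrixXd C = MatrixXd::Zero(512, 384);

  // noalias() writes the result directly into C, i.e. the accumulation
  // maps onto scaleAndAddTo(C, A, B, Scalar(1)) without an extra temporary.
  C.noalias() += A * B;

  return 0;
}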