GeneralMatrixMatrix.h

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2009 Gael Guennebaud <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
#define EIGEN_GENERAL_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;

/* Specialization for a row-major destination matrix => simple transposition of the product */
template<
  typename Index,
  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
{
  typedef gebp_traits<RhsScalar,LhsScalar> Traits;

  typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols, Index depth,
    const LhsScalar* lhs, Index lhsStride,
    const RhsScalar* rhs, Index rhsStride,
    ResScalar* res, Index resStride,
    ResScalar alpha,
    level3_blocking<RhsScalar,LhsScalar>& blocking,
    GemmParallelInfo<Index>* info = 0)
  {
    // transpose the product such that the result is column major
    general_matrix_matrix_product<Index,
      RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
      LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
      ColMajor>
    ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking,info);
  }
};
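
// Illustration (not part of the original file): the specialization above uses
// the identity (A*B)^T = B^T * A^T. A row-major m x n result C has exactly the
// memory layout of a column-major n x m matrix C^T, so computing
// C^T = B^T * A^T with the ColMajor kernel fills the row-major C in place;
// swapping the operands and flipping their storage orders performs both
// transpositions for free (note that Traits is gebp_traits<RhsScalar,LhsScalar>
// here, with the roles of lhs and rhs exchanged). For instance, a large
// product written into a row-major destination, such as
//
//   Matrix<float,Dynamic,Dynamic,RowMajor> C(m,n);
//   C.noalias() = A * B;
//
// ends up in the ColMajor specialization below with (rhs,lhs) swapped.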

/*  Specialization for a col-major destination matrix
 *    => Blocking algorithm following Goto's paper */
template<
  typename Index,
  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{

typedef gebp_traits<LhsScalar,RhsScalar> Traits;

typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
  const LhsScalar* _lhs, Index lhsStride,
  const RhsScalar* _rhs, Index rhsStride,
  ResScalar* _res, Index resStride,
  ResScalar alpha,
  level3_blocking<LhsScalar,RhsScalar>& blocking,
  GemmParallelInfo<Index>* info = 0)
{
  typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
  typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
  typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
  LhsMapper lhs(_lhs,lhsStride);
  RhsMapper rhs(_rhs,rhsStride);
  ResMapper res(_res, resStride);

  Index kc = blocking.kc();                   // cache block size along the K direction
  Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
  Index nc = (std::min)(cols,blocking.nc());  // cache block size along the N direction
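
  // Illustration (not part of the original file): kc, mc and nc realize the
  // cache-aware partitioning described in the packing comments below: an
  // mc x kc block of the lhs is packed to stay resident in the L2/L3 caches
  // while kc x nc blocks of the rhs stream through the L2 cache. As a
  // hypothetical example, with float scalars and kc = 256, mc = 64, one packed
  // lhs block occupies 64 * 256 * 4 bytes = 64 KB. The actual values are
  // chosen by computeProductBlockingSizes() when the blocking object is
  // constructed.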

  gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
  gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
  gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;

#ifdef EIGEN_HAS_OPENMP
  if(info)
  {
    // this is the parallel version!
    Index tid = omp_get_thread_num();
    Index threads = omp_get_num_threads();

    LhsScalar* blockA = blocking.blockA();
    eigen_internal_assert(blockA!=0);

    std::size_t sizeB = kc*nc;
    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);

    // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
    for(Index k=0; k<depth; k+=kc)
    {
      const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B' and cols of A'

      // In order to reduce the chance that a thread has to wait for the others,
      // let's start by packing B'.
      pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc, nc);

      // Pack A_k to A' in a parallel fashion:
      // each thread packs the sub block A_k,i to A'_i where i is the thread id.

      // However, before copying to A'_i, we have to make sure that no other thread is still using it,
      // i.e., we test that info[tid].users equals 0.
      // Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
      while(info[tid].users!=0) {}
      info[tid].users += threads;

      pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);

      // Notify the other threads that the part A'_i is ready to go.
      info[tid].sync = k;

      // Compute C_i += A' * B' for each sub block A'_i
      for(Index shift=0; shift<threads; ++shift)
      {
        Index i = (tid+shift)%threads;

        // At this point we have to make sure that A'_i has been updated by thread i,
        // we use testAndSetOrdered to mimic a volatile access.
        // However, no need to wait for the B' part which has been updated by the current thread!
        if (shift>0) {
          while(info[i].sync!=k) {
          }
        }

        gebp(res.getSubMapper(info[i].lhs_start, 0), blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
      }

      // Then keep going as usual with the remaining B'
      for(Index j=nc; j<cols; j+=nc)
      {
        const Index actual_nc = (std::min)(j+nc,cols)-j;

        // pack B_k,j to B'
        pack_rhs(blockB, rhs.getSubMapper(k,j), actual_kc, actual_nc);

        // C_j += A' * B'
        gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
      }

      // Release all the sub blocks A'_i of A' for the current thread,
      // i.e., we simply decrement the number of users by 1
      for(Index i=0; i<threads; ++i)
        #pragma omp atomic
        info[i].users -= 1;
    }
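
    // Illustration (not part of the original file): the synchronization above
    // is a hand-rolled barrier. Per k-panel, each thread (1) spins until
    // info[tid].users reaches zero, i.e. until nobody still reads its previous
    // A'_i, (2) sets users += threads and packs its slice A'_i, (3) announces
    // the slice via info[tid].sync = k, (4) consumes every A'_i as it becomes
    // ready, starting with its own, and (5) decrements users on all slices
    // once done. The atomic decrements are what eventually release step (1)
    // for the next k-panel.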
  }
  else
#endif // EIGEN_HAS_OPENMP
  {
    EIGEN_UNUSED_VARIABLE(info);

    // this is the sequential version!
    std::size_t sizeA = kc*mc;
    std::size_t sizeB = kc*nc;

    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());

    const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;

    // For each horizontal panel of the rhs, and corresponding panel of the lhs...
    for(Index i2=0; i2<rows; i2+=mc)
    {
      const Index actual_mc = (std::min)(i2+mc,rows)-i2;

      for(Index k2=0; k2<depth; k2+=kc)
      {
        const Index actual_kc = (std::min)(k2+kc,depth)-k2;

        // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
        // => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
        // Note that this panel will be read as many times as the number of blocks in the rhs's
        // horizontal panel which is, in practice, a very low number.
        pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);

        // For each kc x nc block of the rhs's horizontal panel...
        for(Index j2=0; j2<cols; j2+=nc)
        {
          const Index actual_nc = (std::min)(j2+nc,cols)-j2;

          // We pack the rhs's block into a sequential chunk of memory (L2 caching)
          // Note that this block will be read a very high number of times, which is equal to the number
          // of micro horizontal panels of the large lhs's panel (e.g., rows/12 times).
          if((!pack_rhs_once) || i2==0)
            pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);

          // Everything is packed, we can now call the panel * block kernel:
          gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
        }
      }
    }
  }
}

};
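
// Illustration (not part of the original file): stripped of packing and
// parallelism, the sequential path above is the classic three-level Goto
// blocking; a scalar reference sketch of the loop nest:
//
//   for(Index i2=0; i2<rows; i2+=mc)
//     for(Index k2=0; k2<depth; k2+=kc)       // pack mc x kc block A' of lhs
//       for(Index j2=0; j2<cols; j2+=nc)      // pack kc x nc block B' of rhs
//         for each (i,j,k) inside the block:  // handled by the gebp kernel
//           res(i2+i, j2+j) += alpha * lhs(i2+i, k2+k) * rhs(k2+k, j2+j);
//
// gebp replaces the innermost loops with register-blocked SIMD micro-kernels
// of size Traits::mr x Traits::nr running over the packed blockA and blockB.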

/*********************************************************************************
*  Specialization of generic_product_impl for "large" GEMM, i.e.,
*  implementation of the high level wrapper to general_matrix_matrix_product
**********************************************************************************/

template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct gemm_functor
{
  gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
    : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
  {}

  void initParallelSession(Index num_threads) const
  {
    m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
    m_blocking.allocateA();
  }

  void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
  {
    if(cols==-1)
      cols = m_rhs.cols();

    Gemm::run(rows, cols, m_lhs.cols(),
              &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
              &m_rhs.coeffRef(0,col), m_rhs.outerStride(),
              (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
              m_actualAlpha, m_blocking, info);
  }

  typedef typename Gemm::Traits Traits;

  protected:
    const Lhs& m_lhs;
    const Rhs& m_rhs;
    Dest& m_dest;
    Scalar m_actualAlpha;
    BlockingType& m_blocking;
};
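
// Illustration (not part of the original file): gemm_functor gives
// general_matrix_matrix_product::run() the callable shape that
// parallelize_gemm (see Parallelizer.h) expects. Roughly, the scheduler
// either runs the whole product on one thread,
//
//   func(0, rows);                        // all rows; cols defaults to full width
//
// or, in the OpenMP path, hands each thread a horizontal slice of the
// destination,
//
//   func(r0, blockRows, 0, cols, info);   // one thread's share (sketch)
//
// after initParallelSession() has sized the shared blocking for num_threads
// and pre-allocated blockA, which all threads then pack cooperatively.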

template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;

template<typename _LhsScalar, typename _RhsScalar>
class level3_blocking
{
    typedef _LhsScalar LhsScalar;
    typedef _RhsScalar RhsScalar;

  protected:
    LhsScalar* m_blockA;
    RhsScalar* m_blockB;

    Index m_mc;
    Index m_nc;
    Index m_kc;

  public:

    level3_blocking()
      : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
    {}

    inline Index mc() const { return m_mc; }
    inline Index nc() const { return m_nc; }
    inline Index kc() const { return m_kc; }

    inline LhsScalar* blockA() { return m_blockA; }
    inline RhsScalar* blockB() { return m_blockB; }
};

template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>
  : public level3_blocking<
      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
{
    enum {
      Transpose = StorageOrder==RowMajor,
      ActualRows = Transpose ? MaxCols : MaxRows,
      ActualCols = Transpose ? MaxRows : MaxCols
    };
    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
    typedef gebp_traits<LhsScalar,RhsScalar> Traits;
    enum {
      SizeA = ActualRows * MaxDepth,
      SizeB = ActualCols * MaxDepth
    };

#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
    EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
    EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
#else
    EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
    EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
#endif

  public:

    gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
    {
      this->m_mc = ActualRows;
      this->m_nc = ActualCols;
      this->m_kc = MaxDepth;
#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
      this->m_blockA = m_staticA;
      this->m_blockB = m_staticB;
#else
      this->m_blockA = reinterpret_cast<LhsScalar*>((std::size_t(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
      this->m_blockB = reinterpret_cast<RhsScalar*>((std::size_t(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
#endif
    }

    void initParallel(Index, Index, Index, Index)
    {}

    inline void allocateA() {}
    inline void allocateB() {}
    inline void allocateAll() {}
};
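
// Illustration (not part of the original file): this FiniteAtCompileTime
// specialization is selected when all three product dimensions have fixed
// compile-time bounds; the packing buffers are then members living with the
// blocking object and the allocate*() hooks are no-ops. A hypothetical
// product that would use it:
//
//   Matrix<float,32,32> a, b, c;
//   c.noalias() = a * b;   // blockA/blockB point into m_staticA/m_staticB
//
// (for much smaller fixed sizes the product is instead evaluated through the
// lazy coefficient-based path, cf. the <20 heuristic in evalTo() below).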

template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
  : public level3_blocking<
      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
{
    enum {
      Transpose = StorageOrder==RowMajor
    };
    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
    typedef gebp_traits<LhsScalar,RhsScalar> Traits;

    Index m_sizeA;
    Index m_sizeB;

  public:

    gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
    {
      this->m_mc = Transpose ? cols : rows;
      this->m_nc = Transpose ? rows : cols;
      this->m_kc = depth;

      if(l3_blocking)
      {
        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
      }
      else  // no l3 blocking
      {
        Index n = this->m_nc;
        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
      }

      m_sizeA = this->m_mc * this->m_kc;
      m_sizeB = this->m_kc * this->m_nc;
    }

    void initParallel(Index rows, Index cols, Index depth, Index num_threads)
    {
      this->m_mc = Transpose ? cols : rows;
      this->m_nc = Transpose ? rows : cols;
      this->m_kc = depth;

      eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
      Index m = this->m_mc;
      computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
      m_sizeA = this->m_mc * this->m_kc;
      m_sizeB = this->m_kc * this->m_nc;
    }

    void allocateA()
    {
      if(this->m_blockA==0)
        this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
    }

    void allocateB()
    {
      if(this->m_blockB==0)
        this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
    }

    void allocateAll()
    {
      allocateA();
      allocateB();
    }

    ~gemm_blocking_space()
    {
      aligned_delete(this->m_blockA, m_sizeA);
      aligned_delete(this->m_blockB, m_sizeB);
    }
};
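
// Illustration (not part of the original file): in this Dynamic specialization
// the block sizes are computed at run time and the buffers are heap-allocated
// on demand. A hedged sketch mirroring what scaleAndAddTo() does below:
//
//   typedef internal::gemm_blocking_space<ColMajor,float,float,
//                                         Dynamic,Dynamic,Dynamic> BlockingType;
//   BlockingType blocking(rows, cols, depth, /*num_threads=*/1, /*l3_blocking=*/true);
//
// allocateAll() would reserve blockA/blockB eagerly; if they are left null,
// the kernel falls back to ei_declare_aligned_stack_constructed_variable,
// which allocates them per call (on the stack for small enough sizes).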

} // end namespace internal

namespace internal {

template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
  typedef typename Lhs::Scalar LhsScalar;
  typedef typename Rhs::Scalar RhsScalar;

  typedef internal::blas_traits<Lhs> LhsBlasTraits;
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
  typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;

  typedef internal::blas_traits<Rhs> RhsBlasTraits;
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
  typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;

  enum {
    MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
  };

  typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;

  template<typename Dst>
  static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
      lazyproduct::evalTo(dst, lhs, rhs);
    else
    {
      dst.setZero();
      scaleAndAddTo(dst, lhs, rhs, Scalar(1));
    }
  }

  template<typename Dst>
  static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
      lazyproduct::addTo(dst, lhs, rhs);
    else
      scaleAndAddTo(dst,lhs, rhs, Scalar(1));
  }

  template<typename Dst>
  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
      lazyproduct::subTo(dst, lhs, rhs);
    else
      scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
  }

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
  {
    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
    if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
      return;

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
                               * RhsBlasTraits::extractScalarFactor(a_rhs);

    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
            Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;

    typedef internal::gemm_functor<
      Scalar, Index,
      internal::general_matrix_matrix_product<
        Index,
        LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
        RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
        (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor>,
      ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;

    BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
    internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
                              (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit);
  }
};
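
// Illustration (not part of the original file): end to end, an expression like
//
//   MatrixXf a, b, c;  // large
//   c.noalias() += (2.0f * a.transpose()) * b;
//
// reaches scaleAndAddTo() with the scalar factor 2.0f folded into actualAlpha
// by extractScalarFactor(), while the transpose is absorbed into the
// LhsStorageOrder template parameter (a col-major matrix read transposed is
// just a row-major operand), so no temporary is evaluated; for complex
// scalars, .adjoint() would additionally set the NeedToConjugate flag.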

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_GENERAL_MATRIX_MATRIX_H