MOAB
4.9.3pre
|
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2006-2008 Benoit Jacob <[email protected]> 00005 // Copyright (C) 2008-2011 Gael Guennebaud <[email protected]> 00006 // 00007 // This Source Code Form is subject to the terms of the Mozilla 00008 // Public License v. 2.0. If a copy of the MPL was not distributed 00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00010 00011 #ifndef EIGEN_GENERAL_PRODUCT_H 00012 #define EIGEN_GENERAL_PRODUCT_H 00013 00014 namespace Eigen { 00015 00016 enum { 00017 Large = 2, 00018 Small = 3 00019 }; 00020 00021 namespace internal { 00022 00023 template<int Rows, int Cols, int Depth> struct product_type_selector; 00024 00025 template<int Size, int MaxSize> struct product_size_category 00026 { 00027 enum { is_large = MaxSize == Dynamic || 00028 Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD, 00029 value = is_large ? Large 00030 : Size == 1 ? 1 00031 : Small 00032 }; 00033 }; 00034 00035 template<typename Lhs, typename Rhs> struct product_type 00036 { 00037 typedef typename remove_all<Lhs>::type _Lhs; 00038 typedef typename remove_all<Rhs>::type _Rhs; 00039 enum { 00040 MaxRows = traits<_Lhs>::MaxRowsAtCompileTime, 00041 Rows = traits<_Lhs>::RowsAtCompileTime, 00042 MaxCols = traits<_Rhs>::MaxColsAtCompileTime, 00043 Cols = traits<_Rhs>::ColsAtCompileTime, 00044 MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime, 00045 traits<_Rhs>::MaxRowsAtCompileTime), 00046 Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime, 00047 traits<_Rhs>::RowsAtCompileTime) 00048 }; 00049 00050 // the splitting into different lines of code here, introducing the _select enums and the typedef below, 00051 // is to work around an internal compiler error with gcc 4.1 and 4.2. 00052 private: 00053 enum { 00054 rows_select = product_size_category<Rows,MaxRows>::value, 00055 cols_select = product_size_category<Cols,MaxCols>::value, 00056 depth_select = product_size_category<Depth,MaxDepth>::value 00057 }; 00058 typedef product_type_selector<rows_select, cols_select, depth_select> selector; 00059 00060 public: 00061 enum { 00062 value = selector::ret, 00063 ret = selector::ret 00064 }; 00065 #ifdef EIGEN_DEBUG_PRODUCT 00066 static void debug() 00067 { 00068 EIGEN_DEBUG_VAR(Rows); 00069 EIGEN_DEBUG_VAR(Cols); 00070 EIGEN_DEBUG_VAR(Depth); 00071 EIGEN_DEBUG_VAR(rows_select); 00072 EIGEN_DEBUG_VAR(cols_select); 00073 EIGEN_DEBUG_VAR(depth_select); 00074 EIGEN_DEBUG_VAR(value); 00075 } 00076 #endif 00077 }; 00078 00079 /* The following allows to select the kind of product at compile time 00080 * based on the three dimensions of the product. 00081 * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ 00082 // FIXME I'm not sure the current mapping is the ideal one. 00083 template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; }; 00084 template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; }; 00085 template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; }; 00086 template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; }; 00087 template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; }; 00088 template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; }; 00089 template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; }; 00090 template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; }; 00091 template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; }; 00092 template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; }; 00093 template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; }; 00094 template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; }; 00095 template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; }; 00096 template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; }; 00097 template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; }; 00098 template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; }; 00099 template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; }; 00100 template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; }; 00101 template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; }; 00102 template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; }; 00103 template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; }; 00104 template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; }; 00105 00106 } // end namespace internal 00107 00108 /*********************************************************************** 00109 * Implementation of Inner Vector Vector Product 00110 ***********************************************************************/ 00111 00112 // FIXME : maybe the "inner product" could return a Scalar 00113 // instead of a 1x1 matrix ?? 00114 // Pro: more natural for the user 00115 // Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix 00116 // product ends up to a row-vector times col-vector product... To tackle this use 00117 // case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x); 00118 00119 /*********************************************************************** 00120 * Implementation of Outer Vector Vector Product 00121 ***********************************************************************/ 00122 00123 /*********************************************************************** 00124 * Implementation of General Matrix Vector Product 00125 ***********************************************************************/ 00126 00127 /* According to the shape/flags of the matrix we have to distinghish 3 different cases: 00128 * 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine 00129 * 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine 00130 * 3 - all other cases are handled using a simple loop along the outer-storage direction. 00131 * Therefore we need a lower level meta selector. 00132 * Furthermore, if the matrix is the rhs, then the product has to be transposed. 00133 */ 00134 namespace internal { 00135 00136 template<int Side, int StorageOrder, bool BlasCompatible> 00137 struct gemv_dense_selector; 00138 00139 } // end namespace internal 00140 00141 namespace internal { 00142 00143 template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if; 00144 00145 template<typename Scalar,int Size,int MaxSize> 00146 struct gemv_static_vector_if<Scalar,Size,MaxSize,false> 00147 { 00148 EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } 00149 }; 00150 00151 template<typename Scalar,int Size> 00152 struct gemv_static_vector_if<Scalar,Size,Dynamic,true> 00153 { 00154 EIGEN_STRONG_INLINE Scalar* data() { return 0; } 00155 }; 00156 00157 template<typename Scalar,int Size,int MaxSize> 00158 struct gemv_static_vector_if<Scalar,Size,MaxSize,true> 00159 { 00160 #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 00161 internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data; 00162 EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } 00163 #else 00164 // Some architectures cannot align on the stack, 00165 // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. 00166 enum { 00167 ForceAlignment = internal::packet_traits<Scalar>::Vectorizable, 00168 PacketSize = internal::packet_traits<Scalar>::size 00169 }; 00170 internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data; 00171 EIGEN_STRONG_INLINE Scalar* data() { 00172 return ForceAlignment 00173 ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) 00174 : m_data.array; 00175 } 00176 #endif 00177 }; 00178 00179 // The vector is on the left => transposition 00180 template<int StorageOrder, bool BlasCompatible> 00181 struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible> 00182 { 00183 template<typename Lhs, typename Rhs, typename Dest> 00184 static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) 00185 { 00186 Transpose<Dest> destT(dest); 00187 enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; 00188 gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible> 00189 ::run(rhs.transpose(), lhs.transpose(), destT, alpha); 00190 } 00191 }; 00192 00193 template<> struct gemv_dense_selector<OnTheRight,ColMajor,true> 00194 { 00195 template<typename Lhs, typename Rhs, typename Dest> 00196 static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) 00197 { 00198 typedef typename Lhs::Scalar LhsScalar; 00199 typedef typename Rhs::Scalar RhsScalar; 00200 typedef typename Dest::Scalar ResScalar; 00201 typedef typename Dest::RealScalar RealScalar; 00202 00203 typedef internal::blas_traits<Lhs> LhsBlasTraits; 00204 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; 00205 typedef internal::blas_traits<Rhs> RhsBlasTraits; 00206 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; 00207 00208 typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; 00209 00210 ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); 00211 ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); 00212 00213 ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) 00214 * RhsBlasTraits::extractScalarFactor(rhs); 00215 00216 // make sure Dest is a compile-time vector type (bug 1166) 00217 typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest; 00218 00219 enum { 00220 // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 00221 // on, the other hand it is good for the cache to pack the vector anyways... 00222 EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), 00223 ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex), 00224 MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal 00225 }; 00226 00227 gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest; 00228 00229 const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); 00230 const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; 00231 00232 RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha); 00233 00234 ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), 00235 evalToDest ? dest.data() : static_dest.data()); 00236 00237 if(!evalToDest) 00238 { 00239 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN 00240 Index size = dest.size(); 00241 EIGEN_DENSE_STORAGE_CTOR_PLUGIN 00242 #endif 00243 if(!alphaIsCompatible) 00244 { 00245 MappedDest(actualDestPtr, dest.size()).setZero(); 00246 compatibleAlpha = RhsScalar(1); 00247 } 00248 else 00249 MappedDest(actualDestPtr, dest.size()) = dest; 00250 } 00251 00252 typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper; 00253 typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper; 00254 general_matrix_vector_product 00255 <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run( 00256 actualLhs.rows(), actualLhs.cols(), 00257 LhsMapper(actualLhs.data(), actualLhs.outerStride()), 00258 RhsMapper(actualRhs.data(), actualRhs.innerStride()), 00259 actualDestPtr, 1, 00260 compatibleAlpha); 00261 00262 if (!evalToDest) 00263 { 00264 if(!alphaIsCompatible) 00265 dest += actualAlpha * MappedDest(actualDestPtr, dest.size()); 00266 else 00267 dest = MappedDest(actualDestPtr, dest.size()); 00268 } 00269 } 00270 }; 00271 00272 template<> struct gemv_dense_selector<OnTheRight,RowMajor,true> 00273 { 00274 template<typename Lhs, typename Rhs, typename Dest> 00275 static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) 00276 { 00277 typedef typename Lhs::Scalar LhsScalar; 00278 typedef typename Rhs::Scalar RhsScalar; 00279 typedef typename Dest::Scalar ResScalar; 00280 00281 typedef internal::blas_traits<Lhs> LhsBlasTraits; 00282 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; 00283 typedef internal::blas_traits<Rhs> RhsBlasTraits; 00284 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; 00285 typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; 00286 00287 typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); 00288 typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); 00289 00290 ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) 00291 * RhsBlasTraits::extractScalarFactor(rhs); 00292 00293 enum { 00294 // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 00295 // on, the other hand it is good for the cache to pack the vector anyways... 00296 DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 00297 }; 00298 00299 gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs; 00300 00301 ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), 00302 DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data()); 00303 00304 if(!DirectlyUseRhs) 00305 { 00306 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN 00307 Index size = actualRhs.size(); 00308 EIGEN_DENSE_STORAGE_CTOR_PLUGIN 00309 #endif 00310 Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; 00311 } 00312 00313 typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper; 00314 typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper; 00315 general_matrix_vector_product 00316 <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run( 00317 actualLhs.rows(), actualLhs.cols(), 00318 LhsMapper(actualLhs.data(), actualLhs.outerStride()), 00319 RhsMapper(actualRhsPtr, 1), 00320 dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166) 00321 actualAlpha); 00322 } 00323 }; 00324 00325 template<> struct gemv_dense_selector<OnTheRight,ColMajor,false> 00326 { 00327 template<typename Lhs, typename Rhs, typename Dest> 00328 static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) 00329 { 00330 // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp 00331 typename nested_eval<Rhs,1>::type actual_rhs(rhs); 00332 const Index size = rhs.rows(); 00333 for(Index k=0; k<size; ++k) 00334 dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k); 00335 } 00336 }; 00337 00338 template<> struct gemv_dense_selector<OnTheRight,RowMajor,false> 00339 { 00340 template<typename Lhs, typename Rhs, typename Dest> 00341 static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) 00342 { 00343 typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs); 00344 const Index rows = dest.rows(); 00345 for(Index i=0; i<rows; ++i) 00346 dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum(); 00347 } 00348 }; 00349 00350 } // end namespace internal 00351 00352 /*************************************************************************** 00353 * Implementation of matrix base methods 00354 ***************************************************************************/ 00355 00362 #ifndef __CUDACC__ 00363 00364 template<typename Derived> 00365 template<typename OtherDerived> 00366 inline const Product<Derived, OtherDerived> 00367 MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const 00368 { 00369 // A note regarding the function declaration: In MSVC, this function will sometimes 00370 // not be inlined since DenseStorage is an unwindable object for dynamic 00371 // matrices and product types are holding a member to store the result. 00372 // Thus it does not help tagging this function with EIGEN_STRONG_INLINE. 00373 enum { 00374 ProductIsValid = Derived::ColsAtCompileTime==Dynamic 00375 || OtherDerived::RowsAtCompileTime==Dynamic 00376 || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), 00377 AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, 00378 SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) 00379 }; 00380 // note to the lost user: 00381 // * for a dot product use: v1.dot(v2) 00382 // * for a coeff-wise product use: v1.cwiseProduct(v2) 00383 EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), 00384 INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) 00385 EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), 00386 INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) 00387 EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) 00388 #ifdef EIGEN_DEBUG_PRODUCT 00389 internal::product_type<Derived,OtherDerived>::debug(); 00390 #endif 00391 00392 return Product<Derived, OtherDerived>(derived(), other.derived()); 00393 } 00394 00395 #endif // __CUDACC__ 00396 00408 template<typename Derived> 00409 template<typename OtherDerived> 00410 const Product<Derived,OtherDerived,LazyProduct> 00411 MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const 00412 { 00413 enum { 00414 ProductIsValid = Derived::ColsAtCompileTime==Dynamic 00415 || OtherDerived::RowsAtCompileTime==Dynamic 00416 || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), 00417 AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, 00418 SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) 00419 }; 00420 // note to the lost user: 00421 // * for a dot product use: v1.dot(v2) 00422 // * for a coeff-wise product use: v1.cwiseProduct(v2) 00423 EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), 00424 INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) 00425 EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), 00426 INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) 00427 EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) 00428 00429 return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived()); 00430 } 00431 00432 } // end namespace Eigen 00433 00434 #endif // EIGEN_PRODUCT_H