MOAB
4.9.3pre
|
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2011 Benoit Jacob <[email protected]> 00005 // Copyright (C) 2011-2014 Gael Guennebaud <[email protected]> 00006 // Copyright (C) 2011-2012 Jitse Niesen <[email protected]> 00007 // 00008 // This Source Code Form is subject to the terms of the Mozilla 00009 // Public License v. 2.0. If a copy of the MPL was not distributed 00010 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00011 00012 #ifndef EIGEN_ASSIGN_EVALUATOR_H 00013 #define EIGEN_ASSIGN_EVALUATOR_H 00014 00015 namespace Eigen { 00016 00017 // This implementation is based on Assign.h 00018 00019 namespace internal { 00020 00021 /*************************************************************************** 00022 * Part 1 : the logic deciding a strategy for traversal and unrolling * 00023 ***************************************************************************/ 00024 00025 // copy_using_evaluator_traits is based on assign_traits 00026 00027 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc> 00028 struct copy_using_evaluator_traits 00029 { 00030 typedef typename DstEvaluator::XprType Dst; 00031 typedef typename Dst::Scalar DstScalar; 00032 // TODO distinguish between linear traversal and inner-traversals 00033 typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type PacketType; 00034 00035 enum { 00036 DstFlags = DstEvaluator::Flags, 00037 SrcFlags = SrcEvaluator::Flags, 00038 RequiredAlignment = unpacket_traits<PacketType>::alignment 00039 }; 00040 00041 public: 00042 enum { 00043 DstAlignment = DstEvaluator::Alignment, 00044 SrcAlignment = SrcEvaluator::Alignment, 00045 DstHasDirectAccess = DstFlags & DirectAccessBit, 00046 JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) 00047 }; 00048 00049 private: 00050 enum { 00051 InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) 00052 : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime) 00053 : int(Dst::RowsAtCompileTime), 00054 InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) 00055 : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) 00056 : int(Dst::MaxRowsAtCompileTime), 00057 OuterStride = int(outer_stride_at_compile_time<Dst>::ret), 00058 MaxSizeAtCompileTime = Dst::SizeAtCompileTime, 00059 PacketSize = unpacket_traits<PacketType>::size 00060 }; 00061 00062 enum { 00063 DstIsRowMajor = DstFlags&RowMajorBit, 00064 SrcIsRowMajor = SrcFlags&RowMajorBit, 00065 StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), 00066 MightVectorize = StorageOrdersAgree 00067 && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) 00068 && (functor_traits<AssignFunc>::PacketAccess), 00069 MayInnerVectorize = MightVectorize 00070 && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 00071 && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0 00072 && int(JointAlignment)>=int(RequiredAlignment), 00073 MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), 00074 MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess 00075 && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), 00076 /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, 00077 so it's only good for large enough sizes. */ 00078 MaySliceVectorize = MightVectorize && DstHasDirectAccess 00079 && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) 00080 /* slice vectorization can be slow, so we only want it if the slices are big, which is 00081 indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block 00082 in a fixed-size matrix */ 00083 }; 00084 00085 public: 00086 enum { 00087 Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) 00088 : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) 00089 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) 00090 : int(MayLinearize) ? int(LinearTraversal) 00091 : int(DefaultTraversal), 00092 Vectorized = int(Traversal) == InnerVectorizedTraversal 00093 || int(Traversal) == LinearVectorizedTraversal 00094 || int(Traversal) == SliceVectorizedTraversal 00095 }; 00096 00097 private: 00098 enum { 00099 UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), 00100 MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic 00101 && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), 00102 MayUnrollInner = int(InnerSize) != Dynamic 00103 && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) 00104 }; 00105 00106 public: 00107 enum { 00108 Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) 00109 ? ( 00110 int(MayUnrollCompletely) ? int(CompleteUnrolling) 00111 : int(MayUnrollInner) ? int(InnerUnrolling) 00112 : int(NoUnrolling) 00113 ) 00114 : int(Traversal) == int(LinearVectorizedTraversal) 00115 ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling) 00116 : int(NoUnrolling) ) 00117 : int(Traversal) == int(LinearTraversal) 00118 ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) 00119 : int(NoUnrolling) ) 00120 : int(NoUnrolling) 00121 }; 00122 00123 #ifdef EIGEN_DEBUG_ASSIGN 00124 static void debug() 00125 { 00126 std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; 00127 std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; 00128 std::cerr.setf(std::ios::hex, std::ios::basefield); 00129 std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; 00130 std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; 00131 std::cerr.unsetf(std::ios::hex); 00132 EIGEN_DEBUG_VAR(DstAlignment) 00133 EIGEN_DEBUG_VAR(SrcAlignment) 00134 EIGEN_DEBUG_VAR(RequiredAlignment) 00135 EIGEN_DEBUG_VAR(JointAlignment) 00136 EIGEN_DEBUG_VAR(InnerSize) 00137 EIGEN_DEBUG_VAR(InnerMaxSize) 00138 EIGEN_DEBUG_VAR(PacketSize) 00139 EIGEN_DEBUG_VAR(StorageOrdersAgree) 00140 EIGEN_DEBUG_VAR(MightVectorize) 00141 EIGEN_DEBUG_VAR(MayLinearize) 00142 EIGEN_DEBUG_VAR(MayInnerVectorize) 00143 EIGEN_DEBUG_VAR(MayLinearVectorize) 00144 EIGEN_DEBUG_VAR(MaySliceVectorize) 00145 std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; 00146 EIGEN_DEBUG_VAR(UnrollingLimit) 00147 EIGEN_DEBUG_VAR(MayUnrollCompletely) 00148 EIGEN_DEBUG_VAR(MayUnrollInner) 00149 std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; 00150 std::cerr << std::endl; 00151 } 00152 #endif 00153 }; 00154 00155 /*************************************************************************** 00156 * Part 2 : meta-unrollers 00157 ***************************************************************************/ 00158 00159 /************************ 00160 *** Default traversal *** 00161 ************************/ 00162 00163 template<typename Kernel, int Index, int Stop> 00164 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling 00165 { 00166 // FIXME: this is not very clean, perhaps this information should be provided by the kernel? 00167 typedef typename Kernel::DstEvaluatorType DstEvaluatorType; 00168 typedef typename DstEvaluatorType::XprType DstXprType; 00169 00170 enum { 00171 outer = Index / DstXprType::InnerSizeAtCompileTime, 00172 inner = Index % DstXprType::InnerSizeAtCompileTime 00173 }; 00174 00175 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00176 { 00177 kernel.assignCoeffByOuterInner(outer, inner); 00178 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel); 00179 } 00180 }; 00181 00182 template<typename Kernel, int Stop> 00183 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop> 00184 { 00185 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } 00186 }; 00187 00188 template<typename Kernel, int Index_, int Stop> 00189 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling 00190 { 00191 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) 00192 { 00193 kernel.assignCoeffByOuterInner(outer, Index_); 00194 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer); 00195 } 00196 }; 00197 00198 template<typename Kernel, int Stop> 00199 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop> 00200 { 00201 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { } 00202 }; 00203 00204 /*********************** 00205 *** Linear traversal *** 00206 ***********************/ 00207 00208 template<typename Kernel, int Index, int Stop> 00209 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling 00210 { 00211 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) 00212 { 00213 kernel.assignCoeff(Index); 00214 copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel); 00215 } 00216 }; 00217 00218 template<typename Kernel, int Stop> 00219 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop> 00220 { 00221 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } 00222 }; 00223 00224 /************************** 00225 *** Inner vectorization *** 00226 **************************/ 00227 00228 template<typename Kernel, int Index, int Stop> 00229 struct copy_using_evaluator_innervec_CompleteUnrolling 00230 { 00231 // FIXME: this is not very clean, perhaps this information should be provided by the kernel? 00232 typedef typename Kernel::DstEvaluatorType DstEvaluatorType; 00233 typedef typename DstEvaluatorType::XprType DstXprType; 00234 typedef typename Kernel::PacketType PacketType; 00235 00236 enum { 00237 outer = Index / DstXprType::InnerSizeAtCompileTime, 00238 inner = Index % DstXprType::InnerSizeAtCompileTime, 00239 JointAlignment = Kernel::AssignmentTraits::JointAlignment 00240 }; 00241 00242 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00243 { 00244 kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner); 00245 enum { NextIndex = Index + unpacket_traits<PacketType>::size }; 00246 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel); 00247 } 00248 }; 00249 00250 template<typename Kernel, int Stop> 00251 struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop> 00252 { 00253 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } 00254 }; 00255 00256 template<typename Kernel, int Index_, int Stop> 00257 struct copy_using_evaluator_innervec_InnerUnrolling 00258 { 00259 typedef typename Kernel::PacketType PacketType; 00260 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) 00261 { 00262 kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_); 00263 enum { NextIndex = Index_ + unpacket_traits<PacketType>::size }; 00264 copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer); 00265 } 00266 }; 00267 00268 template<typename Kernel, int Stop> 00269 struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop> 00270 { 00271 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { } 00272 }; 00273 00274 /*************************************************************************** 00275 * Part 3 : implementation of all cases 00276 ***************************************************************************/ 00277 00278 // dense_assignment_loop is based on assign_impl 00279 00280 template<typename Kernel, 00281 int Traversal = Kernel::AssignmentTraits::Traversal, 00282 int Unrolling = Kernel::AssignmentTraits::Unrolling> 00283 struct dense_assignment_loop; 00284 00285 /************************ 00286 *** Default traversal *** 00287 ************************/ 00288 00289 template<typename Kernel> 00290 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling> 00291 { 00292 EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel) 00293 { 00294 for(Index outer = 0; outer < kernel.outerSize(); ++outer) { 00295 for(Index inner = 0; inner < kernel.innerSize(); ++inner) { 00296 kernel.assignCoeffByOuterInner(outer, inner); 00297 } 00298 } 00299 } 00300 }; 00301 00302 template<typename Kernel> 00303 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling> 00304 { 00305 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00306 { 00307 typedef typename Kernel::DstEvaluatorType::XprType DstXprType; 00308 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); 00309 } 00310 }; 00311 00312 template<typename Kernel> 00313 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling> 00314 { 00315 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00316 { 00317 typedef typename Kernel::DstEvaluatorType::XprType DstXprType; 00318 00319 const Index outerSize = kernel.outerSize(); 00320 for(Index outer = 0; outer < outerSize; ++outer) 00321 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer); 00322 } 00323 }; 00324 00325 /*************************** 00326 *** Linear vectorization *** 00327 ***************************/ 00328 00329 00330 // The goal of unaligned_dense_assignment_loop is simply to factorize the handling 00331 // of the non vectorizable beginning and ending parts 00332 00333 template <bool IsAligned = false> 00334 struct unaligned_dense_assignment_loop 00335 { 00336 // if IsAligned = true, then do nothing 00337 template <typename Kernel> 00338 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {} 00339 }; 00340 00341 template <> 00342 struct unaligned_dense_assignment_loop<false> 00343 { 00344 // MSVC must not inline this functions. If it does, it fails to optimize the 00345 // packet access path. 00346 // FIXME check which version exhibits this issue 00347 #if EIGEN_COMP_MSVC 00348 template <typename Kernel> 00349 static EIGEN_DONT_INLINE void run(Kernel &kernel, 00350 Index start, 00351 Index end) 00352 #else 00353 template <typename Kernel> 00354 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, 00355 Index start, 00356 Index end) 00357 #endif 00358 { 00359 for (Index index = start; index < end; ++index) 00360 kernel.assignCoeff(index); 00361 } 00362 }; 00363 00364 template<typename Kernel> 00365 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling> 00366 { 00367 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00368 { 00369 const Index size = kernel.size(); 00370 typedef typename Kernel::Scalar Scalar; 00371 typedef typename Kernel::PacketType PacketType; 00372 enum { 00373 requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment, 00374 packetSize = unpacket_traits<PacketType>::size, 00375 dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), 00376 dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment) 00377 : int(Kernel::AssignmentTraits::DstAlignment), 00378 srcAlignment = Kernel::AssignmentTraits::JointAlignment 00379 }; 00380 const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size); 00381 const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; 00382 00383 unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart); 00384 00385 for(Index index = alignedStart; index < alignedEnd; index += packetSize) 00386 kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index); 00387 00388 unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size); 00389 } 00390 }; 00391 00392 template<typename Kernel> 00393 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling> 00394 { 00395 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00396 { 00397 typedef typename Kernel::DstEvaluatorType::XprType DstXprType; 00398 00399 enum { size = DstXprType::SizeAtCompileTime, 00400 packetSize = packet_traits<typename Kernel::Scalar>::size, 00401 alignedSize = (size/packetSize)*packetSize }; 00402 00403 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel); 00404 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel); 00405 } 00406 }; 00407 00408 /************************** 00409 *** Inner vectorization *** 00410 **************************/ 00411 00412 template<typename Kernel> 00413 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> 00414 { 00415 typedef typename Kernel::PacketType PacketType; 00416 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00417 { 00418 const Index innerSize = kernel.innerSize(); 00419 const Index outerSize = kernel.outerSize(); 00420 const Index packetSize = unpacket_traits<PacketType>::size; 00421 for(Index outer = 0; outer < outerSize; ++outer) 00422 for(Index inner = 0; inner < innerSize; inner+=packetSize) 00423 kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner); 00424 } 00425 }; 00426 00427 template<typename Kernel> 00428 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling> 00429 { 00430 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00431 { 00432 typedef typename Kernel::DstEvaluatorType::XprType DstXprType; 00433 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); 00434 } 00435 }; 00436 00437 template<typename Kernel> 00438 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling> 00439 { 00440 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00441 { 00442 typedef typename Kernel::DstEvaluatorType::XprType DstXprType; 00443 const Index outerSize = kernel.outerSize(); 00444 for(Index outer = 0; outer < outerSize; ++outer) 00445 copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer); 00446 } 00447 }; 00448 00449 /*********************** 00450 *** Linear traversal *** 00451 ***********************/ 00452 00453 template<typename Kernel> 00454 struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling> 00455 { 00456 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00457 { 00458 const Index size = kernel.size(); 00459 for(Index i = 0; i < size; ++i) 00460 kernel.assignCoeff(i); 00461 } 00462 }; 00463 00464 template<typename Kernel> 00465 struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> 00466 { 00467 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) 00468 { 00469 typedef typename Kernel::DstEvaluatorType::XprType DstXprType; 00470 copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel); 00471 } 00472 }; 00473 00474 /************************** 00475 *** Slice vectorization *** 00476 ***************************/ 00477 00478 template<typename Kernel> 00479 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> 00480 { 00481 EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) 00482 { 00483 typedef typename Kernel::Scalar Scalar; 00484 typedef typename Kernel::PacketType PacketType; 00485 enum { 00486 packetSize = unpacket_traits<PacketType>::size, 00487 requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment), 00488 alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar), 00489 dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), 00490 dstAlignment = alignable ? int(requestedAlignment) 00491 : int(Kernel::AssignmentTraits::DstAlignment) 00492 }; 00493 const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0); 00494 if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0) 00495 { 00496 // the pointer is not aligend-on scalar, so alignment is not possible 00497 return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel); 00498 } 00499 const Index packetAlignedMask = packetSize - 1; 00500 const Index innerSize = kernel.innerSize(); 00501 const Index outerSize = kernel.outerSize(); 00502 const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; 00503 Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize); 00504 00505 for(Index outer = 0; outer < outerSize; ++outer) 00506 { 00507 const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); 00508 // do the non-vectorizable part of the assignment 00509 for(Index inner = 0; inner<alignedStart ; ++inner) 00510 kernel.assignCoeffByOuterInner(outer, inner); 00511 00512 // do the vectorizable part of the assignment 00513 for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize) 00514 kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner); 00515 00516 // do the non-vectorizable part of the assignment 00517 for(Index inner = alignedEnd; inner<innerSize ; ++inner) 00518 kernel.assignCoeffByOuterInner(outer, inner); 00519 00520 alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize); 00521 } 00522 } 00523 }; 00524 00525 /*************************************************************************** 00526 * Part 4 : Generic dense assignment kernel 00527 ***************************************************************************/ 00528 00529 // This class generalize the assignment of a coefficient (or packet) from one dense evaluator 00530 // to another dense writable evaluator. 00531 // It is parametrized by the two evaluators, and the actual assignment functor. 00532 // This abstraction level permits to keep the evaluation loops as simple and as generic as possible. 00533 // One can customize the assignment using this generic dense_assignment_kernel with different 00534 // functors, or by completely overloading it, by-passing a functor. 00535 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized> 00536 class generic_dense_assignment_kernel 00537 { 00538 protected: 00539 typedef typename DstEvaluatorTypeT::XprType DstXprType; 00540 typedef typename SrcEvaluatorTypeT::XprType SrcXprType; 00541 public: 00542 00543 typedef DstEvaluatorTypeT DstEvaluatorType; 00544 typedef SrcEvaluatorTypeT SrcEvaluatorType; 00545 typedef typename DstEvaluatorType::Scalar Scalar; 00546 typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits; 00547 typedef typename AssignmentTraits::PacketType PacketType; 00548 00549 00550 EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) 00551 : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) 00552 { 00553 #ifdef EIGEN_DEBUG_ASSIGN 00554 AssignmentTraits::debug(); 00555 #endif 00556 } 00557 00558 EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } 00559 EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } 00560 EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } 00561 EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } 00562 EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } 00563 EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } 00564 00565 EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } 00566 EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } 00567 00569 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) 00570 { 00571 m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); 00572 } 00573 00575 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) 00576 { 00577 m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); 00578 } 00579 00581 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) 00582 { 00583 Index row = rowIndexByOuterInner(outer, inner); 00584 Index col = colIndexByOuterInner(outer, inner); 00585 assignCoeff(row, col); 00586 } 00587 00588 00589 template<int StoreMode, int LoadMode, typename PacketType> 00590 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) 00591 { 00592 m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col)); 00593 } 00594 00595 template<int StoreMode, int LoadMode, typename PacketType> 00596 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) 00597 { 00598 m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index)); 00599 } 00600 00601 template<int StoreMode, int LoadMode, typename PacketType> 00602 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) 00603 { 00604 Index row = rowIndexByOuterInner(outer, inner); 00605 Index col = colIndexByOuterInner(outer, inner); 00606 assignPacket<StoreMode,LoadMode,PacketType>(row, col); 00607 } 00608 00609 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) 00610 { 00611 typedef typename DstEvaluatorType::ExpressionTraits Traits; 00612 return int(Traits::RowsAtCompileTime) == 1 ? 0 00613 : int(Traits::ColsAtCompileTime) == 1 ? inner 00614 : int(DstEvaluatorType::Flags)&RowMajorBit ? outer 00615 : inner; 00616 } 00617 00618 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) 00619 { 00620 typedef typename DstEvaluatorType::ExpressionTraits Traits; 00621 return int(Traits::ColsAtCompileTime) == 1 ? 0 00622 : int(Traits::RowsAtCompileTime) == 1 ? inner 00623 : int(DstEvaluatorType::Flags)&RowMajorBit ? inner 00624 : outer; 00625 } 00626 00627 protected: 00628 DstEvaluatorType& m_dst; 00629 const SrcEvaluatorType& m_src; 00630 const Functor &m_functor; 00631 // TODO find a way to avoid the needs of the original expression 00632 DstXprType& m_dstExpr; 00633 }; 00634 00635 /*************************************************************************** 00636 * Part 5 : Entry point for dense rectangular assignment 00637 ***************************************************************************/ 00638 00639 template<typename DstXprType, typename SrcXprType, typename Functor> 00640 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) 00641 { 00642 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); 00643 00644 typedef evaluator<DstXprType> DstEvaluatorType; 00645 typedef evaluator<SrcXprType> SrcEvaluatorType; 00646 00647 DstEvaluatorType dstEvaluator(dst); 00648 SrcEvaluatorType srcEvaluator(src); 00649 00650 typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel; 00651 Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); 00652 00653 dense_assignment_loop<Kernel>::run(kernel); 00654 } 00655 00656 template<typename DstXprType, typename SrcXprType> 00657 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) 00658 { 00659 call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>()); 00660 } 00661 00662 /*************************************************************************** 00663 * Part 6 : Generic assignment 00664 ***************************************************************************/ 00665 00666 // Based on the respective shapes of the destination and source, 00667 // the class AssignmentKind determine the kind of assignment mechanism. 00668 // AssignmentKind must define a Kind typedef. 00669 template<typename DstShape, typename SrcShape> struct AssignmentKind; 00670 00671 // Assignement kind defined in this file: 00672 struct Dense2Dense {}; 00673 struct EigenBase2EigenBase {}; 00674 00675 template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; 00676 template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; }; 00677 00678 // This is the main assignment class 00679 template< typename DstXprType, typename SrcXprType, typename Functor, 00680 typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind, 00681 typename Scalar = typename DstXprType::Scalar> 00682 struct Assignment; 00683 00684 00685 // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition. 00686 // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated. 00687 // So this intermediate function removes everything related to "assume-aliasing" such that Assignment 00688 // does not has to bother about these annoying details. 00689 00690 template<typename Dst, typename Src> 00691 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00692 void call_assignment(Dst& dst, const Src& src) 00693 { 00694 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); 00695 } 00696 template<typename Dst, typename Src> 00697 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00698 void call_assignment(const Dst& dst, const Src& src) 00699 { 00700 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>()); 00701 } 00702 00703 // Deal with "assume-aliasing" 00704 template<typename Dst, typename Src, typename Func> 00705 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00706 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0) 00707 { 00708 typename plain_matrix_type<Src>::type tmp(src); 00709 call_assignment_no_alias(dst, tmp, func); 00710 } 00711 00712 template<typename Dst, typename Src, typename Func> 00713 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00714 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0) 00715 { 00716 call_assignment_no_alias(dst, src, func); 00717 } 00718 00719 // by-pass "assume-aliasing" 00720 // When there is no aliasing, we require that 'dst' has been properly resized 00721 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func> 00722 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00723 void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func) 00724 { 00725 call_assignment_no_alias(dst.expression(), src, func); 00726 } 00727 00728 00729 template<typename Dst, typename Src, typename Func> 00730 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00731 void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) 00732 { 00733 enum { 00734 NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) 00735 || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) 00736 ) && int(Dst::SizeAtCompileTime) != 1 00737 }; 00738 00739 Index dstRows = NeedToTranspose ? src.cols() : src.rows(); 00740 Index dstCols = NeedToTranspose ? src.rows() : src.cols(); 00741 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) 00742 dst.resize(dstRows, dstCols); 00743 00744 typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned; 00745 typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType; 00746 ActualDstType actualDst(dst); 00747 00748 // TODO check whether this is the right place to perform these checks: 00749 EIGEN_STATIC_ASSERT_LVALUE(Dst) 00750 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) 00751 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); 00752 00753 Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func); 00754 } 00755 template<typename Dst, typename Src> 00756 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00757 void call_assignment_no_alias(Dst& dst, const Src& src) 00758 { 00759 call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>()); 00760 } 00761 00762 template<typename Dst, typename Src, typename Func> 00763 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00764 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) 00765 { 00766 Index dstRows = src.rows(); 00767 Index dstCols = src.cols(); 00768 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) 00769 dst.resize(dstRows, dstCols); 00770 00771 // TODO check whether this is the right place to perform these checks: 00772 EIGEN_STATIC_ASSERT_LVALUE(Dst) 00773 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src) 00774 00775 Assignment<Dst,Src,Func>::run(dst, src, func); 00776 } 00777 template<typename Dst, typename Src> 00778 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00779 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) 00780 { 00781 call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>()); 00782 } 00783 00784 // forward declaration 00785 template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src); 00786 00787 // Generic Dense to Dense assignment 00788 template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> 00789 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar> 00790 { 00791 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00792 static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) 00793 { 00794 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); 00795 00796 #ifndef EIGEN_NO_DEBUG 00797 internal::check_for_aliasing(dst, src); 00798 #endif 00799 00800 call_dense_assignment_loop(dst, src, func); 00801 } 00802 }; 00803 00804 // Generic assignment through evalTo. 00805 // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. 00806 template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> 00807 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar> 00808 { 00809 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00810 static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/) 00811 { 00812 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); 00813 src.evalTo(dst); 00814 } 00815 }; 00816 00817 } // namespace internal 00818 00819 } // end namespace Eigen 00820 00821 #endif // EIGEN_ASSIGN_EVALUATOR_H