MOAB  4.9.3pre
AssignEvaluator.h
Go to the documentation of this file.
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2011 Benoit Jacob <[email protected]>
00005 // Copyright (C) 2011-2014 Gael Guennebaud <[email protected]>
00006 // Copyright (C) 2011-2012 Jitse Niesen <[email protected]>
00007 //
00008 // This Source Code Form is subject to the terms of the Mozilla
00009 // Public License v. 2.0. If a copy of the MPL was not distributed
00010 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00011 
00012 #ifndef EIGEN_ASSIGN_EVALUATOR_H
00013 #define EIGEN_ASSIGN_EVALUATOR_H
00014 
00015 namespace Eigen {
00016 
00017 // This implementation is based on Assign.h
00018 
00019 namespace internal {
00020   
00021 /***************************************************************************
00022 * Part 1 : the logic deciding a strategy for traversal and unrolling       *
00023 ***************************************************************************/
00024 
00025 // copy_using_evaluator_traits is based on assign_traits
00026 
// Compile-time decision table for a dense assignment from SrcEvaluator into
// DstEvaluator through the functor AssignFunc.
//
// The two public results are the enumerators `Traversal` (how the destination
// is walked) and `Unrolling` (how much of that walk is expanded at compile
// time). Everything else in this struct is an intermediate predicate feeding
// those two values.
00027 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
00028 struct copy_using_evaluator_traits
00029 {
00030   typedef typename DstEvaluator::XprType Dst;
00031   typedef typename Dst::Scalar DstScalar;
00032   // TODO distinguish between linear traversal and inner-traversals
  // Packet type selected by find_best_packet from the destination scalar type
  // and the destination's compile-time size.
00033   typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type PacketType; 
00034   
  // Evaluator flags of both sides, and the alignment reported by
  // unpacket_traits for PacketType.
00035   enum {
00036     DstFlags = DstEvaluator::Flags,
00037     SrcFlags = SrcEvaluator::Flags,
00038     RequiredAlignment = unpacket_traits<PacketType>::alignment
00039   };
00040   
00041 public:
  // Alignment of each side as reported by its evaluator; JointAlignment is the
  // smaller of the two. DstHasDirectAccess is non-zero when the destination
  // flags carry DirectAccessBit.
00042   enum {
00043     DstAlignment = DstEvaluator::Alignment,
00044     SrcAlignment = SrcEvaluator::Alignment,
00045     DstHasDirectAccess = DstFlags & DirectAccessBit,
00046     JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
00047   };
00048 
00049 private:
  // Inner dimension of the destination: the whole size for a vector, otherwise
  // the column count when the destination is row-major and the row count when
  // it is not. InnerMaxSize follows the same rule using the Max* sizes.
  // NOTE(review): MaxSizeAtCompileTime is initialised from Dst::SizeAtCompileTime,
  // not Dst::MaxSizeAtCompileTime; confirm this is intended.
00050   enum {
00051     InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
00052               : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
00053               : int(Dst::RowsAtCompileTime),
00054     InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
00055               : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
00056               : int(Dst::MaxRowsAtCompileTime),
00057     OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
00058     MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
00059     PacketSize = unpacket_traits<PacketType>::size
00060   };
00061 
  // Feasibility predicates, one per vectorized/linear strategy:
  //  - MightVectorize: both sides share a storage order, both carry
  //    ActualPacketAccessBit, and the functor advertises PacketAccess.
  //  - MayInnerVectorize: additionally the inner size and the outer stride are
  //    known at compile time and are multiples of PacketSize, and the joint
  //    alignment reaches RequiredAlignment.
  //  - MayLinearize: same storage order and both sides carry LinearAccessBit.
  //  - MayLinearVectorize: vectorizable, linearizable, direct access to the
  //    destination, and either the destination is sufficiently aligned or
  //    MaxSizeAtCompileTime is Dynamic.
  //  - MaySliceVectorize: vectorizable, direct access to the destination, and
  //    InnerMaxSize is Dynamic or at least three packets long.
00062   enum {
00063     DstIsRowMajor = DstFlags&RowMajorBit,
00064     SrcIsRowMajor = SrcFlags&RowMajorBit,
00065     StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
00066     MightVectorize = StorageOrdersAgree
00067                   && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
00068                   && (functor_traits<AssignFunc>::PacketAccess),
00069     MayInnerVectorize  = MightVectorize
00070                        && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
00071                        && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0
00072                        && int(JointAlignment)>=int(RequiredAlignment),
00073     MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
00074     MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
00075                        && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
00076       /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
00077          so it's only good for large enough sizes. */
00078     MaySliceVectorize  = MightVectorize && DstHasDirectAccess
00079                        && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
00080       /* slice vectorization can be slow, so we only want it if the slices are big, which is
00081          indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
00082          in a fixed-size matrix */
00083   };
00084 
00085 public:
  // Traversal: the first feasible strategy wins, tested in the order
  // inner-vectorized, linear-vectorized, slice-vectorized, linear, default.
  // Vectorized is true for any of the three packet-based traversals.
00086   enum {
00087     Traversal = int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
00088               : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
00089               : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
00090               : int(MayLinearize)        ? int(LinearTraversal)
00091                                          : int(DefaultTraversal),
00092     Vectorized = int(Traversal) == InnerVectorizedTraversal
00093               || int(Traversal) == LinearVectorizedTraversal
00094               || int(Traversal) == SliceVectorizedTraversal
00095   };
00096 
00097 private:
  // Unrolling budget: EIGEN_UNROLLING_LIMIT, multiplied by PacketSize when the
  // traversal is vectorized. The cost estimate is (number of coefficients) *
  // (source CoeffReadCost), over the full size or over one inner slice.
00098   enum {
00099     UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
00100     MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
00101                        && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
00102     MayUnrollInner      = int(InnerSize) != Dynamic
00103                        && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
00104   };
00105 
00106 public:
  // Unrolling, per traversal:
  //  - inner-vectorized / default: complete, else inner, else none;
  //  - linear-vectorized: complete only if the destination is also
  //    sufficiently aligned, else none;
  //  - linear: complete or none;
  //  - anything else (slice-vectorized): none.
00107   enum {
00108     Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
00109                 ? (
00110                     int(MayUnrollCompletely) ? int(CompleteUnrolling)
00111                   : int(MayUnrollInner)      ? int(InnerUnrolling)
00112                                              : int(NoUnrolling)
00113                   )
00114               : int(Traversal) == int(LinearVectorizedTraversal)
00115                 ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling)
00116                                                                                              : int(NoUnrolling) )
00117               : int(Traversal) == int(LinearTraversal)
00118                 ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) 
00119                                               : int(NoUnrolling) )
00120               : int(NoUnrolling)
00121   };
00122 
00123 #ifdef EIGEN_DEBUG_ASSIGN
  // Prints the expression types, the flags (in hexadecimal) and every
  // intermediate decision above to std::cerr. Only compiled when
  // EIGEN_DEBUG_ASSIGN is defined.
00124   static void debug()
00125   {
00126     std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
00127     std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
00128     std::cerr.setf(std::ios::hex, std::ios::basefield);
00129     std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
00130     std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
00131     std::cerr.unsetf(std::ios::hex);
00132     EIGEN_DEBUG_VAR(DstAlignment)
00133     EIGEN_DEBUG_VAR(SrcAlignment)
00134     EIGEN_DEBUG_VAR(RequiredAlignment)
00135     EIGEN_DEBUG_VAR(JointAlignment)
00136     EIGEN_DEBUG_VAR(InnerSize)
00137     EIGEN_DEBUG_VAR(InnerMaxSize)
00138     EIGEN_DEBUG_VAR(PacketSize)
00139     EIGEN_DEBUG_VAR(StorageOrdersAgree)
00140     EIGEN_DEBUG_VAR(MightVectorize)
00141     EIGEN_DEBUG_VAR(MayLinearize)
00142     EIGEN_DEBUG_VAR(MayInnerVectorize)
00143     EIGEN_DEBUG_VAR(MayLinearVectorize)
00144     EIGEN_DEBUG_VAR(MaySliceVectorize)
00145     std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
00146     EIGEN_DEBUG_VAR(UnrollingLimit)
00147     EIGEN_DEBUG_VAR(MayUnrollCompletely)
00148     EIGEN_DEBUG_VAR(MayUnrollInner)
00149     std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
00150     std::cerr << std::endl;
00151   }
00152 #endif
00153 };
00154 
00155 /***************************************************************************
00156 * Part 2 : meta-unrollers
00157 ***************************************************************************/
00158 
00159 /************************
00160 *** Default traversal ***
00161 ************************/
00162 
00163 template<typename Kernel, int Index, int Stop>
00164 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
00165 {
00166   // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
00167   typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
00168   typedef typename DstEvaluatorType::XprType DstXprType;
00169   
00170   enum {
00171     outer = Index / DstXprType::InnerSizeAtCompileTime,
00172     inner = Index % DstXprType::InnerSizeAtCompileTime
00173   };
00174 
00175   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00176   {
00177     kernel.assignCoeffByOuterInner(outer, inner);
00178     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
00179   }
00180 };
00181 
00182 template<typename Kernel, int Stop>
00183 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
00184 {
00185   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
00186 };
00187 
00188 template<typename Kernel, int Index_, int Stop>
00189 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
00190 {
00191   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
00192   {
00193     kernel.assignCoeffByOuterInner(outer, Index_);
00194     copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
00195   }
00196 };
00197 
00198 template<typename Kernel, int Stop>
00199 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
00200 {
00201   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
00202 };
00203 
00204 /***********************
00205 *** Linear traversal ***
00206 ***********************/
00207 
00208 template<typename Kernel, int Index, int Stop>
00209 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
00210 {
00211   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
00212   {
00213     kernel.assignCoeff(Index);
00214     copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
00215   }
00216 };
00217 
00218 template<typename Kernel, int Stop>
00219 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
00220 {
00221   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
00222 };
00223 
00224 /**************************
00225 *** Inner vectorization ***
00226 **************************/
00227 
00228 template<typename Kernel, int Index, int Stop>
00229 struct copy_using_evaluator_innervec_CompleteUnrolling
00230 {
00231   // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
00232   typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
00233   typedef typename DstEvaluatorType::XprType DstXprType;
00234   typedef typename Kernel::PacketType PacketType;
00235   
00236   enum {
00237     outer = Index / DstXprType::InnerSizeAtCompileTime,
00238     inner = Index % DstXprType::InnerSizeAtCompileTime,
00239     JointAlignment = Kernel::AssignmentTraits::JointAlignment
00240   };
00241 
00242   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00243   {
00244     kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
00245     enum { NextIndex = Index + unpacket_traits<PacketType>::size };
00246     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
00247   }
00248 };
00249 
00250 template<typename Kernel, int Stop>
00251 struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
00252 {
00253   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
00254 };
00255 
00256 template<typename Kernel, int Index_, int Stop>
00257 struct copy_using_evaluator_innervec_InnerUnrolling
00258 {
00259   typedef typename Kernel::PacketType PacketType;
00260   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
00261   {
00262     kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
00263     enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
00264     copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
00265   }
00266 };
00267 
00268 template<typename Kernel, int Stop>
00269 struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
00270 {
00271   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
00272 };
00273 
00274 /***************************************************************************
00275 * Part 3 : implementation of all cases
00276 ***************************************************************************/
00277 
00278 // dense_assignment_loop is based on assign_impl
00279 
// Primary template of the assignment-loop dispatcher. It is only declared
// here; the partial specializations that follow supply a static run(Kernel&)
// for each (Traversal, Unrolling) pair they handle. Both selectors default to
// the values computed by Kernel::AssignmentTraits.
00280 template<typename Kernel,
00281          int Traversal = Kernel::AssignmentTraits::Traversal,
00282          int Unrolling = Kernel::AssignmentTraits::Unrolling>
00283 struct dense_assignment_loop;
00284 
00285 /************************
00286 *** Default traversal ***
00287 ************************/
00288 
00289 template<typename Kernel>
00290 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
00291 {
00292   EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
00293   {
00294     for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
00295       for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
00296         kernel.assignCoeffByOuterInner(outer, inner);
00297       }
00298     }
00299   }
00300 };
00301 
00302 template<typename Kernel>
00303 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
00304 {
00305   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00306   {
00307     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00308     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
00309   }
00310 };
00311 
00312 template<typename Kernel>
00313 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
00314 {
00315   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00316   {
00317     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00318 
00319     const Index outerSize = kernel.outerSize();
00320     for(Index outer = 0; outer < outerSize; ++outer)
00321       copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
00322   }
00323 };
00324 
00325 /***************************
00326 *** Linear vectorization ***
00327 ***************************/
00328 
00329 
00330 // The goal of unaligned_dense_assignment_loop is simply to factor out the handling
00331 // of the non-vectorizable beginning and ending parts
00332 
00333 template <bool IsAligned = false>
00334 struct unaligned_dense_assignment_loop
00335 {
00336   // if IsAligned = true, then do nothing
00337   template <typename Kernel>
00338   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
00339 };
00340 
00341 template <>
00342 struct unaligned_dense_assignment_loop<false>
00343 {
00344   // MSVC must not inline this functions. If it does, it fails to optimize the
00345   // packet access path.
00346   // FIXME check which version exhibits this issue
00347 #if EIGEN_COMP_MSVC
00348   template <typename Kernel>
00349   static EIGEN_DONT_INLINE void run(Kernel &kernel,
00350                                     Index start,
00351                                     Index end)
00352 #else
00353   template <typename Kernel>
00354   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
00355                                       Index start,
00356                                       Index end)
00357 #endif
00358   {
00359     for (Index index = start; index < end; ++index)
00360       kernel.assignCoeff(index);
00361   }
00362 };
00363 
00364 template<typename Kernel>
00365 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
00366 {
00367   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00368   {
00369     const Index size = kernel.size();
00370     typedef typename Kernel::Scalar Scalar;
00371     typedef typename Kernel::PacketType PacketType;
00372     enum {
00373       requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment,
00374       packetSize = unpacket_traits<PacketType>::size,
00375       dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
00376       dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
00377                                                             : int(Kernel::AssignmentTraits::DstAlignment),
00378       srcAlignment = Kernel::AssignmentTraits::JointAlignment
00379     };
00380     const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
00381     const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
00382 
00383     unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
00384 
00385     for(Index index = alignedStart; index < alignedEnd; index += packetSize)
00386       kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
00387 
00388     unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
00389   }
00390 };
00391 
00392 template<typename Kernel>
00393 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
00394 {
00395   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00396   {
00397     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00398     
00399     enum { size = DstXprType::SizeAtCompileTime,
00400            packetSize = packet_traits<typename Kernel::Scalar>::size,
00401            alignedSize = (size/packetSize)*packetSize };
00402 
00403     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
00404     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
00405   }
00406 };
00407 
00408 /**************************
00409 *** Inner vectorization ***
00410 **************************/
00411 
00412 template<typename Kernel>
00413 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
00414 {
00415   typedef typename Kernel::PacketType PacketType;
00416   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00417   {
00418     const Index innerSize = kernel.innerSize();
00419     const Index outerSize = kernel.outerSize();
00420     const Index packetSize = unpacket_traits<PacketType>::size;
00421     for(Index outer = 0; outer < outerSize; ++outer)
00422       for(Index inner = 0; inner < innerSize; inner+=packetSize)
00423         kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
00424   }
00425 };
00426 
00427 template<typename Kernel>
00428 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
00429 {
00430   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00431   {
00432     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00433     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
00434   }
00435 };
00436 
00437 template<typename Kernel>
00438 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
00439 {
00440   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00441   {
00442     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00443     const Index outerSize = kernel.outerSize();
00444     for(Index outer = 0; outer < outerSize; ++outer)
00445       copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
00446   }
00447 };
00448 
00449 /***********************
00450 *** Linear traversal ***
00451 ***********************/
00452 
00453 template<typename Kernel>
00454 struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
00455 {
00456   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00457   {
00458     const Index size = kernel.size();
00459     for(Index i = 0; i < size; ++i)
00460       kernel.assignCoeff(i);
00461   }
00462 };
00463 
00464 template<typename Kernel>
00465 struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
00466 {
00467   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00468   {
00469     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00470     copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
00471   }
00472 };
00473 
00474 /**************************
00475 *** Slice vectorization ***
00476 ***************************/
00477 
00478 template<typename Kernel>
00479 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
00480 {
00481   EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
00482   {
00483     typedef typename Kernel::Scalar Scalar;
00484     typedef typename Kernel::PacketType PacketType;
00485     enum {
00486       packetSize = unpacket_traits<PacketType>::size,
00487       requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment),
00488       alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
00489       dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
00490       dstAlignment = alignable ? int(requestedAlignment)
00491                                : int(Kernel::AssignmentTraits::DstAlignment)
00492     };
00493     const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
00494     if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
00495     {
00496       // the pointer is not aligend-on scalar, so alignment is not possible
00497       return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
00498     }
00499     const Index packetAlignedMask = packetSize - 1;
00500     const Index innerSize = kernel.innerSize();
00501     const Index outerSize = kernel.outerSize();
00502     const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
00503     Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
00504 
00505     for(Index outer = 0; outer < outerSize; ++outer)
00506     {
00507       const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
00508       // do the non-vectorizable part of the assignment
00509       for(Index inner = 0; inner<alignedStart ; ++inner)
00510         kernel.assignCoeffByOuterInner(outer, inner);
00511 
00512       // do the vectorizable part of the assignment
00513       for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
00514         kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
00515 
00516       // do the non-vectorizable part of the assignment
00517       for(Index inner = alignedEnd; inner<innerSize ; ++inner)
00518         kernel.assignCoeffByOuterInner(outer, inner);
00519 
00520       alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
00521     }
00522   }
00523 };
00524 
00525 /***************************************************************************
00526 * Part 4 : Generic dense assignment kernel
00527 ***************************************************************************/
00528 
00529 // This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
00530 // to another dense writable evaluator.
00531 // It is parametrized by the two evaluators, and the actual assignment functor.
00532 // This level of abstraction keeps the evaluation loops as simple and as generic as possible.
00533 // One can customize the assignment using this generic dense_assignment_kernel with different
00534 // functors, or by completely overloading it, bypassing the functor.
00535 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
00536 class generic_dense_assignment_kernel
00537 {
00538 protected:
00539   typedef typename DstEvaluatorTypeT::XprType DstXprType;
00540   typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
00541 public:
00542   
00543   typedef DstEvaluatorTypeT DstEvaluatorType;
00544   typedef SrcEvaluatorTypeT SrcEvaluatorType;
00545   typedef typename DstEvaluatorType::Scalar Scalar;
00546   typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
00547   typedef typename AssignmentTraits::PacketType PacketType;
00548   
00549   
00550   EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
00551     : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
00552   {
00553     #ifdef EIGEN_DEBUG_ASSIGN
00554     AssignmentTraits::debug();
00555     #endif
00556   }
00557   
00558   EIGEN_DEVICE_FUNC Index size() const        { return m_dstExpr.size(); }
00559   EIGEN_DEVICE_FUNC Index innerSize() const   { return m_dstExpr.innerSize(); }
00560   EIGEN_DEVICE_FUNC Index outerSize() const   { return m_dstExpr.outerSize(); }
00561   EIGEN_DEVICE_FUNC Index rows() const        { return m_dstExpr.rows(); }
00562   EIGEN_DEVICE_FUNC Index cols() const        { return m_dstExpr.cols(); }
00563   EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
00564   
00565   EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
00566   EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
00567   
00569   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
00570   {
00571     m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
00572   }
00573   
00575   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
00576   {
00577     m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
00578   }
00579   
00581   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
00582   {
00583     Index row = rowIndexByOuterInner(outer, inner); 
00584     Index col = colIndexByOuterInner(outer, inner); 
00585     assignCoeff(row, col);
00586   }
00587   
00588   
00589   template<int StoreMode, int LoadMode, typename PacketType>
00590   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
00591   {
00592     m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
00593   }
00594   
00595   template<int StoreMode, int LoadMode, typename PacketType>
00596   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
00597   {
00598     m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
00599   }
00600   
00601   template<int StoreMode, int LoadMode, typename PacketType>
00602   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
00603   {
00604     Index row = rowIndexByOuterInner(outer, inner); 
00605     Index col = colIndexByOuterInner(outer, inner);
00606     assignPacket<StoreMode,LoadMode,PacketType>(row, col);
00607   }
00608   
00609   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
00610   {
00611     typedef typename DstEvaluatorType::ExpressionTraits Traits;
00612     return int(Traits::RowsAtCompileTime) == 1 ? 0
00613       : int(Traits::ColsAtCompileTime) == 1 ? inner
00614       : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
00615       : inner;
00616   }
00617 
00618   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
00619   {
00620     typedef typename DstEvaluatorType::ExpressionTraits Traits;
00621     return int(Traits::ColsAtCompileTime) == 1 ? 0
00622       : int(Traits::RowsAtCompileTime) == 1 ? inner
00623       : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
00624       : outer;
00625   }
00626   
00627 protected:
00628   DstEvaluatorType& m_dst;
00629   const SrcEvaluatorType& m_src;
00630   const Functor &m_functor;
00631   // TODO find a way to avoid the needs of the original expression
00632   DstXprType& m_dstExpr;
00633 };
00634 
00635 /***************************************************************************
00636 * Part 5 : Entry point for dense rectangular assignment
00637 ***************************************************************************/
00638 
00639 template<typename DstXprType, typename SrcXprType, typename Functor>
00640 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
00641 {
00642   eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
00643   
00644   typedef evaluator<DstXprType> DstEvaluatorType;
00645   typedef evaluator<SrcXprType> SrcEvaluatorType;
00646 
00647   DstEvaluatorType dstEvaluator(dst);
00648   SrcEvaluatorType srcEvaluator(src);
00649     
00650   typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
00651   Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
00652   
00653   dense_assignment_loop<Kernel>::run(kernel);
00654 }
00655 
00656 template<typename DstXprType, typename SrcXprType>
00657 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
00658 {
00659   call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
00660 }
00661 
00662 /***************************************************************************
00663 * Part 6 : Generic assignment
00664 ***************************************************************************/
00665 
00666 // Based on the respective shapes of the destination and source,
00667 // the class AssignmentKind determines the kind of assignment mechanism.
00668 // AssignmentKind must define a Kind typedef.
00669 template<typename DstShape, typename SrcShape> struct AssignmentKind;
00670 
00671 // Assignement kind defined in this file:
00672 struct Dense2Dense {};
00673 struct EigenBase2EigenBase {};
00674 
00675 template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
00676 template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
00677     
00678 // This is the main assignment class
// Only declared here. `Kind` defaults to the AssignmentKind selected from the
// evaluator shapes of DstXprType and SrcXprType; `Scalar` defaults to the
// destination's scalar type.
00679 template< typename DstXprType, typename SrcXprType, typename Functor,
00680           typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
00681           typename Scalar = typename DstXprType::Scalar>
00682 struct Assignment;
00683 
00684 
00685 // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
00686 // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
00687 // So this intermediate function removes everything related to "assume-aliasing" such that Assignment
00688 // does not have to bother about these annoying details.
00689 
00690 template<typename Dst, typename Src>
00691 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00692 void call_assignment(Dst& dst, const Src& src)
00693 {
00694   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
00695 }
00696 template<typename Dst, typename Src>
00697 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00698 void call_assignment(const Dst& dst, const Src& src)
00699 {
00700   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
00701 }
00702                      
00703 // Deal with "assume-aliasing"
00704 template<typename Dst, typename Src, typename Func>
00705 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00706 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
00707 {
00708   typename plain_matrix_type<Src>::type tmp(src);
00709   call_assignment_no_alias(dst, tmp, func);
00710 }
00711 
00712 template<typename Dst, typename Src, typename Func>
00713 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00714 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
00715 {
00716   call_assignment_no_alias(dst, src, func);
00717 }
00718 
00719 // by-pass "assume-aliasing"
00720 // When there is no aliasing, we require that 'dst' has been properly resized
00721 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
00722 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00723 void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
00724 {
00725   call_assignment_no_alias(dst.expression(), src, func);
00726 }
00727 
00728 
00729 template<typename Dst, typename Src, typename Func>
00730 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00731 void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
00732 {
00733   enum {
00734     NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
00735                         || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
00736                       ) && int(Dst::SizeAtCompileTime) != 1
00737   };
00738 
00739   Index dstRows = NeedToTranspose ? src.cols() : src.rows();
00740   Index dstCols = NeedToTranspose ? src.rows() : src.cols();
00741   if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
00742     dst.resize(dstRows, dstCols);
00743   
00744   typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
00745   typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
00746   ActualDstType actualDst(dst);
00747   
00748   // TODO check whether this is the right place to perform these checks:
00749   EIGEN_STATIC_ASSERT_LVALUE(Dst)
00750   EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
00751   EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
00752   
00753   Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
00754 }
00755 template<typename Dst, typename Src>
00756 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00757 void call_assignment_no_alias(Dst& dst, const Src& src)
00758 {
00759   call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
00760 }
00761 
00762 template<typename Dst, typename Src, typename Func>
00763 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00764 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
00765 {
00766   Index dstRows = src.rows();
00767   Index dstCols = src.cols();
00768   if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
00769     dst.resize(dstRows, dstCols);
00770   
00771   // TODO check whether this is the right place to perform these checks:
00772   EIGEN_STATIC_ASSERT_LVALUE(Dst)
00773   EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
00774   
00775   Assignment<Dst,Src,Func>::run(dst, src, func);
00776 }
00777 template<typename Dst, typename Src>
00778 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00779 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
00780 {
00781   call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
00782 }
00783 
// Forward declaration only: the definition is not part of this file.
template<typename Dst, typename Src>
void check_for_aliasing(const Dst& dst, const Src& src);
00786 
00787 // Generic Dense to Dense assignment
00788 template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
00789 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
00790 {
00791   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00792   static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
00793   {
00794     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
00795     
00796 #ifndef EIGEN_NO_DEBUG
00797     internal::check_for_aliasing(dst, src);
00798 #endif
00799     
00800     call_dense_assignment_loop(dst, src, func);
00801   }
00802 };
00803 
00804 // Generic assignment through evalTo.
00805 // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
00806 template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
00807 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
00808 {
00809   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00810   static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
00811   {
00812     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
00813     src.evalTo(dst);
00814   }
00815 };
00816 
00817 } // namespace internal
00818 
00819 } // end namespace Eigen
00820 
00821 #endif // EIGEN_ASSIGN_EVALUATOR_H
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines