MOAB
4.9.3pre
|
#include <SelfadjointMatrixMatrix.h>
Public Types | |
enum | { PacketSize = packet_traits<Scalar>::size } |
Public Member Functions | |
void | operator() (Scalar *blockB, const Scalar *_rhs, Index rhsStride, Index rows, Index cols, Index k2) |
Definition at line 84 of file SelfadjointMatrixMatrix.h.
anonymous enum |
Definition at line 86 of file SelfadjointMatrixMatrix.h.
void Eigen::internal::symm_pack_rhs< Scalar, Index, nr, StorageOrder >::operator() | ( | Scalar * | blockB, |
const Scalar * | _rhs, | ||
Index | rhsStride, | ||
Index | rows, | ||
Index | cols, | ||
Index | k2 | ||
) | [inline] |
Definition at line 87 of file SelfadjointMatrixMatrix.h.
{ Index end_k = k2 + rows; Index count = 0; const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride); Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0; Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0; // first part: normal case for(Index j2=0; j2<k2; j2+=nr) { for(Index k=k2; k<end_k; k++) { blockB[count+0] = rhs(k,j2+0); blockB[count+1] = rhs(k,j2+1); if (nr>=4) { blockB[count+2] = rhs(k,j2+2); blockB[count+3] = rhs(k,j2+3); } if (nr>=8) { blockB[count+4] = rhs(k,j2+4); blockB[count+5] = rhs(k,j2+5); blockB[count+6] = rhs(k,j2+6); blockB[count+7] = rhs(k,j2+7); } count += nr; } } // second part: diagonal block Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2; if(nr>=8) { for(Index j2=k2; j2<end8; j2+=8) { // again we can split vertically in three different parts (transpose, symmetric, normal) // transpose for(Index k=k2; k<j2; k++) { blockB[count+0] = numext::conj(rhs(j2+0,k)); blockB[count+1] = numext::conj(rhs(j2+1,k)); blockB[count+2] = numext::conj(rhs(j2+2,k)); blockB[count+3] = numext::conj(rhs(j2+3,k)); blockB[count+4] = numext::conj(rhs(j2+4,k)); blockB[count+5] = numext::conj(rhs(j2+5,k)); blockB[count+6] = numext::conj(rhs(j2+6,k)); blockB[count+7] = numext::conj(rhs(j2+7,k)); count += 8; } // symmetric Index h = 0; for(Index k=j2; k<j2+8; k++) { // normal for (Index w=0 ; w<h; ++w) blockB[count+w] = rhs(k,j2+w); blockB[count+h] = numext::real(rhs(k,k)); // transpose for (Index w=h+1 ; w<8; ++w) blockB[count+w] = numext::conj(rhs(j2+w,k)); count += 8; ++h; } // normal for(Index k=j2+8; k<end_k; k++) { blockB[count+0] = rhs(k,j2+0); blockB[count+1] = rhs(k,j2+1); blockB[count+2] = rhs(k,j2+2); blockB[count+3] = rhs(k,j2+3); blockB[count+4] = rhs(k,j2+4); blockB[count+5] = rhs(k,j2+5); blockB[count+6] = rhs(k,j2+6); blockB[count+7] = rhs(k,j2+7); count += 8; } } } if(nr>=4) { for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4) { // again we can split vertically in three different parts (transpose, symmetric, normal) // transpose for(Index k=k2; k<j2; k++) { blockB[count+0] = numext::conj(rhs(j2+0,k)); blockB[count+1] = numext::conj(rhs(j2+1,k)); blockB[count+2] = numext::conj(rhs(j2+2,k)); blockB[count+3] = numext::conj(rhs(j2+3,k)); count += 4; } // symmetric Index h = 0; for(Index k=j2; k<j2+4; k++) { // normal for (Index w=0 ; w<h; ++w) blockB[count+w] = rhs(k,j2+w); blockB[count+h] = numext::real(rhs(k,k)); // transpose for (Index w=h+1 ; w<4; ++w) blockB[count+w] = numext::conj(rhs(j2+w,k)); count += 4; ++h; } // normal for(Index k=j2+4; k<end_k; k++) { blockB[count+0] = rhs(k,j2+0); blockB[count+1] = rhs(k,j2+1); blockB[count+2] = rhs(k,j2+2); blockB[count+3] = rhs(k,j2+3); count += 4; } } } // third part: transposed if(nr>=8) { for(Index j2=k2+rows; j2<packet_cols8; j2+=8) { for(Index k=k2; k<end_k; k++) { blockB[count+0] = numext::conj(rhs(j2+0,k)); blockB[count+1] = numext::conj(rhs(j2+1,k)); blockB[count+2] = numext::conj(rhs(j2+2,k)); blockB[count+3] = numext::conj(rhs(j2+3,k)); blockB[count+4] = numext::conj(rhs(j2+4,k)); blockB[count+5] = numext::conj(rhs(j2+5,k)); blockB[count+6] = numext::conj(rhs(j2+6,k)); blockB[count+7] = numext::conj(rhs(j2+7,k)); count += 8; } } } if(nr>=4) { for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4) { for(Index k=k2; k<end_k; k++) { blockB[count+0] = numext::conj(rhs(j2+0,k)); blockB[count+1] = numext::conj(rhs(j2+1,k)); blockB[count+2] = numext::conj(rhs(j2+2,k)); blockB[count+3] = numext::conj(rhs(j2+3,k)); count += 4; } } } // copy the remaining columns one at a time (=> the same with nr==1) for(Index j2=packet_cols4; j2<cols; ++j2) { // transpose Index half = (std::min)(end_k,j2); for(Index k=k2; k<half; k++) { blockB[count] = numext::conj(rhs(j2,k)); count += 1; } if(half==j2 && half<k2+rows) { blockB[count] = numext::real(rhs(j2,j2)); count += 1; } else half--; // normal for(Index k=half+1; k<k2+rows; k++) { blockB[count] = rhs(k,j2); count += 1; } } }