MOAB
4.9.3pre
|
Go to the source code of this file.
Definition at line 349 of file GeneralBlockPanelKernel.h.
/**
 * One step (index K) of the "3pX1" GEBP micro kernel.
 *
 * Expands to a single statement that:
 *   - loads three LHS packets from blA at packet offsets 3*K+0, 3*K+1 and
 *     3*K+2 (each scaled by LhsProgress) into A0, A1 and A2;
 *   - loads one RHS value from blB at offset K*RhsProgress into B_0;
 *   - calls traits.madd once per LHS packet with accumulators C0, C4 and C8.
 *
 * The expansion site must provide: traits, blA, blB, LhsProgress, RhsProgress,
 * A0, A1, A2, B_0, C0, C4, C8 and EIGEN_ASM_COMMENT.
 *
 * The body is wrapped in do { ... } while(false) without a trailing semicolon,
 * so the caller writes `EIGEN_GEBGP_ONESTEP(k);`.
 *
 * NOTE(review): B_0 is passed both as the RHS operand and as the scratch (4th)
 * argument of all three madd calls while it is still needed by the following
 * call. This is only safe if traits.madd leaves its scratch argument untouched
 * on the code path that uses this kernel -- confirm against traits.madd.
 */
#define EIGEN_GEBGP_ONESTEP(K) \
  do { \
    /* The asm markers are intentional; their own text says they work around bug 935. */ \
    EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX1"); \
    EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
    traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
    traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
    traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
    traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
    traits.madd(A0, B_0, C0, B_0); \
    traits.madd(A1, B_0, C4, B_0); \
    traits.madd(A2, B_0, C8, B_0); \
    EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX1"); \
  } while(false)
/**
 * One step (index K) of the "2pX4" GEBP micro kernel.
 *
 * Expands to a single statement that:
 *   - loads two LHS packets from blA at packet offsets 2*K+0 and 2*K+1
 *     (each scaled by LhsProgress) into A0 and A1;
 *   - calls traits.broadcastRhs on blB at offset 4*K*RhsProgress to fill
 *     B_0, B1, B2 and B3;
 *   - for each of the four RHS values calls traits.madd twice, once per LHS
 *     packet: A0 pairs with accumulators C0..C3 and A1 with C4..C7.
 *
 * For every RHS value the first madd uses T0 as scratch and the second (the
 * last use of that RHS value in this step) hands the RHS variable itself in
 * as scratch.
 *
 * The expansion site must provide: traits, blA, blB, LhsProgress, RhsProgress,
 * A0, A1, B_0, B1, B2, B3, T0, C0..C7 and EIGEN_ASM_COMMENT.
 *
 * The body is wrapped in do { ... } while(false) without a trailing semicolon,
 * so the caller writes `EIGEN_GEBGP_ONESTEP(k);`.
 */
#define EIGEN_GEBGP_ONESTEP(K) \
  do { \
    /* The asm markers are intentional; their own text says they work around bug 935. */ \
    EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
    EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
    traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
    traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
    traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
    traits.madd(A0, B_0, C0, T0); \
    traits.madd(A1, B_0, C4, B_0); \
    traits.madd(A0, B1, C1, T0); \
    traits.madd(A1, B1, C5, B1); \
    traits.madd(A0, B2, C2, T0); \
    traits.madd(A1, B2, C6, B2); \
    traits.madd(A0, B3, C3, T0); \
    traits.madd(A1, B3, C7, B3); \
    EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \
  } while(false)
/**
 * One step (index K) of the "2pX1" GEBP micro kernel.
 *
 * Expands to a single statement that:
 *   - loads two LHS packets from blA at packet offsets 2*K+0 and 2*K+1
 *     (each scaled by LhsProgress) into A0 and A1;
 *   - loads one RHS value from blB at offset K*RhsProgress into B_0;
 *   - calls traits.madd for (A0, B_0) with accumulator C0 and for (A1, B_0)
 *     with accumulator C4.
 *
 * B1 is used only as the scratch argument of the first madd; the second madd
 * (the last use of B_0 in this step) passes B_0 itself as scratch.
 *
 * The expansion site must provide: traits, blA, blB, LhsProgress, RhsProgress,
 * A0, A1, B_0, B1, C0, C4 and EIGEN_ASM_COMMENT.
 *
 * The body is wrapped in do { ... } while(false) without a trailing semicolon,
 * so the caller writes `EIGEN_GEBGP_ONESTEP(k);`.
 */
#define EIGEN_GEBGP_ONESTEP(K) \
  do { \
    /* The asm markers are intentional; their own text says they work around bug 935. */ \
    EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
    EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
    traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
    traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
    traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
    traits.madd(A0, B_0, C0, B1); \
    traits.madd(A1, B_0, C4, B_0); \
    EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
  } while(false)
/**
 * One step (index K) of the "1pX4" GEBP micro kernel.
 *
 * Expands to a single statement that:
 *   - loads one LHS packet from blA at packet offset K (scaled by
 *     LhsProgress) into A0;
 *   - calls traits.broadcastRhs on blB at offset 4*K*RhsProgress to fill
 *     B_0, B1, B2 and B3;
 *   - calls traits.madd once per RHS value, pairing A0 with accumulators
 *     C0, C1, C2 and C3.
 *
 * Each RHS variable is used exactly once, so it is also passed as the scratch
 * argument of its own madd call.
 *
 * The expansion site must provide: traits, blA, blB, LhsProgress, RhsProgress,
 * A0, B_0, B1, B2, B3, C0..C3 and EIGEN_ASM_COMMENT.
 *
 * The body is wrapped in do { ... } while(false) without a trailing semicolon,
 * so the caller writes `EIGEN_GEBGP_ONESTEP(k);`.
 */
#define EIGEN_GEBGP_ONESTEP(K) \
  do { \
    /* The asm markers are intentional; their own text says they work around bug 935. */ \
    EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX4"); \
    EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
    traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
    traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
    traits.madd(A0, B_0, C0, B_0); \
    traits.madd(A0, B1, C1, B1); \
    traits.madd(A0, B2, C2, B2); \
    traits.madd(A0, B3, C3, B3); \
    EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX4"); \
  } while(false)
/**
 * One step (index K) of the "1pX1" GEBP micro kernel.
 *
 * Expands to a single statement that:
 *   - loads one LHS packet from blA at packet offset K (scaled by
 *     LhsProgress) into A0;
 *   - loads one RHS value from blB at offset K*RhsProgress into B_0;
 *   - calls traits.madd for (A0, B_0) with accumulator C0, passing B_0 as the
 *     scratch argument since this is its only use in the step.
 *
 * The expansion site must provide: traits, blA, blB, LhsProgress, RhsProgress,
 * A0, B_0, C0 and EIGEN_ASM_COMMENT.
 *
 * The body is wrapped in do { ... } while(false) WITHOUT a trailing semicolon:
 * the caller supplies it (`EIGEN_GEBGP_ONESTEP(k);`). A semicolon inside the
 * macro would expand to an extra empty statement and break uses such as
 * `if (c) EIGEN_GEBGP_ONESTEP(k); else ...`.
 */
#define EIGEN_GEBGP_ONESTEP(K) \
  do { \
    /* The asm markers are intentional; their own text says they work around bug 935. */ \
    EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX1"); \
    EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
    traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0); \
    traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
    traits.madd(A0, B_0, C0, B_0); \
    EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX1"); \
  } while(false)
/**
 * One step (index K) of the "3pX4" GEBP micro kernel.
 *
 * Expands to a single statement that:
 *   - prefetches blA at packet offset (3*K+16)*LhsProgress and, only when
 *     EIGEN_ARCH_ARM is non-zero, blB at (4*K+16)*RhsProgress;
 *   - loads three LHS packets from blA at packet offsets 3*K+0, 3*K+1 and
 *     3*K+2 (each scaled by LhsProgress) into A0, A1 and A2;
 *   - four times in a row (second template argument 0..3) refreshes B_0 via
 *     possiblyRotatingKernelHelper.loadOrRotateRhs<K, n>(B_0, blB) and then
 *     calls traits.madd for A0, A1 and A2 against that B_0, with accumulators
 *     C(n), C(4+n) and C(8+n) respectively.
 *
 * Within each group the first two madd calls use T0 as scratch and the third
 * (the last use of the current B_0) passes B_0 itself as scratch.
 *
 * K is also used as a template argument, so it must be a compile-time
 * constant at the expansion site.
 *
 * The expansion site must provide: traits, possiblyRotatingKernelHelper,
 * internal::prefetch, EIGEN_ARCH_ARM, blA, blB, LhsProgress, RhsProgress,
 * A0, A1, A2, B_0, T0, C0..C11 and EIGEN_ASM_COMMENT.
 *
 * The body is wrapped in do { ... } while(false) without a trailing semicolon,
 * so the caller writes `EIGEN_GEBP_ONESTEP(k);`.
 */
#define EIGEN_GEBP_ONESTEP(K) \
  do { \
    /* The asm markers are intentional; their own text says they work around bug 935. */ \
    EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
    EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
    internal::prefetch(blA+(3*K+16)*LhsProgress); \
    if (EIGEN_ARCH_ARM) internal::prefetch(blB+(4*K+16)*RhsProgress); /* Bug 953 */ \
    traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
    traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
    traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \
    possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 0>(B_0, blB); \
    traits.madd(A0, B_0, C0, T0); \
    traits.madd(A1, B_0, C4, T0); \
    traits.madd(A2, B_0, C8, B_0); \
    possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 1>(B_0, blB); \
    traits.madd(A0, B_0, C1, T0); \
    traits.madd(A1, B_0, C5, T0); \
    traits.madd(A2, B_0, C9, B_0); \
    possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 2>(B_0, blB); \
    traits.madd(A0, B_0, C2, T0); \
    traits.madd(A1, B_0, C6, T0); \
    traits.madd(A2, B_0, C10, B_0); \
    possiblyRotatingKernelHelper.template loadOrRotateRhs<K, 3>(B_0, blB); \
    traits.madd(A0, B_0, C3, T0); \
    traits.madd(A1, B_0, C7, T0); \
    traits.madd(A2, B_0, C11, B_0); \
    EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
  } while(false)