12 #ifndef EIGENRAND_PACKET_RANDOM_ENGINE_H 
   13 #define EIGENRAND_PACKET_RANDOM_ENGINE_H 
   17 #include <type_traits> 
   28             auto test_integral_result_type(
int)->std::integral_constant<bool, std::is_integral<typename T::result_type>::value>;
 
   31             auto test_integral_result_type(...)->std::false_type;
 
   34             auto test_intpacket_result_type(
int)->std::integral_constant<bool, internal::IsIntPacket<typename T::result_type>::value>;
 
   37             auto test_intpacket_result_type(...)->std::false_type;
 
   41         struct IsScalarRandomEngine : decltype(detail::test_integral_result_type<Ty>(0))
 
   46         struct IsPacketRandomEngine : decltype(detail::test_intpacket_result_type<Ty>(0))
 
   50         enum class RandomEngineType
 
   56         struct GetRandomEngineType : std::integral_constant <
 
   58             IsPacketRandomEngine<Ty>::value ? RandomEngineType::packet :
 
   59             (IsScalarRandomEngine<Ty>::value ? RandomEngineType::scalar : RandomEngineType::none)
 
   64         template<
typename Ty, 
size_t length, 
size_t alignment = 64>
 
   71                 for (
size_t i = 0; i < length; ++i)
 
   73                     new (&aligned[i]) Ty();
 
   77             AlignedArray(
const AlignedArray& o)
 
   80                 for (
size_t i = 0; i < length; ++i)
 
   86             AlignedArray(AlignedArray&& o)
 
   88                 std::swap(memory, o.memory);
 
   89                 std::swap(aligned, o.aligned);
 
   92             AlignedArray& operator=(
const AlignedArray& o)
 
   94                 for (
size_t i = 0; i < length; ++i)
 
  101             AlignedArray& operator=(AlignedArray&& o)
 
  103                 std::swap(memory, o.memory);
 
  104                 std::swap(aligned, o.aligned);
 
  113             Ty& operator[](
size_t i)
 
  118             const Ty& operator[](
size_t i)
 const 
  133             const Ty* data()
 const 
  141                 memory = std::malloc(
sizeof(Ty) * length + alignment);
 
  142                 aligned = (Ty*)(((
size_t)memory + alignment) & ~(alignment - 1));
 
  149                     for (
size_t i = 0; i < length; ++i)
 
  159             void* memory = 
nullptr;
 
  160             Ty* aligned = 
nullptr;
 
  163 #ifndef EIGEN_DONT_VECTORIZE 
  184         template<
typename Packet,
 
  186             int _Rx, uint64_t _Px,
 
  187             int _Ux, uint64_t _Dx,
 
  188             int _Sx, uint64_t _Bx,
 
  189             int _Tx, uint64_t _Cx,
 
  190             int _Lx, uint64_t _Fx>
 
  194             using result_type = Packet;
 
  196             static constexpr 
int word_size = 64;
 
  197             static constexpr 
int state_size = _Nx;
 
  198             static constexpr 
int shift_size = _Mx;
 
  199             static constexpr 
int mask_bits = _Rx;
 
  200             static constexpr uint64_t parameter_a = _Px;
 
  201             static constexpr 
int output_u = _Ux;
 
  202             static constexpr 
int output_s = _Sx;
 
  203             static constexpr uint64_t output_b = _Bx;
 
  204             static constexpr 
int output_t = _Tx;
 
  205             static constexpr uint64_t output_c = _Cx;
 
  206             static constexpr 
int output_l = _Lx;
 
  208             static constexpr uint64_t default_seed = 5489U;
 
  220                 using namespace Eigen::internal;
 
  221                 std::array<uint64_t, unpacket_traits<Packet>::size / 2> seeds;
 
  222                 for (uint64_t i = 0; i < seeds.size(); ++i)
 
  226                 seed(ploadu<Packet>((
int*)seeds.data()));
 
  246                 using namespace Eigen::internal;
 
  247                 Packet prev = state[0] = x0;
 
  248                 for (
int i = 1; i < _Nx; ++i)
 
  250                     prev = state[i] = pmuluadd64(pxor(prev, psrl64(prev, word_size - 2)), _Fx, i);
 
  287                 else if (2 * _Nx <= stateIdx)
 
  290                 using namespace Eigen::internal;
 
  292                 Packet res = state[stateIdx++];
 
  293                 res = pxor(res, pand(psrl64(res, _Ux), pseti64<Packet>(_Dx)));
 
  294                 res = pxor(res, pand(psll64(res, _Sx), pseti64<Packet>(_Bx)));
 
  295                 res = pxor(res, pand(psll64(res, _Tx), pseti64<Packet>(_Cx)));
 
  296                 res = pxor(res, psrl64(res, _Lx));
 
  307                 for (; 0 < num; --num)
 
  313             typename internal::HalfPacket<Packet>::type half()
 
  320                 typename internal::HalfPacket<Packet>::type a;
 
  321                 internal::split_two(
operator()(), a, cache);
 
  330                 using namespace Eigen::internal;
 
  332                 auto hmask = pseti64<Packet>(_hMask),
 
  333                     lmask = pseti64<Packet>(_lMask),
 
  334                     px = pseti64<Packet>(_Px),
 
  335                     one = pseti64<Packet>(1);
 
  338                 for (i = 0; i < _Nx - _Mx; ++i)
 
  340                     Packet tmp = por(pand(state[i + _Nx], hmask),
 
  341                         pand(state[i + _Nx + 1], lmask));
 
  343                     state[i] = pxor(pxor(
 
  345                         pand(pcmpeq64(pand(tmp, one), one), px)),
 
  350                 for (; i < _Nx - 1; ++i)
 
  352                     Packet tmp = por(pand(state[i + _Nx], hmask),
 
  353                         pand(state[i + _Nx + 1], lmask));
 
  355                     state[i] = pxor(pxor(
 
  357                         pand(pcmpeq64(pand(tmp, one), one), px)),
 
  362                 Packet tmp = por(pand(state[i + _Nx], hmask),
 
  363                     pand(state[0], lmask));
 
  364                 state[i] = pxor(pxor(
 
  366                     pand(pcmpeq64(pand(tmp, one), one), px)),
 
  374                 using namespace Eigen::internal;
 
  376                 auto hmask = pseti64<Packet>(_hMask),
 
  377                     lmask = pseti64<Packet>(_lMask),
 
  378                     px = pseti64<Packet>(_Px),
 
  379                     one = pseti64<Packet>(1);
 
  381                 for (
int i = _Nx; i < 2 * _Nx; ++i)
 
  383                     Packet tmp = por(pand(state[i - _Nx], hmask),
 
  384                         pand(state[i - _Nx + 1], lmask));
 
  386                     state[i] = pxor(pxor(
 
  388                         pand(pcmpeq64(pand(tmp, one), one), px)),
 
  394             AlignedArray<Packet, _Nx * 2> state;
 
  396             typename internal::HalfPacket<Packet>::type cache;
 
  399             static constexpr uint64_t _wMask = (uint64_t)-1;
 
  400             static constexpr uint64_t _hMask = (_wMask << _Rx) & _wMask;
 
  401             static constexpr uint64_t _lMask = ~_hMask & _wMask;
 
  409         template<
typename Packet>
 
  411             0xb5026f5aa96619e9, 29,
 
  412             0x5555555555555555, 17,
 
  413             0x71d67fffeda60000, 37,
 
  414             0xfff7eee000000000, 43, 6364136223846793005>;
 
  417         template<
typename UIntType, 
typename BaseRng, 
int numU64>
 
  418         class ParallelRandomEngineAdaptor
 
  420             static_assert(GetRandomEngineType<BaseRng>::value != RandomEngineType::none, 
"BaseRng must be a kind of Random Engine.");
 
  422             using result_type = UIntType;
 
  424             ParallelRandomEngineAdaptor(
size_t seed = BaseRng::default_seed)
 
  426                 for (
int i = 0; i < num_parallel; ++i)
 
  429                     new (&rngs[i]) BaseRng{ 
seed + i * u64_stride };
 
  433             ParallelRandomEngineAdaptor(
const BaseRng& o)
 
  435                 for (
int i = 0; i < num_parallel; ++i)
 
  438                     new (&rngs[i]) BaseRng{ o };
 
  442             ParallelRandomEngineAdaptor(
const ParallelRandomEngineAdaptor&) = 
default;
 
  443             ParallelRandomEngineAdaptor(ParallelRandomEngineAdaptor&&) = 
default;
 
  445             static constexpr result_type 
min()
 
  447                 return std::numeric_limits<result_type>::min();
 
  450             static constexpr result_type 
max()
 
  452                 return std::numeric_limits<result_type>::max();
 
  466                 if (fcnt >= fbuf_size)
 
  477                 for (
size_t i = 0; i < num_parallel; ++i)
 
  479                     reinterpret_cast<typename BaseRng::result_type&
>(buf[i * result_type_stride]) = rngs[i]();
 
  483             void refill_fbuffer()
 
  486                 for (
size_t i = 0; i < num_parallel; ++i)
 
  488                     auto urf = internal::bit_to_ur_float(rngs[i]());
 
  489                     reinterpret_cast<decltype(urf)&
>(fbuf[i * u64_stride * 2]) = urf;
 
  493             static constexpr 
int u64_stride = 
sizeof(
typename BaseRng::result_type) / 
sizeof(uint64_t);
 
  494             static constexpr 
int result_type_stride = 
sizeof(
typename BaseRng::result_type) / 
sizeof(result_type);
 
  495             static constexpr 
int num_parallel = numU64 / u64_stride;
 
  496             static constexpr 
int byte_size = 
sizeof(uint64_t) * numU64;
 
  497             static constexpr 
size_t buf_size = byte_size / 
sizeof(result_type);
 
  498             static constexpr 
size_t fbuf_size = byte_size / 
sizeof(float);
 
  500             std::array<BaseRng, num_parallel> rngs;
 
  501             AlignedArray<result_type, buf_size> buf;
 
  502             AlignedArray<float, fbuf_size> fbuf;
 
  503             size_t cnt = buf_size, fcnt = fbuf_size;
 
  512         template<
typename UIntType, 
typename BaseRng>
 
  514             sizeof(
typename BaseRng::result_type) / 
sizeof(uint64_t)>;
 
  516         template<
typename BaseRng>
 
  517         class RandomEngineWrapper : 
public BaseRng
 
  520             using BaseRng::BaseRng;
 
  522             RandomEngineWrapper(
const BaseRng& o) : BaseRng{ o }
 
  526             RandomEngineWrapper(BaseRng&& o) : BaseRng{ o }
 
  530             RandomEngineWrapper(
size_t seed) : BaseRng{ 
seed }
 
  534             RandomEngineWrapper() = 
default;
 
  535             RandomEngineWrapper(
const RandomEngineWrapper&) = 
default;
 
  536             RandomEngineWrapper(RandomEngineWrapper&&) = 
default;
 
  540                 internal::bit_scalar<float> bs;
 
  541                 return bs.to_ur(this->
operator()());
 
  545         template<
typename UIntType, 
typename Rng>
 
  546         using UniversalRandomEngine = 
typename std::conditional<
 
  547             IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value,
 
  548             PacketRandomEngineAdaptor<UIntType, typename std::remove_reference<Rng>::type>,
 
  549             typename std::conditional<
 
  550             IsScalarRandomEngine<typename std::remove_reference<Rng>::type>::value,
 
  551             RandomEngineWrapper<typename std::remove_reference<Rng>::type>,
 
  564         template<
typename UIntType, 
typename Rng>
 
  567             return { std::forward<Rng>(rng) };
 
  570 #ifdef EIGEN_VECTORIZE_AVX2 
  571         using Vmt19937_64 = Pmt19937_64<internal::Packet8i>;
 
  572 #elif defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_SSE2) 
  573         using Vmt19937_64 = Pmt19937_64<internal::Packet4i>;
 
  589         template<
typename UIntType = u
int64_t>
 
  590         using P8_mt19937_64 = ParallelRandomEngineAdaptor<UIntType, Vmt19937_64, 8>;
 
A vectorized version of Mersenne Twister Engine.
Definition: PacketRandomEngine.h:192
 
MersenneTwister(Packet x0)
Construct a new Mersenne Twister engine with a packet seed.
Definition: PacketRandomEngine.h:234
 
uint64_t max() const
maximum value of the result
Definition: PacketRandomEngine.h:270
 
void seed(Packet x0)
initialize the engine with a given seed
Definition: PacketRandomEngine.h:244
 
result_type operator()()
Generates one random packet and advance the internal state.
Definition: PacketRandomEngine.h:283
 
MersenneTwister(uint64_t x0=default_seed)
Construct a new Mersenne Twister engine with a scalar seed.
Definition: PacketRandomEngine.h:218
 
uint64_t min() const
minimum value of the result
Definition: PacketRandomEngine.h:260
 
void discard(unsigned long long num)
Discards num items being generated.
Definition: PacketRandomEngine.h:305
 
ParallelRandomEngineAdaptor< UIntType, Vmt19937_64, 8 > P8_mt19937_64
a vectorized mt19937_64 which generates 8 integers of 64bit simultaneously. It always yields the same...
Definition: PacketRandomEngine.h:590
 
UniversalRandomEngine< UIntType, Rng > makeUniversalRng(Rng &&rng)
Helper function for making a UniversalRandomEngine.
Definition: PacketRandomEngine.h:565
 
std::mt19937_64 Vmt19937_64
same as std::mt19937_64 when EIGEN_DONT_VECTORIZE, Pmt19937_64<internal::Packet4i> when SSE2 enabled ...
Definition: PacketRandomEngine.h:583
 
ParallelRandomEngineAdaptor< UIntType, BaseRng, sizeof(typename BaseRng::result_type)/sizeof(uint64_t)> PacketRandomEngineAdaptor
Scalar adaptor for random engines which generates packet.
Definition: PacketRandomEngine.h:514