Commit 11753b77 authored by Christoph Conrads

Add faster RANLUX8 SWC

parent 81e54445
......@@ -137,7 +137,8 @@ void run(std::uintmax_t num_draws)
Generator::max() == std::numeric_limits<std::uint64_t>::max() ? 8u :
Generator::max() == std::numeric_limits<std::uint32_t>::max() ? 4u :
Generator::max() == (1u<<24) - 1u ? 3u :
Generator::max() == std::numeric_limits<std::uint16_t>::max() ? 2u : 0u
Generator::max() == std::numeric_limits<std::uint16_t>::max() ? 2u :
Generator::max() == std::numeric_limits<std::uint8_t>::max() ? 1u : 0u
;
......
......@@ -352,6 +352,95 @@ struct subtract_with_borrow_engine
};
// Explicit specialization of subtract_with_borrow_engine for 8-bit words
// with short lag 4 and long lag 7.
//
// Rather than producing one word per call, operator() regenerates all r = 7
// state words in a single batch using two wide integer subtractions and then
// hands the words out one at a time on the following calls.
//
// NOTE(review): the batch update reinterprets runs of adjacent state bytes as
// one multi-byte integer via memcpy and stores the first sizeof(x) bytes of a
// 64-bit result back. The borrow chain inside those wide subtractions runs
// from xs_[0] upwards only on a little-endian host -- confirm that every
// supported target is little-endian.
template<>
struct subtract_with_borrow_engine<std::uint8_t, 8u, 4u, 7u>
{
// Word size in bits.
static constexpr auto w = std::size_t{8};
// Long lag: number of state words, and number of outputs per batch.
static constexpr auto r = std::size_t{7};
// Short lag; also the number of bytes handled by the first wide subtraction.
static constexpr auto s = std::size_t{4};
// Descriptive aliases for the three parameters above.
static constexpr auto long_lag = r;
static constexpr auto short_lag = s;
static constexpr auto word_size = w;
using T = std::uint8_t;
using result_type = T;
// Seed used when the constructor is called without an argument.
static constexpr auto default_seed = std::uint32_t{387853};
// Largest and smallest value of T, reported as the output range.
static constexpr T max() { return std::numeric_limits<T>::max(); }
static constexpr T min() { return std::numeric_limits<T>::min(); }
// Fills every element of xs_ from an xoshiro128plus generator constructed
// with `seed`, then discards r outputs. Because index_ starts at 0, those r
// calls generate and consume exactly one batch, leaving index_ at 0 again.
explicit subtract_with_borrow_engine(std::uint32_t seed=default_seed)
{
auto gen = xoshiro128plus(seed);
for(auto& x : xs_)
x = gen();
// ensure entering a periodic sequence
discard(r);
}
// Returns the next word. When index_ is 0 a new batch of r words is computed
// in place first and xs_[0] is returned.
T operator() ()
{
// Fast path: a batch is partially consumed. Return the next stored word and
// wrap index_ to 0 once xs_[r-1] has been handed out.
if(index_ > 0)
{
auto ret = xs_[index_];
index_ = index_ + 1u == r ? 0u : index_ + 1u;
return ret;
}
// Slow path, step 1: new xs_[0..3] = xs_[r-s .. r-s+3] - xs_[0..3] - carry_,
// evaluated as one 32-bit subtraction. Both operands are loaded before the
// store, so the overlapping byte xs_[3] is read with its old value.
auto x = std::uint32_t{0};
auto p_x = reinterpret_cast<char*>(xs_ + 0);
auto y = std::uint32_t{0};
auto p_y = reinterpret_cast<char*>(xs_ + r - s);
// A 32-bit integer must cover exactly s state words.
static_assert(sizeof(x) == s * sizeof(T), "");
std::memcpy(&x, p_x, sizeof(x));
std::memcpy(&y, p_y, sizeof(y));
// Performed in 64 bits so that a borrow shows up in the upper half.
auto z = std::uint64_t{y} - x - carry_;
std::memcpy(p_x, &z, sizeof(x));
// If the subtraction wrapped, bits 32..63 of z are all ones, so T(z >> 32)
// is 0xFF and its negation converted back to T is 1; otherwise it is 0.
carry_ = -T(z >> 32);
// Step 2: the remaining r - s = 3 words. The minuend is the low 24 bits of
// the step-1 result.
y = std::uint32_t(z) & ((std::uint32_t{1} << 24) - 1u);
// Clear the padding byte so the 4-byte load of xs_[4..7] below has a zero
// top byte.
xs_[r] = 0;
std::memcpy(&x, p_x + s, sizeof(x));
z = std::uint64_t{y} - x - carry_;
// Writes xs_[4..7]; the padding byte xs_[7] is overwritten here but is reset
// to 0 before its next use and is never returned (index_ wraps at r).
std::memcpy(p_x + s, &z, sizeof(x));
// Both operands fit in 24 bits, so bits 24 and above of z are all ones
// exactly when a borrow occurred; same 0xFF -> 1 conversion as above.
carry_ = -T(z >> 24);
index_ = 1u;
return xs_[0];
}
// Advances the engine by calling operator() n times and dropping the results.
void discard(unsigned long long n)
{
for(auto i = 0ull; i < n; ++i)
(*this)();
}
// Position in xs_ of the next word to return; 0 means a new batch must be
// generated first.
std::size_t index_ = 0;
// Borrow carried from one wide subtraction into the next (set to 0 or 1).
T carry_ = 0;
// r state words plus one padding byte so that the 4-byte access starting at
// xs_[s] stays inside the array.
T xs_[8] = { 0 };
};
// RANLUX subtract-with-borrow(2^w, s, r)
using ranlux16_base =
......@@ -386,6 +475,13 @@ using fast_ranlux32_awc = std::discard_block_engine<ranlux32_awc_base, 71u, 16u>
// RANLUX subtract-with-borrow(2^w, p, q)
// Standard-library engine with 8-bit words, short lag 4 and long lag 7.
using ranlux8_base =
std::subtract_with_carry_engine<std::uint8_t, 8u, 4u, 7u>;
// Same template arguments applied to this file's subtract_with_borrow_engine;
// they select the explicit <std::uint8_t, 8u, 4u, 7u> specialization.
using ranlux8_swc_base =
subtract_with_borrow_engine<std::uint8_t, 8u, 4u, 7u>;
// 16-bit variant.
// NOTE(review): which of 33u / 5u is the long lag depends on the primary
// template's parameter order, which is not visible here -- confirm.
using ranlux16_swb_base =
subtract_with_borrow_engine<std::uint16_t, 16u, 33u, 5u>;
using ranlux32_swb_base =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment